Imports:
from subprocess import Popen, PIPE
from lxml import etree
from io import StringIO
Downloading:
# Browser-like User-Agent string handed to curl through its -A flag.
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36'
url = 'http://stackoverflow.com'
# Run curl as a child process and capture everything it writes to stdout.
# Using Popen as a context manager closes the pipe and waits for the child on
# exit, so no file descriptor or unreaped process is left behind;
# communicate() reads stdout through to EOF.
with Popen(['curl', '-s', '-A', user_agent, url], stdout=PIPE) as get:
    raw_body, _ = get.communicate()
# Decode the captured bytes as UTF-8 text for the parsing step.
result = raw_body.decode('utf8')
`-s`: silent mode (curl does not print its progress meter or error messages)
`-A`: sets the User-Agent string sent with the request
Parsing:
# Wrap the downloaded text in an in-memory file object and hand it to
# etree.parse together with an HTML parser instance.
markup = StringIO(result)
tree = etree.parse(markup, parser=etree.HTMLParser())
# The XPath '//div' selects <div> elements at any depth in the document.
divs = tree.xpath('//div')