# Quick reference: parsing markup with lxml.etree and querying it with XPath.
# `html_string` / `xml_string` are expected to be defined by the caller.
import lxml.etree

# Parse an HTML document held in a string.
doc = lxml.etree.HTML(html_string)
# ... or parse an XML document instead. This rebinds `doc`, so keep only
# the parser call that matches your input.
doc = lxml.etree.XML(xml_string)

# xpath() results are indexed here with [0] to take the first match;
# that raises IndexError when nothing matches.
doc.xpath("//div[@id='hoge']")[0].text  # text of the first matching <div>
doc.xpath("//div[@id='hoge']")[0].get("class")  # value of its "class" attribute

# Namespaced lookup: the "ns" prefix used in the expression is mapped to its
# namespace URI through the `namespaces` keyword argument.
doc.xpath("//ns:entry", namespaces={"ns": "http://example.com/test"})