使用 lxml 读取文本并解析节点
from lxml import etree
# Sample HTML fragment; note that the last <li> has no closing tag.
text = '''
<div>
<ul>
<li class="item-0"><a href="link1.html">第一个</a></li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0"><a href="link5.html">a属性</a>
</ul>
</div>
'''

# Parse the fragment with lxml's HTML parser; the result is the root element
# of the parsed tree.
html = etree.HTML(text)

# Serialize the parsed tree back to markup, encoded as UTF-8 bytes.
result = etree.tostring(html, encoding='utf-8')

# Show the type of the parsed tree and of its serialized form, one per line.
print(type(html), type(result), sep='\n')

# Decode the serialized bytes to text before printing the markup.
print(str(result, 'utf-8'))

# Note: etree repairs the HTML while parsing -- in the output below the
# missing </li> has been closed and <html>/<body> wrappers have been added.
<class 'lxml.etree._Element'>
<class 'bytes'>
<html><body><div>
<ul>
<li class="item-0"><a href="link1.html">第一个</a></li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0"><a href="link5.html">a属性</a>
</li></ul>
</div>
</body></html>