"""Minimal lxml example: extract the text of every <a> inside an <li> via XPath."""

from lxml import html

# Reach etree through lxml.html, exactly as the original snippet does.
etree = html.etree

# Sample markup. Note that the last <li> is deliberately left without its
# closing </li> tag; the expected output below still lists all five items.
# The literal is split across adjacent strings purely for readability; the
# resulting value is identical to the original single-line string.
wb_data = (
    ' <div> <ul> '
    '<li class="item-0"><a href="link1.html">first item</a></li> '
    '<li class="item-1"><a href="link2.html">second item</a></li> '
    '<li class="item-inactive"><a href="link3.html">third item</a></li> '
    '<li class="item-1"><a href="link4.html">fourth item</a></li> '
    '<li class="item-0"><a href="link5.html">fifth item</a> '
    '</ul> </div> '
)

# Parse the HTML string into an element tree.
wb = etree.HTML(wb_data)

# Get the text content under the <a> tags.
s = wb.xpath('//li/a/text()')
print(s, type(s))
# Expected output:
# ['first item', 'second item', 'third item', 'fourth item', 'fifth item'] <class 'list'>