"""Scrape the qiushibaike image-rank page and print its posts grouped by gender.

The script downloads one listing page, locates every post container and
prints the author name, age, text, vote count, comment count and image URL
of each post: first the posts carrying the male gender icon, then the posts
carrying the female gender icon.
"""
import re

import requests
from bs4 import BeautifulSoup
from lxml import etree

# Listing page that is scraped.
url = 'https://www.qiushibaike.com/imgrank/'

# Request headers sent with the page download.
headers = {
    'RequestURL': 'https://eclick.baidu.com/fp.htm?br=2&fp=8D1371255901FBD7974323B7D8E17C98&fp2=E8ECE829116D278272FB03F89C616E7E&ci=&bi=&im=0&wf=0&ct=2011&bp=&m=&t=0&ft=&_=1606374621886',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
}

# NOTE(review): requests selects a proxy by the URL scheme, so this 'http'
# entry is not consulted for the https:// URL above -- confirm whether an
# 'https' entry was intended.
proxies = {
    'http': '124.205.155.147:9090',
}

# Seconds to wait for the server before giving up; without a timeout
# requests.get() may block forever.
REQUEST_TIMEOUT = 10

# XPath expressions, relative to one post container unless noted otherwise.
ARTICLE_XPATH = "//div[contains(@class,'article block untagged mb15')]"
MAN_ICON_XPATH = ".//div[@class='articleGender manIcon']"
WOMAN_ICON_XPATH = ".//div[@class='articleGender womenIcon']"
NAME_XPATH = ".//h2"
CONTENT_XPATH = ".//div[@class='content']/span"
VOTES_XPATH = ".//div[@class='stats']//span[@class='stats-vote']//i[@class='number']"
COMMENTS_XPATH = ".//div[@class='stats']//span[@class='stats-comments']//i[@class='number']"
IMAGE_XPATH = ".//div[@class='thumb']//a//img"


def _first_text(nodes):
    """Return the ``.text`` of the first element in *nodes*, or ``None``.

    ``None`` is returned both when *nodes* is empty and when the first
    element has no leading text (lxml reports that as ``None``), so callers
    never concatenate ``None`` into a string.
    """
    if not nodes:
        return None
    return nodes[0].text


def _print_article(article, gender_xpath, age_label):
    """Print one post if it carries the requested gender icon.

    Args:
        article: lxml element of a single post container.
        gender_xpath: XPath (relative to *article*) of the gender icon that
            the post must contain to be printed.
        age_label: Label printed in front of the gender icon's text.

    Returns:
        ``True`` if the post matched and was printed, ``False`` otherwise.
    """
    gender_nodes = article.xpath(gender_xpath)
    if not gender_nodes:
        # Post belongs to the other section (or has no gender icon at all).
        return False

    name = _first_text(article.xpath(NAME_XPATH))
    age = _first_text(gender_nodes)
    content = _first_text(article.xpath(CONTENT_XPATH))
    votes = _first_text(article.xpath(VOTES_XPATH))
    comments = _first_text(article.xpath(COMMENTS_XPATH))
    images = article.xpath(IMAGE_XPATH)

    if name is not None:
        # The leading newline separates consecutive posts in the output.
        print("\n" + "姓名:" + name.replace('\n', ''))
    if age is not None:
        print(age_label + age)
    if content is not None:
        print("内容:" + content.replace('\n', ''))
    if votes is not None:
        print("好笑:" + votes)
    if comments is not None:
        print("评论数:" + comments)
    if images:
        src = images[0].attrib.get('src')
        if src:
            # The page uses protocol-relative URLs ("//host/path"); only
            # those need the scheme prepended.
            print("图片:" + ("http:" + src if src.startswith('//') else src))
    return True


rep = requests.get(url, headers=headers, proxies=proxies, timeout=REQUEST_TIMEOUT)
# Fail loudly on an HTTP error instead of parsing an error page.
rep.raise_for_status()
html = etree.HTML(rep.content.decode('utf-8'))
div_tag = html.xpath(ARTICLE_XPATH)

print("==================男生数据=====================")
for article in div_tag:
    _print_article(article, MAN_ICON_XPATH, "男年龄:")

print("\n")
print("==================女生数据=====================")
for article in div_tag:
    _print_article(article, WOMAN_ICON_XPATH, "女年龄:")
# (仅供学习参考)
# 原文:https://www.cnblogs.com/Outsider07/p/14537708.html