Python 爬虫：糗事百科

时间：2021-03-15 19:36:24 阅读：33 评论：0 收藏：0 [点我收藏+]

import re
from lxml import etree
import requests
from bs4 import BeautifulSoup
import requests
# Page to scrape: the "imgrank" listing on qiushibaike.com.
url = 'https://www.qiushibaike.com/imgrank/'
# Request headers sent with the fetch; the User-Agent mimics desktop Chrome.
headers = {
    'RequestURL': 'https://eclick.baidu.com/fp.htm?br=2&fp=8D1371255901FBD7974323B7D8E17C98&fp2=E8ECE829116D278272FB03F89C616E7E&ci=&bi=&im=0&wf=0&ct=2011&bp=&m=&t=0&ft=&_=1606374621886',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
}
# NOTE(review): requests picks a proxy by URL scheme, so this 'http' entry is
# not applied to the https:// URL above -- confirm whether an 'https' entry
# was intended.
proxies = {
    'http': '124.205.155.147:9090'
}
# Without a timeout requests can wait forever on an unresponsive server.
rep = requests.get(url, headers=headers, proxies=proxies, timeout=10)
# Fail loudly on 4xx/5xx instead of parsing an error page and printing nothing.
rep.raise_for_status()
# Parse the UTF-8 response body into an lxml element tree for the XPath
# queries that follow.
html = etree.HTML(rep.content.decode('utf-8'))
# print(html.xpath("//div[contains(@class,‘article block untagged mb15‘)][1]//div[@class=‘content‘]/span")[0].text)
# print(div_tags)
print("==================男生数据=====================")
# Each post on the page is wrapped in one of these article <div> elements.
div_tag = html.xpath("//div[contains(@class,'article block untagged mb15')]")
for i in div_tag:
    # The gender badge <div>; its text is printed below as the author's age.
    t = i.xpath(".//div[@class='articleGender manIcon']")
    if not t:
        # No male badge on this post, so it does not belong in this section.
        continue
    temp_xpath = i.xpath(".//div[@class='content']/span")
    name_xpath = i.xpath(".//h2")
    ping_xpath = i.xpath(".//div[@class='stats']//span[@class='stats-vote']//i[@class='number']")
    pingshus_xpath = i.xpath(".//div[@class='stats']//span[@class='stats-comments']//i[@class='number']")
    img_xpath = i.xpath(".//div[@class='thumb']//a//img")
    # lxml's .text is None when an element has no leading text, so every
    # lookup falls back to '' before it is concatenated.
    if name_xpath:
        print("\n"+"姓名:"+(name_xpath[0].text or '').replace('\n', ''))
    print("男年龄:"+(t[0].text or ''))
    if temp_xpath:
        print("内容："+(temp_xpath[0].text or '').replace('\n', ''))
    if ping_xpath:
        print("好笑:"+(ping_xpath[0].text or ''))
    if pingshus_xpath:
        print("评论数:"+(pingshus_xpath[0].text or ''))
    if img_xpath:
        src = img_xpath[0].attrib.get('src')
        if src:
            # Only protocol-relative URLs ("//host/path") need a scheme added.
            print("图片："+("http:"+src if src.startswith('//') else src))

print("\n")
print("==================女生数据=====================")
# Each post on the page is wrapped in one of these article <div> elements.
div_tag = html.xpath("//div[contains(@class,'article block untagged mb15')]")
for i in div_tag:
    # The gender badge <div>; its text is printed below as the author's age.
    t2 = i.xpath(".//div[@class='articleGender womenIcon']")
    if not t2:
        # No female badge on this post, so it does not belong in this section.
        continue
    temp_xpath = i.xpath(".//div[@class='content']/span")
    name_xpath = i.xpath(".//h2")
    ping_xpath2 = i.xpath(".//div[@class='stats']//span[@class='stats-vote']//i[@class='number']")
    pingshus2_xpath = i.xpath(".//div[@class='stats']//span[@class='stats-comments']//i[@class='number']")
    img_xpath2 = i.xpath(".//div[@class='thumb']//a//img")
    # lxml's .text is None when an element has no leading text, so every
    # lookup falls back to '' before it is concatenated.
    if name_xpath:
        print('\n'+"姓名:"+(name_xpath[0].text or '').replace('\n', ''))
    print("女年龄："+(t2[0].text or ''))
    if temp_xpath:
        print("内容："+(temp_xpath[0].text or '').replace('\n', ''))
    if ping_xpath2:
        print("好笑:"+(ping_xpath2[0].text or ''))
    if pingshus2_xpath:
        print("评论数:"+(pingshus2_xpath[0].text or ''))
    if img_xpath2:
        src = img_xpath2[0].attrib.get('src')
        if src:
            # Only protocol-relative URLs ("//host/path") need a scheme added.
            print("图片："+("http:"+src if src.startswith('//') else src))

（仅供学习参考）

Python 爬虫：糗事百科

原文：https://www.cnblogs.com/Outsider07/p/14537708.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年09月23日 (328)
2021年09月24日 (313)
2021年09月17日 (191)
2021年09月15日 (369)
2021年09月16日 (411)
2021年09月13日 (439)
2021年09月11日 (398)
2021年09月12日 (393)
2021年09月10日 (160)
2021年09月08日 (222)