首页 > 其他 > 详细

全自动爬取壁纸

时间:2020-07-04 16:01:27      阅读:49      评论:0      收藏:0      [点我收藏+]

如图

技术分享图片

 

 代码:

import _thread
import os
import re
import threading

import requests
from lxml import html
 5 
 6 
 7 class wallpaper:
 8     def __init__(self):
 9         ‘‘‘
10         初始化
11         ‘‘‘
12         self.url = "http://simpledesktops.com/"
13         self.req_header = {
14             User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36,
15         }
16         self.S = requests.session()
17 
18     def home(self, num):
19         ‘‘‘
20         访问首页
21         ‘‘‘
22         self.numx = num
23         X = self.S.get(self.url+f"browse/{num}")
24         self.context = X.text
25 
26     def url_all(self):
27         ‘‘‘
28         获取所有子页链接
29         ‘‘‘
30         tree = html.fromstring(self.context)
31         self.res = tree.xpath(//div[@class="desktop"]/a/@href)
32         print(self.res)
33 
34     def Single(self, url):
35         ‘‘‘
36         访问子页,下载图片
37         ‘‘‘
38         url = url
39         O = self.S.get(self.url+url)
40         Otree = html.fromstring(O.text)
41         res = Otree.xpath(//div[@class="desktop"]/a/@href)
42         title = Otree.xpath(//h2/a/text())
43         print(f"第{self.numx}页_标题:"+title[0])
44         cs = self.S.get(self.url+res[0])
45         print("链接:"+self.url+res[0])
46         with open(fimg/{self.numx}_{title[0]}.png, wb) as f:
47             f.write(cs.content)
48 
49     def dowlod(self):
50         ‘‘‘
51         下载所有图片
52         ‘‘‘
53         for i in self.res:
54             self.Single(url=i)
55 
56 
def job(IN, OUT):
    """Thread worker: download the wallpapers on a range of listing pages.

    Args:
        IN: First listing-page number to crawl (inclusive).
        OUT: Listing-page number to stop before (exclusive).
    """
    # One crawler (and therefore one HTTP session) serves the whole range;
    # its per-page state is overwritten by each home()/url_all() call.
    crawler = wallpaper()
    for num in range(IN, OUT):
        try:
            crawler.home(num=num)
            crawler.url_all()
            crawler.dowlod()
        except Exception as exc:
            # Worker boundary: report the failed page and continue, so one
            # bad page does not abandon the rest of this worker's range.
            print(f"第{num}页下载失败: {exc}")
66 
67 
# Listing-page ranges (start inclusive, end exclusive), one per worker thread.
PAGE_RANGES = [(5, 10), (14, 20), (23, 30), (32, 40), (41, 51)]

# Download the ranges in parallel, one thread per range.
workers = []
for first_page, stop_page in PAGE_RANGES:
    worker = threading.Thread(target=job, args=(first_page, stop_page))
    try:
        worker.start()
    except RuntimeError:
        # Thread.start() raises RuntimeError when the thread cannot be started.
        print("Error: 无法启动线程")
    else:
        workers.append(worker)

# Block until every worker has finished. This replaces the busy-wait loop,
# which pinned a CPU core and kept the process alive forever.
for worker in workers:
    worker.join()

 

全自动爬取壁纸

原文:https://www.cnblogs.com/ksxh/p/13235050.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!