首页 > 其他 > 详细

豆瓣电影分类排行

时间:2020-03-23 10:10:04      阅读:124      评论:0      收藏:0      [点我收藏+]
 1 import requests
 2 # 导入lxml使用xpath提取数据
 3 from lxml import etree
 4 def douban_movies(m_type,nums):
 5     ‘‘‘
 6     豆瓣电影排行榜爬取
 7     ‘‘‘
 8 
 9     url = "https://movie.douban.com/j/chart/top_list?"+m_type+"&interval_id=100%3A90&action=&start=0&limit="+nums
10     headers = {
11         User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0
12     }
13     response = requests.get(url=url,headers=headers)
14     datas = response.json()
15     for data in datas:
16         movies_info = {}
17         movies_info [image] = data[cover_url]
18         movies_info [types] = data[types]
19         movies_info [regions] = data[regions]
20         movies_info [title] = data[title]
21         movies_info [url] = data[url]
22         movies_info [release_date] = data[release_date]
23         movies_info [score] = data[score]
24         movies_info [actors] = data[actors]
25 
26         with open(./+key_word+豆瓣电影分类排行榜爬取.csv,a+,encoding=utf-8)as f:
27             f.writelines(str(movies_info))
28 
29 
def get_type():
    """Scrape the available movie categories from Douban's type-rank page.

    Returns:
        dict: Maps each category's link text to the second ``&``-separated
        piece of that link's ``href`` (presumably of the form
        ``"type=<id>"``, judging by the page URL requested here).

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    movies_type = {}
    url = "https://movie.douban.com/typerank?type_name=%E5%89%A7%E6%83%85&type=11&interval_id=100:90&action="
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0",
    }
    response = requests.get(url=url, headers=headers, timeout=10)
    response.raise_for_status()
    douban_html = etree.HTML(response.text)
    datas = douban_html.xpath("//div[@class='article']/div[2]/div[@class='types']/span")

    for span in datas:
        hrefs = span.xpath(".//@href")
        links = span.xpath(".//a")
        if not hrefs or not links:
            # Skip spans that do not wrap a category link.
            continue
        parts = hrefs[0].split("&")
        if len(parts) < 2:
            # The href has no second query parameter to use as the type.
            continue
        movies_type[links[0].text] = parts[1]

    return movies_type
48 
if __name__ == "__main__":
    # Analysis
    #   The chart page shows 20 entries at a time.
    #   The JSON request URLs look like:
    #   https://movie.douban.com/j/chart/top_list?type=11&interval_id=100%3A90&action=&start=0&limit=20
    #   https://movie.douban.com/j/chart/top_list?type=11&interval_id=100%3A90&action=&start=20&limit=20
    #   https://movie.douban.com/j/chart/top_list?type=11&interval_id=100%3A90&action=&start=40&limit=20
    #   start: offset of the first entry
    #   limit: number of entries returned
    #   type:  category (genre) id
    #
    # NOTE: key_word is deliberately kept as a module-level name rather than
    # a local inside a main() function, because douban_movies uses the
    # module-level key_word to build the output file name.
    key_word = input("请输入查询分类排行榜>>")
    nums = input("请输入查询数据数量>>")
    # Fetch the available categories.
    movies_type = get_type()
    if key_word in movies_type:
        # Run the crawl for the chosen category.
        m_type = movies_type[key_word]
        douban_movies(m_type, nums)
    else:
        print("输入电影分类不存在!!!")

 

豆瓣电影分类排行

原文:https://www.cnblogs.com/lizhihoublog/p/12550223.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!