import scrapy
from scrapy.http.cookies import CookieJar
from scrapy.http import Request
from urllib.parse import urlencode


class ChoutiSpider(scrapy.Spider):
    """Spider that logs in to dig.chouti.com, crawls the recent hot-news
    list, and sends an up-vote request for every news item found.

    Flow: parse (grab cookies + POST login) -> check_login (request the
    news list) -> index (vote on each item) -> check_result (print reply).
    """

    name = 'chouti'
    allowed_domains = ['chouti.com']
    start_urls = ['http://chouti.com/']
    # start_urls = ['http://127.0.0.1:80/app01/login/']

    # Cookies harvested from the first response; re-sent on every
    # subsequent request so the session is preserved.
    cookie_dict = {}

    def parse(self, response):
        """Handle the first response from chouti: extract the session
        cookies into ``cookie_dict``, then POST the login form.

        :param response: the response for ``start_urls[0]``
        :return: yields the login ``Request``
        """
        # Extract cookies from the response headers into a CookieJar.
        cookie_jar = CookieJar()
        cookie_jar.extract_cookies(response, response.request)

        # Flatten the jar's nested {domain: {path: {name: Cookie}}}
        # structure into a plain name -> value dict.
        # NOTE(review): ``_cookies`` is a private CookieJar attribute;
        # this mirrors the original code but may break across versions.
        for domain, paths in cookie_jar._cookies.items():
            for path, cookies in paths.items():
                for name, cookie in cookies.items():
                    self.cookie_dict[name] = cookie.value

        # Log in. urlencode() builds the 'phone=...&password=...' form
        # body with proper escaping (the original hand-wrote the string).
        yield Request(
            url='https://dig.chouti.com/login',
            method='POST',
            body=urlencode({'phone': '18669195713', 'password': '123456'}),
            cookies=self.cookie_dict,
            headers={
                # Could also be set once in settings.py instead of per
                # request; left empty here as in the original.
                'User-Agent': '',
                # Bug fix: the original sent an empty Content-Type, so
                # the server could not parse the form-encoded body.
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            },
            callback=self.check_login,
        )

    def check_login(self, response):
        """After the login POST, print the server reply (a manual
        success check) and request page 1 of the recent hot news.

        :param response: the login response
        :return: yields the news-list ``Request``
        """
        print(response.text)  # inspect manually to confirm login worked
        yield Request(
            url='https://dig.chouti.com/all/hot/recent/1',
            cookies=self.cookie_dict,
            callback=self.index,
        )

    def index(self, response):
        """On the news-list page, extract every news id and POST an
        up-vote request for each one.

        :param response: the news-list page response
        :return: yields one vote ``Request`` per news item
        """
        # TODO: the selectors were left blank in the original source;
        # fill in the real XPath expressions for the item list and id.
        news_list = response.xpath('')
        for news in news_list:
            link_id = news.xpath('').extract_first()
            yield Request(
                # Bug fix: the original URL read '/link/vote_linksId=%s'
                # (missing '?'), so linksId was never sent as a query
                # parameter and the vote could not succeed.
                url='https://dig.chouti.com/link/vote?linksId=%s' % link_id,
                method='POST',
                cookies=self.cookie_dict,
                callback=self.check_result,
            )

    def check_result(self, response):
        """Print the server's reply to a vote request (manual check)."""
        print(response.text)
# Source (original article): https://www.cnblogs.com/xiongfanyong/p/13096501.html