import scrapy
from scrapy.http.cookies import CookieJar
from scrapy.http import Request
from urllib.parse import urlencode


class ChoutiSpider(scrapy.Spider):
    """Spider that logs in to dig.chouti.com, crawls the recent hot-news
    list, and sends an up-vote request for every news item found.

    Flow: parse (grab cookies + POST login) -> check_login (request the
    news list) -> index (vote on each item) -> check_result (print reply).
    """

    name = 'chouti'
    allowed_domains = ['chouti.com']
    start_urls = ['http://chouti.com/']
    # start_urls = ['http://127.0.0.1:80/app01/login/']

    # Cookies harvested from the first response; re-sent on every
    # subsequent request so the session is preserved.
    cookie_dict = {}

    def parse(self, response):
        """Handle the first response from chouti: extract the session
        cookies into ``cookie_dict``, then POST the login form.

        :param response: the response for ``start_urls[0]``
        :return: yields the login ``Request``
        """
        # Extract cookies from the response headers into a CookieJar.
        cookie_jar = CookieJar()
        cookie_jar.extract_cookies(response, response.request)

        # Flatten the jar's nested {domain: {path: {name: Cookie}}}
        # structure into a plain name -> value dict.
        # NOTE(review): ``_cookies`` is a private CookieJar attribute;
        # this mirrors the original code but may break across versions.
        for domain, paths in cookie_jar._cookies.items():
            for path, cookies in paths.items():
                for name, cookie in cookies.items():
                    self.cookie_dict[name] = cookie.value

        # Log in. urlencode() builds the 'phone=...&password=...' form
        # body with proper escaping (the original hand-wrote the string).
        yield Request(
            url='https://dig.chouti.com/login',
            method='POST',
            body=urlencode({'phone': '18669195713', 'password': '123456'}),
            cookies=self.cookie_dict,
            headers={
                # Could also be set once in settings.py instead of per
                # request; left empty here as in the original.
                'User-Agent': '',
                # Bug fix: the original sent an empty Content-Type, so
                # the server could not parse the form-encoded body.
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            },
            callback=self.check_login,
        )

    def check_login(self, response):
        """After the login POST, print the server reply (a manual
        success check) and request page 1 of the recent hot news.

        :param response: the login response
        :return: yields the news-list ``Request``
        """
        print(response.text)  # inspect manually to confirm login worked
        yield Request(
            url='https://dig.chouti.com/all/hot/recent/1',
            cookies=self.cookie_dict,
            callback=self.index,
        )

    def index(self, response):
        """On the news-list page, extract every news id and POST an
        up-vote request for each one.

        :param response: the news-list page response
        :return: yields one vote ``Request`` per news item
        """
        # TODO: the selectors were left blank in the original source;
        # fill in the real XPath expressions for the item list and id.
        news_list = response.xpath('')
        for news in news_list:
            link_id = news.xpath('').extract_first()
            yield Request(
                # Bug fix: the original URL read '/link/vote_linksId=%s'
                # (missing '?'), so linksId was never sent as a query
                # parameter and the vote could not succeed.
                url='https://dig.chouti.com/link/vote?linksId=%s' % link_id,
                method='POST',
                cookies=self.cookie_dict,
                callback=self.check_result,
            )

    def check_result(self, response):
        """Print the server's reply to a vote request (manual check)."""
        print(response.text)
# Source (original article): https://www.cnblogs.com/xiongfanyong/p/13096501.html