# -*- coding: utf-8 -*-
import scrapy
import urllib.request
# https://accounts.douban.com/login
class DoubanSpider(scrapy.Spider):
    """Log in to Douban through its account login form and save the result.

    The spider fetches the login page, checks whether a captcha image is
    present, builds the matching form payload and POSTs it to the login URL.
    The response to that POST is written to ``douban.html`` so the operator
    can check whether the login succeeded.
    """

    name = 'douban'
    allowed_domains = ['www.douban.com', 'accounts.douban.com']
    start_urls = ['https://accounts.douban.com/login']

    # NOTE(review): credentials are hard-coded in source. They are kept as
    # class-level defaults so existing runs behave the same, but they can be
    # overridden per run with spider arguments, e.g.
    #   scrapy crawl douban -a form_email=... -a form_password=...
    # Consider moving them out of version control entirely.
    form_email = '1090509990@qq.com'
    form_password = 'lizhibin666'

    def parse(self, response):
        """Build the login form for the fetched login page and submit it.

        Args:
            response: The downloaded login page.

        Yields:
            A ``scrapy.FormRequest`` that POSTs the login form; its response
            is handled by :meth:`lala`.
        """
        # Look for the captcha image; an empty selector list means the page
        # did not ask for a captcha.
        image = response.xpath('//img[@id="captcha_image"]/@src')
        if not image:
            print('不带验证码的' * 10)
            # Payload for a login without captcha.
            formdata = {
                'source': 'index_nav',
                'form_email': self.form_email,
                'form_password': self.form_password,
            }
        else:
            print('带验证码的' * 10)
            # The hidden captcha id is read with an attribute selector.
            captchaid = response.css(
                'input[name="captcha-id"]::attr(value)'
            ).extract_first()
            # Resolve the captcha link against the page URL so a relative or
            # protocol-relative ``src`` still downloads correctly.
            image_url = response.urljoin(image.extract_first())
            # Save the captcha locally so the operator can look at it, then
            # read the answer from the terminal. Both calls block, which is
            # accepted here because this is a manual, one-request login flow.
            urllib.request.urlretrieve(image_url, 'code.png')
            code = input('请输入验证码:')
            # Payload for a login with captcha.
            formdata = {
                'source': 'None',
                'redir': 'https://www.douban.com/',
                'form_email': self.form_email,
                'form_password': self.form_password,
                'captcha-solution': code,
                'captcha-id': captchaid,
                'login': '登录',
            }
        post_url = 'https://accounts.douban.com/login'
        # Send the POST request with the assembled form.
        yield scrapy.FormRequest(
            url=post_url, formdata=formdata, callback=self.lala
        )

    def lala(self, response):
        """Save the login response body to ``douban.html`` for inspection.

        Args:
            response: The response to the login POST request.
        """
        print('*' * 50)
        with open('douban.html', 'wb') as fp:
            fp.write(response.body)
        print('*' * 50)
# Original article: https://www.cnblogs.com/airapple/p/9158846.html