import scrapy
from ZuCai.items import ZucaiItem
from ZuCai.spiders.get_date import GetBetweenday
class ZucaiSpider(scrapy.Spider):
    """Spider that crawls Jingcai football (竞彩足球) match data from trade.500.com.

    GetBetweenday() builds one page URL per day from a fixed start date
    (2019-04-15) up to the current date; parse() extracts one ZucaiItem
    per match row from each daily page.
    """

    name = 'zucai'
    # allowed_domains must contain bare domains, not URLs — scrapy's
    # offsite middleware compares hostnames, so 'trade.500.com/jczq/'
    # would filter out every request.
    allowed_domains = ['trade.500.com']
    start_urls = ['https://trade.500.com/jczq/']

    def start_requests(self):
        # One URL per day between 2019-04-15 and today (inclusive),
        # produced by the project helper GetBetweenday.
        next_url = GetBetweenday('2019-04-15', 'https://trade.500.com/jczq/')
        for url in next_url:
            yield scrapy.Request(url, callback=self.parse)

    def parse(self, response):
        """Yield one ZucaiItem per row of the betting table on the page."""
        datas = response.xpath('//div[@class="bet-main bet-main-dg"]/table/tbody/tr')
        for data in datas:
            item = ZucaiItem()
            # extract_first() returns None for a missing node instead of
            # raising IndexError like extract()[0] does on empty results.
            item['League'] = data.xpath('.//td[@class="td td-evt"]/a/text()').extract_first()
            item['Time'] = data.xpath('.//td[@class="td td-endtime"]/text()').extract_first()
            item['Home_team'] = data.xpath('.//span[@class="team-l"]/a/text()').extract_first()
            item['Result'] = data.xpath('.//i[@class="team-vs team-bf"]/a/text()').extract_first()
            item['Away_team'] = data.xpath('.//span[@class="team-r"]/a/text()').extract_first()
            item['Win'] = data.xpath('.//div[@class="betbtn-row itm-rangB1"]/p[1]/span/text()').extract_first()
            item['Level'] = data.xpath('.//div[@class="betbtn-row itm-rangB1"]/p[2]/span/text()').extract_first()
            item['Negative'] = data.xpath('.//div[@class="betbtn-row itm-rangB1"]/p[3]/span/text()').extract_first()
            yield item
执行过程中可能会报超出数组索引范围的错误（IndexError），需要将extract()[0]换成extract_first()
至此，爬取任意日期到当前日期之间的竞彩数据完成，可以在数据库中看到完整的数据