首页 > 数据库技术 > 详细

scrapy将爬取的数据存入MySQL数据库

时间:2019-10-10 13:10:30      阅读:251      评论:0      收藏:0      [点我收藏+]
items.py

import scrapy


class InsistItem(scrapy.Item):
    """Scrapy item holding the fields collected for one job posting."""

    # Title of the position.
    positionname = scrapy.Field()
    # NOTE: `type` and `time` shadow builtins only inside this class body;
    # the names are kept because they are the keys other code indexes by.
    type = scrapy.Field()
    # Work location.
    place = scrapy.Field()
    # Key spelled "mian" throughout the project; kept as-is for compatibility.
    mian = scrapy.Field()
    time = scrapy.Field()

pipelines.py

import json
import scrapy
import pymysql
from scrapy.pipelines.images import ImagesPipeline
class InsistPipeline(object):
    """Item pipeline that writes every scraped item into the MySQL ``job`` table.

    A single connection and cursor are opened when the pipeline is created
    and released in :meth:`close_spider`.
    """

    def __init__(self):
        """Open the MySQL connection and create the cursor used for inserts."""
        # NOTE(review): connection parameters (including the password) are
        # hard-coded here; consider reading them from the project settings.
        self.db = pymysql.connect(
            host='localhost',
            user='dsuser',
            passwd='badpassword',
            db='dsdb',
            charset='utf8',
            port=3306,
        )
        self.cur = self.db.cursor()

    def process_item(self, item, spider):
        """Insert one item as a row of ``job`` and commit.

        Args:
            item: mapping with the keys ``positionname``, ``type``, ``place``,
                ``mian`` and ``time``.
            spider: the spider that produced the item (unused).

        Returns:
            The same ``item``, unchanged, so later pipelines can process it.

        Raises:
            Any exception raised by the driver while executing or committing;
            the transaction is rolled back before the exception propagates.
        """
        # Values are passed separately from the statement so the driver
        # escapes them (parameterized query).
        sql = 'INSERT INTO job(name,type,place,mian,time) VALUES(%s,%s,%s,%s,%s)'
        params = (
            item['positionname'],
            item['type'],
            item['place'],
            item['mian'],
            item['time'],
        )
        try:
            self.cur.execute(sql, params)
            self.db.commit()
        except Exception:
            # Do not leave a half-finished transaction on the shared
            # connection; undo it and let the caller see the error.
            self.db.rollback()
            raise
        return item

    def close_spider(self, spider):
        """Close the cursor and then the connection."""
        try:
            self.cur.close()
        finally:
            # Close the connection even if closing the cursor failed.
            self.db.close()

insists.py
#爬虫程序
import scrapy
from insist.items import InsistItem
import json
class InsistsSpider(scrapy.Spider):
    """Spider that pages through the Tencent careers JSON API and yields job items.

    The first page is requested through ``start_urls``; ``parse`` schedules
    each following page by incrementing ``offset``.
    """

    name = 'insists'
    allowed_domains = ['careers.tencent.com']
    # Earlier start URL, kept for reference:
    # start_urls = ['https://careers.tencent.com/search.html?index=']
    baseURL = 'https://careers.tencent.com/tencentcareer/api/post/Query?pageSize=10&pageIndex='
    # Index of the page most recently scheduled.
    offset = 1
    start_urls = [baseURL + str(offset)]

    def parse(self, response):
        """Turn one API response into items and schedule the next page.

        Args:
            response: response whose ``text`` is a JSON document containing
                the job list under ``Data`` -> ``Posts``.

        Yields:
            One ``InsistItem`` per job, followed by at most one
            ``scrapy.Request`` for the next page.
        """
        contents = json.loads(response.text)
        # NOTE(review): a null/absent-valued ``Posts`` is treated as an empty
        # page instead of raising TypeError in the loop; confirm this matches
        # what the API returns past the last page.
        jobs = contents['Data']['Posts'] or []
        for job in jobs:
            # Create a new item for every job: reusing one instance would
            # hand the same mutable object to the pipeline repeatedly, so
            # later assignments could overwrite data still being processed.
            item = InsistItem()
            item['positionname'] = job['RecruitPostName']
            item['type'] = job['BGName']
            item['place'] = job['LocationName']
            item['mian'] = job['CategoryName']
            item['time'] = job['LastUpdateTime']
            yield item

        # The check happens before the increment, so the last page requested
        # is page 6.
        if self.offset <= 5:
            self.offset += 1
            url = self.baseURL + str(self.offset)
            yield scrapy.Request(url, callback=self.parse)

技术分享图片

scrapy将爬取的数据存入MySQL数据库

原文:https://www.cnblogs.com/persistence-ok/p/11647296.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!