首页 > 其他 > 详细

爬取河北省招投标公共服务平台招标信息

时间:2020-06-05 16:15:53      阅读:55      评论:0      收藏:0      [点我收藏+]
import requests
from lxml import etree

# Scrape tender announcements from the Hebei tendering public service
# platform and save those in the software / IT-services industry to Excel.

# Listing endpoint for tender announcements.
url = 'http://121.28.195.124:9001/tender/xxgk/zbgg.do'

# Browser-like User-Agent sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36',
}

# Form fields posted to the listing endpoint. 'page' is overridden for each
# request; the remaining fields are sent unchanged.
data = {
    'page': '2',
    'TimeStr': '',
    'allDq': 'reset2',
    'allHy': 'reset1',
    'AllPtName': '',
    'KeyStr': '',
    'KeyType': '',
    'ggname': '',
}

# Prefix prepended to the relative links found in the listing.
SITE_ROOT = 'http://121.28.195.124:9001'
# Only announcements whose industry label equals this text are kept.
TARGET_INDUSTRY = '[软件和信息技术服务业]'
# Inclusive range of listing pages to fetch.
FIRST_PAGE = 1
LAST_PAGE = 9
# Seconds to wait for the server before giving up on a page.
REQUEST_TIMEOUT = 30
# Workbook written at the end of the run.
OUTPUT_FILE = '1.xlsx'


def first_text(values):
    """Return the first item of *values* as stripped text, or '' if empty."""
    return str(values[0]).strip() if values else ''


def strip_brackets(text):
    """Return *text* stripped of outer whitespace, brackets and apostrophes."""
    return text.strip().replace('[', '').replace(']', '').replace("'", '')


def build_row(titles, hrefs, times, infos):
    """Build one spreadsheet row from the raw xpath results of a listing card.

    Args:
        titles: text nodes of the announcement link (title).
        hrefs: ``href`` values of the announcement link.
        times: text nodes holding the publication time.
        infos: text nodes of the info spans; the first is used as the
            area and the last as the industry label.

    Returns:
        ``[title, area, time, link]`` when the industry label equals
        ``TARGET_INDUSTRY``; ``None`` when it does not or when the card has
        no info spans at all.
    """
    if not infos:
        return None
    # Strip before comparing so surrounding whitespace in the page markup
    # does not hide a matching industry label.
    if str(infos[-1]).strip() != TARGET_INDUSTRY:
        return None
    return [
        first_text(titles),
        strip_brackets(str(infos[0])),
        first_text(times),
        SITE_ROOT + first_text(hrefs),
    ]


def fetch_rows(session, page):
    """Fetch listing page *page* and return the rows that pass the filter.

    Args:
        session: a ``requests.Session`` (or compatible) used for the POST.
        page: page number placed in the ``page`` form field.

    Returns:
        A list of rows as produced by ``build_row``.

    Raises:
        requests.RequestException: if the request itself fails or times out.
    """
    # Copy the form so the module-level template is not mutated.
    form = dict(data, page=page)
    page_text = session.post(
        url, data=form, headers=headers, timeout=REQUEST_TIMEOUT
    ).text
    tree = etree.HTML(page_text)
    if tree is None:
        # Nothing parseable came back (e.g. an empty body).
        return []
    rows = []
    for card in tree.xpath('.//div[@class="publicont"]'):
        row = build_row(
            card.xpath('./div[1]//a/text()'),  # title
            card.xpath('./div[1]//a/@href'),  # link
            card.xpath('./div[1]//span[@class="span_o"]/text()'),  # time
            card.xpath('.//p[@class="p_tw"]//span[@class="span_on"]/text()'),
        )
        if row is not None:
            rows.append(row)
    return rows


def main():
    """Scrape every configured page and save the matching rows to Excel."""
    # Imported here so the parsing helpers above stay importable on machines
    # where openpyxl is not installed.
    from openpyxl import Workbook

    wb = Workbook()
    wb1 = wb.create_sheet('index', 0)
    wb1.title = '投标信息'
    with requests.Session() as session:
        for page in range(FIRST_PAGE, LAST_PAGE + 1):
            try:
                rows = fetch_rows(session, page)
            except requests.RequestException as exc:
                # Skip the failed page but keep what was already collected,
                # so the workbook is still written at the end.
                print('page %d failed: %s' % (page, exc))
                continue
            for row in rows:
                print(*row)
                wb1.append(row)
    wb.save(OUTPUT_FILE)


if __name__ == '__main__':
    main()

爬取河北省招投标公共服务平台招标信息

原文:https://www.cnblogs.com/robertx/p/13049714.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!