首页 > 其他 > 详细

requests + lxml 天眼查爬虫

时间:2019-02-17 17:08:31      阅读:210      评论:0      收藏:0      [点我收藏+]
这个爬虫可以爬取指定公司的注册时间、公司性质。
如果你有其他需求,也可以自己添加其他功能
源代码奉上。
# -*- coding:utf-8 -*-
import requests
from lxml import etree
import random
import re
# import HTMLParser
from html.parser import HTMLParser
import time

# Pool of proxy endpoints; down_load() picks one at random per request.
# Each entry must be a string literal -- bare URLs are not valid Python.
proxy = [
    "http://112.83.86.88:2589",
    "https://117.92.128.239:2444",
    "https://117.94.120.55:4734",
    "https://116.149.201.121:6436",
    "https://111.72.104.133:4184",
    "https://113.103.151.180:4217",
    "https://60.189.139.208:4241",
    "https://222.191.171.98:4263",
    "https://182.108.168.108:4234",
    "https://115.209.194.193:4270",
]
# Browser User-Agent strings; one is chosen at random below and sent as the
# "User-Agent" request header.
USER_AGENTS = [
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
    "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
    "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
    "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
    "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
    "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52"
]

# Logged-in session cookie(s) sent with every request; one entry is chosen at
# random below.  Each entry is a complete "Cookie" header value and must be a
# string literal (the unquoted form was a syntax error).
# NOTE(review): this is a hard-coded account session (auth token and mobile
# number).  Prefer loading it from the environment or an untracked config
# file instead of committing it; the stale commented-out sessions that used
# to live here were removed for the same reason.
cookie = [
    "aliyungf_tc=AQAAAMwIX1VNRgAAUhVFeRNLGKWzKDfZ; "
    "csrfToken=X48pVLAHE61Kepl8V8_utX4N; "
    "TYCID=34e5cdc0faf411e88d5a635382706bce; "
    "undefined=34e5cdc0faf411e88d5a635382706bce; "
    "ssuid=1308960292; "
    "Hm_lvt_e92c8d65d92d534b0fc290df538b4758=1544278750; "
    "_ga=GA1.2.1258357000.1544278750; "
    "_gid=GA1.2.243951796.1544278750; "
    "RTYCID=f1acc84a6142405ebb406208812ca57e; "
    "CT_TYCID=12bfe0aaa4854b3fbbc3b1e6a8e1772f; "
    "cloud_token=f67c730341da4819ad72051ebd9aa270; "
    "bannerFlag=true; "
    "token=5092657e644e48b6a5e82360b799e518; "
    "_utm=21899f8b8a05449e8975c8bec22037af; "
    "tyc-user-info=%257B%2522myQuestionCount%2522%253A%25220%2522%252C%2522integrity%2522%253A%25220%2525%2522%252C%2522state%2522%253A%25220%2522%252C%2522vipManager%2522%253A%25220%2522%252C%2522onum%2522%253A%25220%2522%252C%2522monitorUnreadCount%2522%253A%25220%2522%252C%2522discussCommendCount%2522%253A%25220%2522%252C%2522token%2522%253A%2522eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxNjYwMTE2NjUwNCIsImlhdCI6MTU0NDM0MDkyNiwiZXhwIjoxNTU5ODkyOTI2fQ.5GS2SnPon-4fmd-yIYtQ9Yr0ZWMqHvrXr_ks3oRmeHm-rgPjcP63yMeuPrPb1axXpg8syEkiyzwBxUc03TpeYg%2522%252C%2522redPoint%2522%253A%25220%2522%252C%2522pleaseAnswerCount%2522%253A%25220%2522%252C%2522vnum%2522%253A%25220%2522%252C%2522bizCardUnread%2522%253A%25220%2522%252C%2522mobile%2522%253A%252216601166504%2522%257D; "
    "auth_token=eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxNjYwMTE2NjUwNCIsImlhdCI6MTU0NDM0MDkyNiwiZXhwIjoxNTU5ODkyOTI2fQ.5GS2SnPon-4fmd-yIYtQ9Yr0ZWMqHvrXr_ks3oRmeHm-rgPjcP63yMeuPrPb1axXpg8syEkiyzwBxUc03TpeYg; "
    "Hm_lpvt_e92c8d65d92d534b0fc290df538b4758=1544340930; "
    "_gat_gtag_UA_123487620_1=1",
]
# Pick one session cookie and one User-Agent for the whole run.  The two
# draws happen in this order (cookie first, then User-Agent).
cookie1 = random.choice(cookie)
dd = random.choice(USER_AGENTS)

# Request headers shared by every call to down_load().
headers = dict(
    [
        ("Referer", "https://www.baidu.com/"),
        ("Cookie", cookie1),
        ("User-Agent", "{}".format(dd)),
    ]
)
def down_load(url, timeout=10):
    """Download *url* and return the response body as text.

    A proxy is picked at random from the module-level ``proxy`` pool and the
    module-level ``headers`` are sent with the request.

    Args:
        url: URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.get``.  Without it a stalled connection would
            block the script indefinitely.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.exceptions.RequestException: if the request fails or
            times out.
    """
    # The proxies mapping is keyed by URL scheme, which must be a string;
    # the bare name ``http`` raised NameError.
    # NOTE(review): only the "http" scheme is mapped, so https:// URLs are
    # presumably fetched without a proxy -- confirm whether that is intended.
    proxy1 = {"http": random.choice(proxy)}
    response = requests.get(
        url=url, headers=headers, proxies=proxy1, timeout=timeout
    )
    # Force UTF-8 instead of relying on the encoding guessed from headers.
    response.encoding = "utf-8"
    return response.text
# Company names to look up.
list1 = [
    "中科润蓝环保技术(北京)有限公司",
    "中联先进钢铁材料技术有限责任公司",
    "中铝国际工程设备有限公司",
    "中铝润滑科技有限公司",
    "中石化催化剂(北京)有限公司",
    "中石化三菱化学聚碳酸酯(北京)有限公司",
    "中天开元(北京)建筑装饰工程有限公司",
    "中铁丰桥桥梁有限公司",
    "中铁物总轨道装备贸易有限公司",
    "中新高科工程技术(北京)有限公司",
    "中新联进出口公司",
    "中信国安盟固利电源技术有限公司",
]

# For each company: search by name, follow the first hit to its detail page,
# then print "<name> <registration time> <company type>".
for i in list1:
    # Pause between companies so requests are not fired back to back.
    time.sleep(2)
    first_url = "https://m.tianyancha.com/search?key=%s" % i

    # Download and parse the search-result page.
    search_tree = etree.HTML(down_load(first_url))
    # XPath expressions are passed as strings; they contain double quotes,
    # so the Python literals use single quotes.
    detail_url = search_tree.xpath('//div[contains(@class,"col-xs-10")]/a/@href')
    # Other fields available on the search page, currently unused:
    #   boss:               '//div[@class="search_row_new_mobil"]//a/text()'
    #   registered capital: '//div[@class="search_row_new_mobil"]/div/div[2]/span/text()'
    the_registered_time = search_tree.xpath(
        '//div[@class="search_row_new_mobil"]/div/div[3]/span/text()'
    )
    if not detail_url or not the_registered_time:
        # No hit (or an unexpected page layout): skip this company instead
        # of aborting the whole run with IndexError.
        print("no search result for", i)
        continue

    # Download and parse the detail page of the first search hit.
    detail_tree = etree.HTML(down_load(detail_url[0]))
    company_type = detail_tree.xpath('//div[@class="item-line"][6]/span/text()')
    if len(company_type) < 2:
        # The type is read from the second text node; skip when it is absent.
        print("company type not found for", i)
        continue

    # Company name, registration time, company type.
    company = i
    print(company, the_registered_time[0], company_type[1])

爬取结果:

技术分享图片

 

requests + lxml 天眼查爬虫

原文:https://www.cnblogs.com/liangliangzz/p/10391774.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!