首页 > 其他 > 详细

selenium爬12306

时间:2019-09-26 17:42:53      阅读:118      评论:0      收藏:0      [点我收藏+]
from selenium import webdriver
from pyquery import PyQuery as pq
import time
# Ask the user for the departure station, the arrival station and the date.
fs = input("出发站:")
ts = input("达到站:")
date = input("时间:")

# NOTE(review): the station codes HGH and TLU are fixed in the query string;
# only the typed station names and the date are substituted. Presumably
# searches for other stations need their own codes -- confirm.
query_url = (
    "https://kyfw.12306.cn/otn/leftTicket/init?linktypeid=dc&fs=%s,HGH&ts=%s,TLU&date=%s&flag=N,N,Y"
    % (fs, ts, date)
)

# Open the query page in Chrome and give it a moment before scrolling.
browser = webdriver.Chrome()
browser.get(query_url)
time.sleep(2)

# Scroll down in three steps, pausing one second after each step
# (presumably so the whole result table gets rendered -- confirm).
scroll_steps = (
    "window.scrollTo(0,800);",
    "window.scrollTo(0,1600);",
    "window.scrollTo(0,3200);",
)
for scroll_script in scroll_steps:
    browser.execute_script(scroll_script)
    time.sleep(1)


def start():
    """Print a start message, check the result table exists, then scrape it.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if the page has no
            element with id ``queryLeftTable``.
    """
    # Imported locally so this function does not depend on edits elsewhere.
    from selenium.webdriver.common.by import By

    print("开始")
    # The element itself is not used; the lookup only serves as a presence
    # check, because find_element raises when nothing matches.
    # find_element(By.XPATH, ...) is used instead of find_element_by_xpath,
    # which newer Selenium releases no longer provide.
    browser.find_element(By.XPATH, '//*[@id="queryLeftTable"]')

    aaa()


def aaa():
    """Scrape the train rows of the current page and print one dict per train.

    The non-empty text of every ``<td>`` on the page is collected into one
    flat list through Selenium. The page source is then parsed with pyquery
    to read the train number, stations, times and duration of each row in
    ``.t-list tbody``. Seat availability for each train is taken from the flat
    cell list by position.

    Returns:
        None. Each train record is printed to stdout.
    """
    # Imported locally so this function does not depend on edits elsewhere.
    from selenium.webdriver.common.by import By

    html = browser.page_source

    # Flat list of the non-empty cell texts, in document order.
    cells = []
    for row in browser.find_elements(By.TAG_NAME, "tr"):
        for cell in row.find_elements(By.TAG_NAME, "td"):
            text = cell.text
            if text != "":
                cells.append(text)

    # Seat columns, in the order they are read from the flat cell list.
    seat_labels = (
        "商务座",
        "一等座",
        "二等座",
        "高级软卧",
        "软卧",
        "动卧",
        "硬卧",
        "软座",
        "硬座",
        "无座",
    )

    # NOTE(review): the layout constants below are carried over unchanged.
    # They assume every train contributes 13 non-empty cells and that its
    # seat columns start one cell in -- confirm against the live page.
    first_seat_index = 1
    cells_per_train = 13

    doc = pq(html)
    offset = first_seat_index
    # .items() yields each row once, so a single pass over it is sufficient.
    for train_row in doc(".t-list tbody tr").items():
        train_text = train_row.find(".train").text()
        if train_text == "":
            # Rows without a train number carry no train data.
            continue

        seats = cells[offset:offset + len(seat_labels)]
        if len(seats) < len(seat_labels):
            # Fewer cells were scraped than the fixed layout expects; stop
            # instead of failing with an IndexError.
            break

        qq_data = {
            "车次": train_text.split("\n")[0],
            "出发站到达站": train_row.find(".cdz").text(),
            "出发时间到达时间": train_row.find(".cds").text(),
            "历时": train_row.find(".ls").text(),
        }
        qq_data.update(zip(seat_labels, seats))

        print(qq_data)
        offset += cells_per_train

def main():
    """Entry point of the script: delegate the whole run to start()."""
    start()


# Run the scraper only when this file is executed as a script.
# __name__ must be compared with the string "__main__"; the bare name
# __main__ is undefined and would raise NameError.
if __name__ == "__main__":
    main()

 

selenium爬12306

原文:https://www.cnblogs.com/xuezhihao/p/11592101.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!