首页 > 编程语言 > 详细

python:模拟浏览器获取百度指数,并截取长图

时间:2020-01-20 17:57:15      阅读:242      评论:0      收藏:0      [点我收藏+]
from selenium import webdriver
import time

#此代码是模拟浏览器操作
# 每次点击后都要睡眠一会,要不网页还没响应就进行下一步,很容易报错,也容易出现验证码

def getBaiDu(userName, password, keyword):
    """Log in to Baidu Index in headless Chrome, search, and save a screenshot.

    The function opens http://index.baidu.com/v2/index.html#/, fills in the
    login dialog, types ``keyword`` into the search box, then resizes the
    window to the document's scroll size and writes a screenshot to
    ``.//images//feedBack//baiduzhishu.png`` (relative to the current working
    directory).

    The navigation step is best-effort: if any element lookup or click fails,
    the error is printed and the screenshot is still attempted on whatever
    page is currently loaded. If the browser itself cannot be started, the
    error is printed and the function returns without taking a screenshot.

    Args:
        userName: Account name typed into the login form.
        password: Password typed into the login form.
        keyword: Text typed into the search input.

    Returns:
        None. The result is the screenshot file and messages on stdout.
    """
    picname = 'baiduzhishu.png'
    browser = None
    try:
        try:
            chrome_options = webdriver.ChromeOptions()
            chrome_options.add_argument('lang=zh_CN.UTF-8')
            # Hide the browser window.
            chrome_options.add_argument("--headless")
            browser = webdriver.Chrome(options=chrome_options)
            # Full screen.
            browser.maximize_window()
            url = "http://index.baidu.com/v2/index.html#/"
            browser.get(url)

            # The sleeps after each interaction give the page time to respond;
            # without them the next lookup tends to fail or a captcha appears
            # (per the original author's note).
            # Locator strategies are passed as plain strings ("id",
            # "class name") because the find_element_by_* helpers no longer
            # exist in current Selenium releases.

            # Click "log in" to pop up the login dialog.
            browser.find_element("class name", "username-text").click()
            time.sleep(4)
            browser.find_element("id", "TANGRAM__PSP_4__userName").clear()
            browser.find_element("id", "TANGRAM__PSP_4__password").clear()
            time.sleep(1)
            # User name and password.
            browser.find_element(
                "id", "TANGRAM__PSP_4__userName").send_keys(userName)
            time.sleep(1)
            browser.find_element(
                "id", "TANGRAM__PSP_4__password").send_keys(password)
            # Submit the login form.
            time.sleep(5)
            browser.find_element("id", "TANGRAM__PSP_4__submit").click()
            time.sleep(3)
            browser.find_element("class name", "search-input").clear()
            time.sleep(1)
            # Keyword.
            browser.find_element("class name", "search-input").send_keys(keyword)
            time.sleep(1)
            # Click search.
            browser.find_element("class name", "search-input-cancle").click()
        except Exception as e:
            # Best-effort: report the failure (with its cause) and fall
            # through to the screenshot. Ctrl-C / SystemExit are no longer
            # swallowed here.
            print("没有此标签", e)

        if browser is None:
            # The browser never started, so there is nothing to capture.
            return

        time.sleep(2)
        try:
            # Size the window to the whole document so the screenshot covers
            # the full page rather than just the visible viewport.
            scroll_width = browser.execute_script(
                'return document.body.parentNode.scrollWidth')
            scroll_height = browser.execute_script(
                'return document.body.parentNode.scrollHeight')
            browser.set_window_size(scroll_width, scroll_height)
            target = ".//images//feedBack//" + picname
            # get_screenshot_as_file reports an I/O failure by returning
            # False instead of raising, so check the result explicitly.
            if not browser.get_screenshot_as_file(target):
                print(picname, "could not be written to", target)
        except Exception as e:
            print(picname, e)
    finally:
        # Always shut the browser down; otherwise every call leaves a
        # headless chrome/chromedriver process running.
        if browser is not None:
            browser.quit()

if __name__ == "__main__":
    # Script entry point: fill in real account credentials before running.
    userName = "********"
    password = "******"
    # Keyword to search for.
    keyword = "*****"
    getBaiDu(userName, password, keyword)

python:模拟浏览器获取百度指数,并截取长图

原文:https://www.cnblogs.com/maocai2018/p/12218595.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!