count=0 while status_code!=200: count+=1 time.sleep(count*0.1) headers={‘User-Agent‘:UserAgent().random} response=requests.get(url,headers=headers) response.encoding=response.apparent_encoding html=response.text return html
import requests
import csv
import time
from lxml import etree
from fake_useragent import UserAgent
import random
# Fetch a page's HTML source so it can be parsed.
def get_html(url, max_retries=5, timeout=10):
    """Download *url* and return its HTML text.

    Every attempt sends a freshly randomised ``User-Agent`` header. A
    response whose status code is not 200 is retried after a linearly
    growing pause (0.1 s, 0.2 s, ...), up to ``max_retries`` extra attempts.

    Args:
        url: Address of the page to download.
        max_retries: Number of additional attempts made after the first
            request when the server does not answer with status 200.
        timeout: Seconds to wait for the server before the request is
            abandoned; passed straight to ``requests.get``.

    Returns:
        The response body decoded with the encoding detected from its
        content, or ``None`` when the request raised an error or no
        attempt produced a 200 response.
    """
    try:
        for attempt in range(max_retries + 1):
            if attempt:
                # Back off a little longer before each successive retry.
                time.sleep(attempt * 0.1)
            headers = {'User-Agent': UserAgent().random}
            response = requests.get(url, headers=headers, timeout=timeout)
            if response.status_code == 200:
                # Use the encoding guessed from the body; the declared
                # charset may be missing or wrong.
                response.encoding = response.apparent_encoding
                return response.text
    except Exception:
        # Best-effort helper: report the failure and fall through to None
        # instead of propagating. Unlike a bare ``except`` this lets
        # KeyboardInterrupt / SystemExit through.
        print('爬取出错')
        return None
    # Every attempt came back with a non-200 status.
    print('爬取出错')
    return None
# Original article: https://www.cnblogs.com/fran-py-/p/12391003.html