import requests from bs4 import BeautifulSoup #import csv import pandas as pd allUniv=[] def getHTMLText(url): try: r=requests.get(url,timeout=30) r.raise_for_status() r.encoding=‘utf-8‘ return r.text except: return "" def fillUnivList(soup): data=soup.find_all(‘tr‘) for tr in data: ltd=tr.find_all(‘td‘) if len(ltd)==0: continue singleUniv=[] for td in ltd: singleUniv.append(td.string) allUniv.append(singleUniv) def printUnivList(num): print("{:^4}{:^10}{:^5}{:^8}{:^10}".format("排名","学校名称","省市","总分","年费")) for i in range(num): u=allUniv[i] print("{:^4}{:^10}{:^5}{:^8}{:^10}".format(u[0],u[1],u[2],u[3],u[6])) return u ‘‘‘ def main(num): url=‘http://www.zuihaodaxue.com/zuihaodaxuepaiming2018.html‘ html=getHTMLText(url) soup=BeautifulSoup(html,"html.parser") fillUnivList(soup) printUnivList(num) #saveUnivData(u) ‘‘‘ def write_csv(list): name = [‘排名‘, ‘学校名称‘, ‘省份‘, ‘总分‘, ‘生源质量(新生高考成绩得分)‘, ‘培养结果(毕业生就业率)‘, ‘社会声誉(社会捐赠收入·千元)‘, ‘科研规模(论文数量·篇)‘, ‘科研质量(论文质量·FWCI)‘, ‘顶尖成果(高被引论文·篇)‘, ‘顶尖人才(高被引学者·人)‘, ‘科技服务(企业科研经费·千元)‘, ‘成果转化(技术转让收入·千元)‘, ‘学生国际化(留学生比例)‘] name2 = [‘a‘, ‘b‘, ‘c‘] test = pd.DataFrame(columns=name, data=list) test.to_csv(‘D:/我的文件/Python作业/CrawUniv.csv‘, encoding=‘gbk‘)
原文:https://www.cnblogs.com/MARCOREUS11/p/12883202.html