"""Scrape the weekly volcano report at volcano.si.edu into an .xls sheet.

For every volcano listed in the weekly report table the script follows the
volcano's link, extracts latitude, longitude and elevation from that page,
prints the collected rows and writes them to an Excel workbook.
"""

import re
from urllib.parse import urljoin

import bs4
import requests
import xlwt
from bs4 import BeautifulSoup

_BASE_URL = "http://volcano.si.edu/"

# Coordinates appear as "<number>°<hemisphere letter>", e.g. "6.102°S".
# The hemisphere letter is restricted so unrelated "°C"-style text is ignored.
_LATITUDE_RE = re.compile(r"(\d+(?:\.\d+)?)°([NS])")
_LONGITUDE_RE = re.compile(r"(\d+(?:\.\d+)?)°([EW])")
# Elevation appears as "elev. <number> m"; the number may be negative or
# carry a decimal part.
_ELEVATION_RE = re.compile(r"elev\.\s*(-?\d+(?:\.\d+)?)\s*m")

# Header row prepended to the data before it is written to the workbook.
_HEADER = ["火山名", "所属国家", "纬度", "经度", "海拔", "新增/持续"]
# Padding width of each printed column, in the same order as _HEADER.
_COLUMN_WIDTHS = (30, 40, 20, 20, 20, 20)


def getHTMLText(url):
    """Download *url* and return the response body as text.

    The response encoding is set from ``apparent_encoding`` before decoding.

    Returns:
        The page text, or ``None`` (after printing "request failed") when the
        request fails or the server answers with an HTTP error status.
    """
    try:
        # A timeout keeps one unresponsive page from hanging the whole run.
        r = requests.get(url, timeout=30)
        r.raise_for_status()
    except requests.RequestException:
        print("request failed")
        return None
    r.encoding = r.apparent_encoding
    return r.text


def _parseLocation(html):
    """Extract ``[latitude, longitude, elevation]`` strings from page text.

    Latitude/longitude are returned without the degree sign (``"6.102S"``),
    elevation with an ``"m"`` suffix (``"155m"``). A field that cannot be
    found is returned as an empty string.
    """
    if not html:
        return ["", "", ""]

    def coordinate(pattern):
        match = pattern.search(html)
        return match.group(1) + match.group(2) if match else ""

    elevation = _ELEVATION_RE.search(html)
    return [
        coordinate(_LATITUDE_RE),
        coordinate(_LONGITUDE_RE),
        elevation.group(1) + "m" if elevation else "",
    ]


def getLocation(addr):
    """Fetch the volcano page at *addr* and return its location data.

    Returns:
        ``[latitude, longitude, elevation]`` as strings; fields that are
        missing (including when the download fails) are empty strings.
    """
    return _parseLocation(getHTMLText(addr))


def getObjectData(soup):
    """Collect one row per volcano from the weekly report table.

    Args:
        soup: Parsed weekly report page.

    Returns:
        A list of ``[name, country, latitude, longitude, elevation, status]``
        rows. Empty when the page has no ``<tbody>``.
    """
    tbody = soup.find('tbody')
    if tbody is None:
        return []

    rows = []
    for tr in tbody.children:
        # Skip the whitespace/text nodes between the <tr> tags.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr('td')
        # Rows with fewer than three cells (e.g. single-cell rows) carry no
        # volcano data.
        if len(tds) < 3:
            continue

        link = tds[0].a
        href = link.get('href') if link is not None else None
        location = getLocation(urljoin(_BASE_URL, href)) if href else ["", "", ""]

        # get_text() never returns None, unlike .string on a cell that has
        # more than one child node.
        name, country, status = (td.get_text(" ", strip=True) for td in tds[:3])
        rows.append([name, country, location[0], location[1], location[2], status])
    return rows


def printUnivList(result, num):
    """Print the first *num* rows of *result* and prepend a header row.

    Args:
        result: Rows as produced by ``getObjectData``; modified in place.
        num: Number of rows to print.

    Returns:
        *result* itself, with the header row inserted at index 0. Calling
        this twice on the same list inserts the header twice.
    """
    print("火山名".ljust(30) + "所属国家".ljust(40) + "纬度".ljust(20) + "经度".ljust(20)
          + " 海拔".ljust(20) + "新增/持续".ljust(20))
    for row in result[:max(num, 0)]:
        print("\t".join(
            ("" if value is None else str(value)).ljust(width)
            for value, width in zip(row, _COLUMN_WIDTHS)
        ))
    result.insert(0, list(_HEADER))
    return result


def data_write(file_path, datas):
    """Write *datas* (a sequence of rows) to an .xls workbook at *file_path*.

    Row ``i`` / column ``j`` of the sheet receives ``datas[i][j]``.
    """
    workbook = xlwt.Workbook()
    # cell_overwrite_ok lets a cell be written more than once without error.
    sheet = workbook.add_sheet('sheet1', cell_overwrite_ok=True)
    for i, data in enumerate(datas):
        for j, value in enumerate(data):
            sheet.write(i, j, value)
    workbook.save(file_path)


if __name__ == "__main__":
    url = "http://volcano.si.edu/reports_weekly.cfm"
    demo = getHTMLText(url)
    if demo is None:
        # getHTMLText already reported the failure; nothing to parse.
        raise SystemExit(1)
    soup = BeautifulSoup(demo, "html.parser")
    result = getObjectData(soup)
    full_result = printUnivList(result, len(result))
    file_path = '/home/guoming/volcano1.xls'
    data_write(file_path, full_result)
软件运行后屏幕打印如下信息:
火山名 所属国家 纬度 经度 海拔 新增/持续
Fernandina Ecuador 0.37S 91.55W 1476m New
Krakatau Indonesia 6.102S 105.423E 155m New
Kuchinoerabujima Ryukyu Islands (Japan) 30.443N 130.217E 657m New
Taal Luzon (Philippines) 14.002N 120.993E 311m New
Aira Kyushu (Japan) 31.593N 130.657E 1117m Ongoing
Dukono Halmahera (Indonesia) 1.693N 127.894E 1229m Ongoing
Ebeko Paramushir Island (Russia) 50.686N 156.014E 1103m Ongoing
Karangetang Siau Island (Indonesia) 2.781N 125.407E 1797m Ongoing
Klyuchevskoy Central Kamchatka (Russia) 56.056N 160.642E 4754m Ongoing
Popocatepetl Mexico 19.023N 98.622W 5393m Ongoing
Sabancaya Peru 15.787S 71.857W 5960m Ongoing
Sangay Ecuador 2.005S 78.341W 5286m Ongoing
Semisopochnoi United States 51.93N 179.58E 1221m Ongoing
Sheveluch Central Kamchatka (Russia) 56.653N 161.36E 3283m Ongoing
Shishaldin Fox Islands (USA) 54.756N 163.97W 2857m Ongoing
Suwanosejima Ryukyu Islands (Japan) 29.638N 129.714E 796m Ongoing
White Island North Island (New Zealand) 37.52S 177.18E 294m Ongoing
程序从 http://volcano.si.edu/reports_weekly.cfm 获取全球火山活动信息,制作成表格,并生成 volcano1.xls 文件。
原文:https://www.cnblogs.com/iceberg710815/p/12209643.html