首页 > 编程语言 > 详细

python爬虫实现火山周报表格程序

时间:2020-01-18 18:07:56      阅读:88      评论:0      收藏:0      [点我收藏+]
import requests,bs4,re,xlwt
from bs4 import BeautifulSoup
def getHTMLText(url, timeout=30):
    """Download ``url`` and return the response body as decoded text.

    The text is decoded with the encoding ``requests`` guesses from the
    content (``apparent_encoding``) rather than the one declared in the
    HTTP headers.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The page text, or an empty string if the request failed (in which
        case "request failed" is printed).
    """
    try:
        # Without a timeout a stalled server would hang the scraper forever.
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only network/HTTP failures are expected here; programming errors
        # and KeyboardInterrupt must not be silently swallowed.
        print("request failed")
        return ""

def getLocation(addr):
    """Scrape latitude, longitude and elevation from a volcano page.

    The page is expected to contain two coordinates written like
    ``0.37°S`` / ``91.55°W`` (latitude first) and an elevation written
    like ``elev. 1476 m``.

    Args:
        addr: URL of the volcano page, downloaded via ``getHTMLText``.

    Returns:
        ``[latitude, longitude, elevation]`` as strings, for example
        ``["0.37S", "91.55W", "1476m"]``.

    Raises:
        ValueError: If the page could not be fetched or does not contain
            two coordinates and an elevation.
    """
    demo = getHTMLText(addr)
    if not demo:
        raise ValueError("no page content received from {}".format(addr))

    # The decimal point is escaped and the fraction is optional, so the
    # pattern no longer matches arbitrary characters between the digits.
    coords = re.findall(r"(\d+(?:\.\d+)?)°([NSEW])", demo)
    # An optional sign and an optional fraction cover below-sea-level,
    # single-digit and decimal elevations.
    elev = re.search(r"elev\.\s*(-?\d+(?:\.\d+)?)\s*m", demo)
    if len(coords) < 2 or elev is None:
        raise ValueError("could not parse location data from {}".format(addr))

    latitude = "".join(coords[0])
    longitude = "".join(coords[1])
    ele = elev.group(1) + "m"
    return [latitude, longitude, ele]


def getObjectData(soup):
    """Build one record per volcano listed in the weekly-report table.

    Each row of the first ``<tbody>`` supplies the first, second and last
    fields of a record from its first three cells. The link in the first
    cell is followed to fetch latitude, longitude and elevation via
    ``getLocation``.

    Args:
        soup: Parsed report page (a ``BeautifulSoup`` object).

    Returns:
        A list of ``[cell0, cell1, latitude, longitude, elevation, cell2]``
        records; empty if the page has no ``<tbody>``.
    """
    from urllib.parse import urljoin

    li = []
    tbody = soup.find("tbody")
    if tbody is None:
        return li

    for tr in tbody.children:
        # Whitespace between rows shows up as text nodes; only tags are rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr("td")
        # Section-heading rows have a single cell; anything with fewer than
        # three cells cannot supply the three fields read below.
        if len(tds) < 3:
            continue
        link = tds[0].a
        if link is None or not link.get("href"):
            continue
        # urljoin copes with both relative and root-relative hrefs.
        addr = urljoin("http://volcano.si.edu/", link["href"])
        latitude, longitude, ele = getLocation(addr)
        li.append([tds[0].string, tds[1].string, latitude, longitude, ele, tds[2].string])
    return li

def printUnivList(result, num):
    """Print the first ``num`` records as a table and prepend a header row.

    Args:
        result: List of records, each holding six values (name, country,
            latitude, longitude, elevation, new/ongoing status). The list
            is modified in place: the header row is inserted at index 0.
        num: Number of records to print; values larger than
            ``len(result)`` print every record.

    Returns:
        ``result`` itself, now starting with the header row.
    """
    widths = (30, 40, 20, 20, 20, 20)
    print("火山名".ljust(30) + "所属国家".ljust(40) + "纬度".ljust(20) +  "经度".ljust(20) +
          " 海拔".ljust(20) +  "新增/持续".ljust(20))
    for row in result[:max(num, 0)]:
        # A cell may be None when the scraped tag had no single string.
        cells = ("" if cell is None else str(cell) for cell in row)
        print("\t".join(cell.ljust(width) for cell, width in zip(cells, widths)))
    # list.insert returns None, so insert first and return the list itself.
    result.insert(0, ["火山名", "所属国家", "纬度", "经度", "海拔", "新增/持续"])
    return result

def data_write(file_path, datas):
    """Write ``datas`` to a new workbook and save it at ``file_path``.

    Args:
        file_path: Destination path of the workbook file.
        datas: Iterable of rows; each row is a sequence of cell values.
            Row ``i``, item ``j`` is written to cell (row ``i``, column ``j``).
    """
    f = xlwt.Workbook()
    sheet1 = f.add_sheet("sheet1", cell_overwrite_ok=True)  # create the sheet

    # Write each value to row i, column j.
    for i, data in enumerate(datas):
        for j, value in enumerate(data):
            sheet1.write(i, j, value)

    f.save(file_path)  # save the file

# Scrape the weekly volcano report, print it, and export it to a workbook.
url = "http://volcano.si.edu/reports_weekly.cfm"
demo = getHTMLText(url)
soup = BeautifulSoup(demo, "html.parser")
result = getObjectData(soup)
# printUnivList inserts the header row into ``result`` in place, so the list
# written below starts with the column titles.
full_result = printUnivList(result, len(result))
file_path = "/home/guoming/volcano1.xls"
data_write(file_path, result)

软件运行后屏幕打印如下信息:

火山名                           所属国家                                    纬度                  经度                   海拔                 新增/持续               
Fernandina                        Ecuador                                     0.37S                   91.55W                  1476m                   New                 
Krakatau                          Indonesia                                   6.102S                  105.423E                155m                    New                 
Kuchinoerabujima                  Ryukyu Islands (Japan)                      30.443N                 130.217E                657m                    New                 
Taal                              Luzon (Philippines)                         14.002N                 120.993E                311m                    New                 
Aira                              Kyushu (Japan)                              31.593N                 130.657E                1117m                   Ongoing             
Dukono                            Halmahera (Indonesia)                       1.693N                  127.894E                1229m                   Ongoing             
Ebeko                             Paramushir Island (Russia)                  50.686N                 156.014E                1103m                   Ongoing             
Karangetang                       Siau Island (Indonesia)                     2.781N                  125.407E                1797m                   Ongoing             
Klyuchevskoy                      Central Kamchatka (Russia)                  56.056N                 160.642E                4754m                   Ongoing             
Popocatepetl                      Mexico                                      19.023N                 98.622W                 5393m                   Ongoing             
Sabancaya                         Peru                                        15.787S                 71.857W                 5960m                   Ongoing             
Sangay                            Ecuador                                     2.005S                  78.341W                 5286m                   Ongoing             
Semisopochnoi                     United States                               51.93N                  179.58E                 1221m                   Ongoing             
Sheveluch                         Central Kamchatka (Russia)                  56.653N                 161.36E                 3283m                   Ongoing             
Shishaldin                        Fox Islands (USA)                           54.756N                 163.97W                 2857m                   Ongoing             
Suwanosejima                      Ryukyu Islands (Japan)                      29.638N                 129.714E                796m                    Ongoing             
White Island                      North Island (New Zealand)                  37.52S                  177.18E                 294m                    Ongoing             
程序获取了http://volcano.si.edu/reports_weekly.cfm中全球火山活动信息,并制作成表格,生成volcano1.xls文件

python爬虫实现火山周报表格程序

原文:https://www.cnblogs.com/iceberg710815/p/12209643.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!