首页 > 编程语言 > 详细

python写csv文件,解决中文乱码问题

时间:2020-03-19 14:55:36      阅读:113      评论:0      收藏:0      [点我收藏+]
# pip install beautifulsoup4 requests lxml

from bs4 import BeautifulSoup
import requests
import json, time, datetime
import csv, codecs

def getUrlText(url):
    """Download ``url`` and return the response body decoded as UTF-8 text.

    The request is retried indefinitely: after every failed attempt a
    message is printed and the function sleeps for 3 seconds before trying
    again, so it only returns once a request has succeeded.

    Args:
        url: Address of the HTML resource to fetch.

    Returns:
        The response body as text.
    """
    while True:
        try:
            response = requests.get(url)
            # Decode the body as UTF-8 regardless of what requests detected.
            response.encoding = "utf-8"
            return response.text
        except requests.exceptions.ConnectionError:
            print("ConnectionError -- please wait 3 seconds")
        except requests.exceptions.ChunkedEncodingError:
            print("ChunkedEncodingError -- please wait 3 seconds")
        except Exception:
            # Deliberately not a bare ``except``: KeyboardInterrupt and
            # SystemExit must still be able to break out of the retry loop.
            print("Unfortunitely -- An Unknow Error Happened, Please wait 3 seconds")
        # Reached only after a failed attempt; pause before retrying.
        time.sleep(3)

def test():
    """Scrape the first table of the results page into ``1.csv`` (GBK).

    The page is fetched with ``getUrlText`` and parsed with BeautifulSoup.
    The first ``<tr>`` of the table is skipped, and every later row whose
    first cell is purely numeric has its first five cells written as one
    CSV record beneath a fixed header row.

    Raises:
        ValueError: If the downloaded page contains no ``<table>``.
    """
    url = "http://www.sxkszx.cn/news/201989/n378377624.html"
    html = getUrlText(url)

    soup = BeautifulSoup(html, features="lxml")
    table = soup.table
    if table is None:
        # Fail before creating the output file rather than half-way through.
        raise ValueError("no <table> found in %s" % url)

    # newline="" lets the csv module control line endings itself; the file
    # is encoded as GBK, and the context manager closes it even if a row
    # raises.
    with open("1.csv", "w", newline="", encoding="gbk") as f:
        w = csv.writer(f)
        w.writerow(["院校代码", "院校名称", "科类", "计划性质", "最低分"])
        for tr in table.select("tr")[1:]:
            cells = [td.text.strip() for td in tr.select("td")[:5]]
            # Skip rows without the five expected cells (e.g. header or
            # spacer rows) and rows whose first cell is not numeric.
            if len(cells) == 5 and cells[0].isdigit():
                w.writerow(cells)
# Run the scrape only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test()

 

python写csv文件,解决中文乱码问题

原文:https://www.cnblogs.com/liuyong0076/p/12524130.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!