python写csv文件，解决中文乱码问题

时间：2020-03-19 14:55:36 阅读：113 评论：0 收藏：0 [点我收藏+]

# pip install BeautifulSoup4 requests lxml

from bs4 import BeautifulSoup
import requests
import json, time, datetime
import csv, codecs

def getUrlText(url, timeout=30):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    The request is retried indefinitely, sleeping 3 seconds after every
    failed attempt, until one attempt succeeds. Note that errors which can
    never succeed (for example a malformed URL) are retried forever too.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before an attempt is treated
            as failed and retried.

    Returns:
        The HTML text of the response.
    """
    while True:
        try:
            response = requests.get(url, timeout=timeout)
            # Force UTF-8 instead of relying on the charset requests infers
            # from the response headers.
            response.encoding = 'utf-8'
            return response.text
        except requests.exceptions.ConnectionError:
            print('ConnectionError -- please wait 3 seconds')
        except requests.exceptions.ChunkedEncodingError:
            print('ChunkedEncodingError -- please wait 3 seconds')
        except Exception:
            # Deliberately broad (best-effort retry), but unlike a bare
            # ``except:`` it lets KeyboardInterrupt/SystemExit propagate so
            # the loop can be stopped with Ctrl-C.
            print('Unfortunitely -- An Unknow Error Happened, Please wait 3 seconds')
        # Reached only after a failed attempt: back off before retrying.
        time.sleep(3)

def test():
    """Download a fixed score-table page and save its rows to ``1.csv``.

    The first ``<table>`` of the page is read row by row. The first row is
    skipped, and a row is written only when it has at least five ``<td>``
    cells and its first cell consists solely of digits. The file is encoded
    as GBK.

    Raises:
        ValueError: If the downloaded page contains no ``<table>`` element.
    """
    url = 'http://www.sxkszx.cn/news/201989/n378377624.html'
    html = getUrlText(url)

    soup = BeautifulSoup(html, features="lxml")
    table = soup.table
    if table is None:
        # Fail before creating the output file, with a message that names
        # the actual problem instead of an AttributeError on None.
        raise ValueError('no <table> element found in ' + url)

    # newline="" lets the csv module control line endings itself; the
    # context manager closes the file even if a row fails to encode.
    with open("1.csv", "w", encoding="gbk", newline="") as f:
        w = csv.writer(f)
        w.writerow(['院校代码', '院校名称', '科类', '计划性质', '最低分'])
        # [1:] skips the first <tr> of the table.
        for tr in table.select('tr')[1:]:
            cells = [td.text.strip() for td in tr.select('td')]
            # Skip rows that are too short or whose first cell is not a
            # purely numeric code.
            if len(cells) >= 5 and cells[0].isdigit():
                w.writerow(cells[:5])
# Run the scrape only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    test()

python写csv文件，解决中文乱码问题

原文：https://www.cnblogs.com/liuyong0076/p/12524130.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年09月23日 (328)
2021年09月24日 (313)
2021年09月17日 (191)
2021年09月15日 (369)
2021年09月16日 (411)
2021年09月13日 (439)
2021年09月11日 (398)
2021年09月12日 (393)
2021年09月10日 (160)
2021年09月08日 (222)