首页 > 其他 > 详细

天猫淘宝评论数据抓取

时间:2017-12-07 19:53:54      阅读:401      评论:0      收藏:0      [点我收藏+]
import requests
import re,json
import pandas


class base():
    """Scraper for paginated Taobao / Tmall review endpoints that answer in JSONP.

    The instance stores a URL prefix to which a page number is appended.
    Parsed reviews are appended to the module-level ``info_list`` list, so the
    calling script must define ``info_list`` before ``comment`` (or either
    ``*_comment`` method) is called.
    """

    def __init__(self, url):
        """Store *url*, the endpoint prefix that the page number is appended to."""
        self.url = url

    def all_url(self):
        """Return the list of page URLs for page numbers 1 through 99."""
        return [self.url + str(page) for page in range(1, 100)]

    def loads_jsonp(self, _jsonp):
        """Decode the JSON object embedded in a JSONP response body.

        The payload is taken as everything from the first ``{`` to the last
        ``}`` in *_jsonp*; any callback wrapper around it is discarded.

        Returns:
            The decoded JSON value.

        Raises:
            ValueError: if no brace-delimited span is found, the span is not
                valid JSON, or *_jsonp* is not a string.
        """
        try:
            return json.loads(re.match(".*?({.*}).*", _jsonp, re.S).group(1))
        except (AttributeError, TypeError, ValueError) as err:
            # AttributeError: re.match returned None (no "{...}" present).
            # ValueError: json.JSONDecodeError is a ValueError subclass.
            raise ValueError("Invalid Input") from err

    def url_req(self, url):
        """Fetch *url* with ``requests.get`` and return its decoded JSONP payload."""
        content = requests.get(url).text
        return self.loads_jsonp(content)

    def taobao_comment(self, data):
        """Append one ``{'昵称', '评论'}`` row per entry of ``data['comments']``.

        Rows are appended to the module-level ``info_list``.
        """
        for item in data['comments']:
            info_list.append({
                '昵称': item['user']['nick'],
                '评论': item['content'],
            })

    def tianmao_comment(self, data):
        """Append one ``{'昵称', '评论'}`` row per entry of ``data['rateList']``.

        Rows are appended to the module-level ``info_list``.
        """
        for item in data['rateList']:
            info_list.append({
                '昵称': item['displayUserNick'],
                '评论': item['rateContent'],
            })

    def comment(self, url):
        """Fetch one page and store its reviews.

        URLs containing ``tmall`` are parsed with ``tianmao_comment``; every
        other URL is parsed with ``taobao_comment``.
        """
        data = self.url_req(url)
        if 'tmall' in url:
            self.tianmao_comment(data)
        else:
            self.taobao_comment(data)
            

def main(url):
    """Scrape every page reachable from the *url* prefix.

    Builds a ``base`` scraper, processes each URL from ``all_url()`` through
    ``comment``, and prints the running length of the module-level
    ``info_list`` after each page.
    """
    scraper = base(url)
    for page_url in scraper.all_url():
        scraper.comment(page_url)
        print(len(info_list))


if __name__ == "__main__":
    # Tmall review endpoint prefix; base.all_url() appends the page number
    # directly after "currentPage=".
    url = "https://rate.tmall.com/list_detail_rate.htm?itemId=39258348512&spuId=250685252&sellerId=2106913388&order=3&currentPage="
    # Module-level accumulator that the scraper methods append rows to.
    info_list = []
    main(url)
    # Write the collected rows to an Excel file in the working directory.
    df = pandas.DataFrame(info_list)
    df.to_excel("comments.xlsx", index=False)

 

天猫淘宝评论数据抓取

原文:http://www.cnblogs.com/Erick-L/p/8000637.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!