首页 > 编程语言 > 详细

Python 抓取网易云粉丝 第三种方法 【五】

时间:2020-10-02 16:57:31      阅读:32      评论:0      收藏:0      [点我收藏+]
import scrapy#持续写入
import time

import random

import math

from Crypto.Cipher import AES

import codecs

import base64

import requests

from lxml import etree

import json


class niub:
    """Build the encrypted form fields for NetEase Cloud Music ``weapi`` calls.

    The request body is a JSON string that is AES-CBC encrypted twice (first
    with the fixed ``self.key``, then with a fresh random 16-character key)
    to produce ``params``; the random key is then RSA-encrypted with
    ``self.e`` / ``self.f`` to produce ``encSecKey``.
    """

    def __init__(self):
        # Fixed AES key used for the first encryption pass.
        self.key = "0CoJUm6Qyw8W8jud"
        # RSA modulus, as a hexadecimal string.
        self.f = "00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7"
        # RSA public exponent, as a hexadecimal string.
        self.e = "010001"
        # User whose follower list is requested.
        self.singer_id = 1411492497
        self.post_url1 = "https://music.163.com/weapi/user/getfolloweds?csrf_token="
        self.post_url2 = "https://music.163.com/weapi/v1/play/record?csrf_token="

    def _generate_random_strs(self, length):
        """Return ``length`` characters picked at random from ``[a-zA-Z0-9]``."""
        alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
        return "".join(random.choice(alphabet) for _ in range(length))

    def _AESencrypt(self, msg, key):
        """AES-CBC encrypt ``msg`` with ``key`` and return the Base64 text.

        Args:
            msg: Plain text to encrypt (str).
            key: AES key as a str; it is UTF-8 encoded before use.

        Returns:
            The ciphertext, Base64-encoded and decoded to str.
        """
        data = msg.encode("utf-8")
        # Pad on the *byte* length so non-ASCII input still ends up block-aligned;
        # each padding byte carries the padding length (1..16).
        pad = 16 - len(data) % 16
        data += bytes([pad]) * pad

        # Initialisation vector for CBC mode (must be 16 bytes).
        iv = "0102030405060708"
        cipher = AES.new(key.encode("utf-8"), AES.MODE_CBC, iv.encode("utf-8"))

        return base64.b64encode(cipher.encrypt(data)).decode("utf-8")

    def _RSAencrypt(self, randomstrs, key, f):
        """Textbook-RSA encrypt the reversed ``randomstrs``.

        Args:
            randomstrs: The random key string to protect.
            key: Public exponent as a hex string.
            f: Modulus as a hex string.

        Returns:
            The result as lowercase hex, left-padded with zeros to 256 chars.
        """
        # The string is reversed before being read as a big-endian integer.
        base = int.from_bytes(randomstrs[::-1].encode("utf-8"), "big")
        # Three-argument pow performs modular exponentiation without ever
        # building the full (multi-megabit) power.
        seckey = pow(base, int(key, 16), int(f, 16))
        return format(seckey, "x").zfill(256)

    def _get_params1(self, page):
        """Return ``(params, encSecKey)`` for the given 1-based follower page.

        Each page holds 20 entries, so the request offset is ``(page - 1) * 20``.
        """
        offset = (page - 1) * 20
        msg = json.dumps({
            "userId": str(self.singer_id),
            "offset": offset,
            "total": "false",
            "limit": "20",
            "csrf_token": "",
        })

        # First pass with the fixed key.
        enctext = self._AESencrypt(msg, self.key)
        # Second pass with a fresh random 16-character key -> ``params``.
        random_key = self._generate_random_strs(16)
        encText = self._AESencrypt(enctext, random_key)
        # The random key itself is RSA-encrypted -> ``encSecKey``.
        encSecKey = self._RSAencrypt(random_key, self.e, self.f)

        return encText, encSecKey

    def start_requests(self):
        """Yield the form-data dict (``params`` / ``encSecKey``) for page 1."""
        for page in range(1, 2):
            params, encSecKey = self._get_params1(page)
            yield {"params": params, "encSecKey": encSecKey}




# Sample payload and the fixed AES key kept at module level; nothing visible
# in this file reads them (the class carries its own copies).
msg = {"userId": "1411492497", "offset":"1", "total": "false", "limit": "20", "csrf_token": ""}
key = "0CoJUm6Qyw8W8jud"
# print(x.start_requests())





def lianjie(params,encSecKey):
    """POST the encrypted form to the follower-list endpoint and format the reply.

    Args:
        params: The doubly AES-encrypted request body (Base64 text).
        encSecKey: The RSA-encrypted random key (hex text).

    Returns:
        One line per follower, concatenated into a single str, in the form
        ``名字;<nickname>;动态;<eventCount>;关注;<follows>;粉丝;<followeds>\n``.

    Raises:
        KeyError: If the JSON reply has no ``followeds`` list or an entry
            lacks one of the expected fields.
    """
    # Browser-like headers. Content-Length is intentionally not set here:
    # requests derives it from the encoded form body, and a hard-coded value
    # would not match payloads of different sizes. A Cookie header can be
    # added if the endpoint starts requiring a logged-in session.
    headers = {
        "Host": "music.163.com",
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36",
        "Content-Type": "application/x-www-form-urlencoded",
        "Accept": "*/*",
        "Origin": "https://music.163.com",
        "Sec-Fetch-Site": "same-origin",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Dest": "empty",
        "Referer": "https://music.163.com/user/fans?id=1411492497",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.9",
    }

    url = "https://music.163.com/weapi/user/getfolloweds?csrf_token="
    data = {
        "params": params,
        "encSecKey": encSecKey,
    }
    # NOTE(review): verify=False disables TLS certificate checking; kept as in
    # the original, confirm it is really needed. The timeout keeps a stalled
    # connection from blocking a long crawl forever.
    r = requests.post(url=url, data=data, headers=headers, verify=False, timeout=30)

    lines = [
        ";".join([
            "名字", str(user["nickname"]),
            "动态", str(user["eventCount"]),
            "关注", str(user["follows"]),
            "粉丝", str(user["followeds"]),
        ]) + "\n"
        for user in json.loads(r.text)["followeds"]
    ]
    return "".join(lines)
def writeInfomation(contont):  # write to a text file
    """Append ``contont`` to ``鱿小鱼.txt`` and print the elapsed run time.

    Args:
        contont: Text to append; written as UTF-8.

    The elapsed time is measured against the module-level ``start`` timestamp
    set by the script entry point. When ``start`` is not defined (for example
    when this module is imported rather than run), the timing line is skipped
    instead of raising NameError after the write has already happened.
    """
    # The ``with`` block closes the file; no explicit close() is needed.
    with open("鱿小鱼.txt", "a+", encoding="utf-8") as f:
        f.write(contont)
    end = time.time()
    started = globals().get("start")
    if started is not None:
        print(end - started)

def main(offset):
    """Fetch one page of followers and append it to the output file.

    Args:
        offset: 1-based page number; it is passed to ``niub._get_params1``,
            which converts it into the actual record offset.
    """
    x = niub()
    params, enc_sec_key = x._get_params1(offset)
    # Issue the request exactly once; a second identical call would only
    # duplicate the network round trip.
    zuihou = lianjie(params, enc_sec_key)
    writeInfomation(zuihou)
if __name__ == "__main__":
    # ``start`` is read as a module global by writeInfomation to report the
    # elapsed time after each page is written.
    start = time.time()
    # Crawl pages 1 and 2.
    for i in range(1,3):
        main(offset=i)

这种方法在抓取10页以内时比第一种方法慢,但可以抓取上万页,每页耗时约3秒,并且可以持续抓取,而其他两种方法做不到。

Python 抓取网易云粉丝 第三种方法 【五】

原文:https://www.cnblogs.com/aotumandaren/p/13761929.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!