#coding:utf-8
"""Follower-list scraper for aweme.snssdk.com plus PDF download/merge helpers.

The module contains two loosely related tool sets:

* ``calcSig`` / ``getFollower`` / ``jsp``: build a signed follower-list URL
  (``as`` and ``cp`` query parameters), fetch it page by page and append the
  ``followers`` field of every response to ``mdata.txt``.
* ``getVideo_requests`` / ``jsppost`` / ``getfilenames`` / ``mergefiles``:
  download PDF pages from eresource.library.sh.cn and merge them.

NOTE(review): TLS verification is disabled (``verify=False``) on every request
and session cookies are hard-coded below; both should come from configuration.
"""
import codecs
import hashlib
import json
import os
import re
import shutil
import sys
import time
import urllib.error
import urllib.parse
import urllib.request

import requests
import urllib3
from PyPDF2 import PdfFileReader, PdfFileWriter, PdfFileMerger
from requests.cookies import RequestsCookieJar

# Download directory used by getVideo_requests() and as the merge target.
DOWNLOAD_PATH = r'E:\\python\\pdf\\'

# Request headers shared by every HTTP call in this module.
headers = {"User-Agent": "com.ss.android.ugc.aweme/750 (Linux; U; Android 6.0.1; zh_CN; Redmi 4A; Build/MMB29M; Cronet/58.0.2991.0)", "Cookie": "install_id=82879820134; ttreq=1$737084fed40b6240b948d2e5f0895b8df4af9e2f; odin_tt=8c2caed1fe142cfbbdf2a4b5e3fa397d62dbcd9137cc328c4631dfa0c18120d7cb5797df33708d164e7423e4913134028431d801ee758ebef1c15954d9ca470f; qh[360]=1"}

# User whose followers getFollower() lists when no user_id is given.
_DEFAULT_FOLLOWER_USER_ID = '127178283690750'

# Fixed part of the follower-list query string (device/app description).
_FOLLOWER_QUERY_TAIL = "&count=20&retry_type=no_retry&iid=82879820134&device_id=69189073301&ac=wifi&channel=360_new&aid=1128&app_name=aweme&version_code=168&version_name=1.6.8&device_platform=android&ssmix=a&device_type=Redmi+4A&device_brand=Xiaomi&language=zh&os_api=23&os_version=6.0.1&openudid=29f385a1e0b60ece&manifest_version_code=210&resolution=720*1280&dpi=320&update_version_code=2102"


class calcSig(object):
    """Compute the ``as`` / ``cp`` signature parameters for a request URL."""

    # Digit strings (1-based positions) used to permute the hex timestamp.
    key1 = '57218436'
    key2 = '15387264'
    # Constant appended to the query as ``rstr=<value>`` before hashing.
    rstr = 'efc84c17'

    def shuffle(self, p1: str, p2: str) -> str:
        """Return the 8 characters of ``p1`` picked in the order given by ``p2``.

        Each of the first eight characters of ``p2`` is a decimal digit naming
        a 1-based position in ``p1``. The result is lower-cased.
        """
        return ''.join(p1[int(digit, 10) - 1] for digit in p2[:8]).lower()

    def ppp(self, u_md5: str, u_key1: str, u_key2: str) -> str:
        """Build the 36-character ``as`` + ``cp`` string.

        Layout: ``'a1'``, then ``u_md5[0:8]`` interleaved with ``u_key2``,
        then ``u_key1`` interleaved with ``u_md5[24:32]``, then ``'e1'``.
        """
        head = ''.join(m + k for m, k in zip(u_md5[:8], u_key2[:8]))
        tail = ''.join(k + m for k, m in zip(u_key1[:8], u_md5[24:32]))
        return 'a1' + head + tail + 'e1'

    def parseURL(self, url: str) -> str:
        """Return the concatenated query values of ``url`` in sorted-parameter order.

        ``rstr=<self.rstr>`` is added to the parameters before sorting, and
        every ``+`` or space inside a value is replaced by ``a``.
        """
        query = url[url.find('?') + 1:]
        params = sorted(query.split('&') + ['rstr=' + self.rstr])
        return ''.join(
            param.split('=')[1].replace('+', 'a').replace(' ', 'a')
            for param in params
        )

    def calcMD5(self, str_encode: str) -> str:
        """Return the hex MD5 digest of ``str_encode`` encoded as UTF-8."""
        return hashlib.md5(str_encode.encode('utf-8')).hexdigest()

    def work(self, url: str, curtime: int) -> str:
        """Return ``url`` with the ``as`` and ``cp`` parameters appended.

        ``curtime`` is the integer timestamp also sent as ``ts``; when it is
        odd the query digest is hashed a second time.
        """
        p_md5 = self.calcMD5(self.parseURL(url))
        if curtime & 1:
            p_md5 = self.calcMD5(p_md5)
        hex_time = hex(curtime)[2:]
        sig = self.ppp(
            p_md5,
            self.shuffle(hex_time, self.key1),
            self.shuffle(hex_time, self.key2),
        )
        return '%s&as=%s&cp=%s' % (url, sig[:18], sig[18:])


def _library_cookies():
    """Return a cookie jar with the hard-coded eresource.library.sh.cn session ids.

    NOTE(review): these JSESSIONID values are baked in; presumably they expire
    and must be refreshed by hand.
    """
    jar = RequestsCookieJar()
    jar.set("JSESSIONID", "94E22BF178A02E950FABC28678D0AABA", domain="eresource.library.sh.cn", path="/")
    jar.set("JSESSIONID", "A4FD3CC5DD934282AA003EC932399FB7", domain="eresource.library.sh.cn", path='/objserver/')
    return jar


def getVideo_requests(url, video_Name):
    """Download ``url`` into ``DOWNLOAD_PATH`` as ``<timestamp>_<mid>.pdf``.

    ``video_Name`` is accepted for compatibility but not used. After the
    download attempt, all PDFs in ``DOWNLOAD_PATH`` are merged into
    ``out.pdf`` when the module global ``nowpage`` equals 205.

    NOTE(review): ``mid`` and ``nowpage`` are read as module globals but are
    not defined anywhere in the visible file; the caller must define them
    before calling, otherwise this raises NameError.
    """
    print('>>> 开始下载 ! \n')
    mname = '%d_%s' % (time.time(), mid)
    tempName_video = os.path.join(DOWNLOAD_PATH, '%s.pdf' % mname)
    try:
        response = requests.get(url=url, timeout=120, cookies=_library_cookies(), headers=headers, verify=False)
        # Do not save an HTTP error page as a .pdf; it would break the merge.
        response.raise_for_status()
        print(url)
        with open(tempName_video, 'wb') as f:
            f.write(response.content)
    except (requests.RequestException, OSError) as e:
        print(e)
    else:
        # Only report success when the file was actually written.
        print('>>> %s.pdf 下载完成! ' % mid)
    if nowpage == 205:
        mergefiles(DOWNLOAD_PATH, 'out.pdf')


def getfilenames(filepath='', filelist_out=None, file_ext='all'):
    """Collect the paths of all files below ``filepath``, sub-directories included.

    Args:
        filepath: Root directory to walk.
        filelist_out: Optional list the paths are appended to; a new list is
            created when omitted.
        file_ext: ``'all'`` to keep every file, otherwise the exact extension
            (with leading dot, e.g. ``'.pdf'``) to keep.

    Returns:
        The list the paths were appended to.
    """
    # A fresh list per call: a shared default list would accumulate results.
    if filelist_out is None:
        filelist_out = []
    for fpath, _dirs, fs in os.walk(filepath):
        for f in fs:
            fi_d = os.path.join(fpath, f)
            if file_ext == 'all' or os.path.splitext(fi_d)[1] == file_ext:
                filelist_out.append(fi_d)
    return filelist_out


def _same_file_path(first, second):
    """Return True when both paths name the same location after normalisation."""
    return (os.path.normcase(os.path.abspath(first))
            == os.path.normcase(os.path.abspath(second)))


def mergefiles(path, output_filename, import_bookmarks=False):
    """Merge every PDF below ``path`` into ``<path>/<output_filename>``.

    Each merged file gets a bookmark named after its file name without the
    extension. Bookmarks inside the source files are only imported when
    ``import_bookmarks`` is True. Encrypted files are skipped. The process
    exits via ``sys.exit()`` when no PDF is found.
    """
    out_filename = os.path.join(os.path.abspath(path), output_filename)
    # Sorted for a deterministic page order; the output of a previous run is
    # excluded so it is not merged into itself.
    filelist = sorted(
        name for name in getfilenames(filepath=path, file_ext='.pdf')
        if not _same_file_path(name, out_filename)
    )
    print(filelist)
    if not filelist:
        print("当前目录及子目录下不存在pdf文件")
        sys.exit()

    merger = PdfFileMerger()
    for filename in filelist:
        short_filename = os.path.basename(os.path.splitext(filename)[0])
        # ``with`` closes the handle on the encrypted-file skip path as well.
        with open(filename, 'rb') as f:
            file_rd = PdfFileReader(f)
            if file_rd.isEncrypted:
                print('不支持的加密文件:%s' % (filename))
                continue
            merger.append(file_rd, bookmark=short_filename, import_bookmarks=import_bookmarks)
        print('合并文件:%s' % (filename))

    merger.write(out_filename)
    print('合并后的输出文件:%s' % (out_filename))
    merger.close()


def jsppost(url, data):
    """POST ``data`` to ``url`` with the library session cookies.

    Returns:
        The ``requests`` response, or ``None`` when the request failed (the
        error is printed).
    """
    print('>>> jsp开始 ! \n')
    try:
        return requests.post(url=url, data=data, timeout=120, cookies=_library_cookies(), headers=headers, verify=False)
    except requests.RequestException as e:
        print(e)
        return None


def _user_id_from_url(url):
    """Return the ``user_id`` query value of ``url``, or None when absent."""
    values = urllib.parse.parse_qs(urllib.parse.urlsplit(url).query).get('user_id')
    return values[0] if values else None


def _signed_follower_url(user_id, max_time):
    """Return a signed follower-list URL for ``user_id`` starting at ``max_time``."""
    now = time.time()
    ts = int(now)  # timestamp in seconds
    _rticket = str(int(round(now * 1000)))
    url = ("https://aweme.snssdk.com/aweme/v1/user/follower/list/?user_id=" + user_id
           + "&max_time=" + max_time + "&_rticket=" + _rticket + "&ts=" + str(ts)
           + _FOLLOWER_QUERY_TAIL)
    return calcSig().work(url, ts)


def jsp(url):
    """Fetch follower pages starting at ``url`` and append them to ``mdata.txt``.

    For every page the ``followers`` field of the JSON response is written as
    ``str(...)`` followed by a comma. The ``min_time`` field is used as the
    ``max_time`` of the next page, requested after a 20 second pause for the
    same ``user_id`` as ``url`` (the default user when ``url`` has none).
    Crawling stops when a response carries no ``min_time`` or when a request,
    decode or write error occurs (the error is printed).
    """
    user_id = _user_id_from_url(url) or _DEFAULT_FOLLOWER_USER_ID
    # Iterative on purpose: one stack frame no matter how many pages exist.
    while True:
        print('>>> jsp开始 ! \n')
        try:
            r = requests.get(url=url, timeout=120, headers=headers, verify=False)
            payload = json.loads(r.text)
            if not isinstance(payload, dict):
                raise ValueError('unexpected follower response: %r' % (payload,))
            min_time = payload.get('min_time')
            print(min_time)
            with open(r'mdata.txt', 'a+', encoding='utf-8') as f:
                f.write(str(payload.get('followers')) + ',')
        except (requests.RequestException, ValueError, OSError) as e:
            print(e)
            return
        if not min_time:
            # No cursor for a further page; stop instead of requesting
            # max_time=None forever.
            return
        time.sleep(20)
        url = _signed_follower_url(user_id, str(min_time))
        print(url)


def run(start_url):
    """Download ``start_url`` through getVideo_requests()."""
    video_Name = "myvideo"
    getVideo_requests(start_url, video_Name)


def getFollower(max_time, user_id=_DEFAULT_FOLLOWER_USER_ID):
    """Crawl the follower list of ``user_id`` starting at cursor ``max_time``.

    Args:
        max_time: Value of the ``max_time`` query parameter, as a string.
        user_id: User whose followers are listed.
    """
    murl = _signed_follower_url(user_id, max_time)
    print(murl)
    jsp(murl)


def main():
    """Script entry point: crawl the follower list of one hard-coded user."""
    print("加载中....")
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    # Disabled code kept for reference.
    # mergefiles(DOWNLOAD_PATH,'out.pdf')
    # headers = {"User-Agent":"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36","Referer": "http://wrd2016.library.sh.cn/channel/stjp/?tdsourcetag=s_pcqq_aiomsg"}
    # requests.packages.urllib3.disable_warnings()
    # jsp('http://eresource.library.sh.cn:8080/objserver/jsp/browserjp.jsp')
    # time.sleep(0.1)
    # headers = {"User-Agent":"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36","Referer": "http://eresource.library.sh.cn:8080/objserver/jsp/browserjp.jsp"}
    # jsp('http://eresource.library.sh.cn:8080/objserver/jsp/fcjp.jsp')
    # time.sleep(0.1)
    # headers = {"User-Agent":"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36","Referer": "http://eresource.library.sh.cn:8080/objserver/jsp/browserjp.jsp"}
    # jsp('http://eresource.library.sh.cn:8080/objserver/jsp/sy.jsp')
    # time.sleep(0.1)
    # headers = {"User-Agent":"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36","Referer": "http://eresource.library.sh.cn:8080/objserver/jsp/browserjp.jsp"}
    # jsp('http://eresource.library.sh.cn:8080/objserver/jsp/fy.jsp')
    # time.sleep(0.1)
    # fo = open('E:\python\pdf\merged_pdf_file.pdf', "wb")
    # print("文件名为: ", fo.name)
    # fo.close()

    # Earlier captured header sets, kept for reference.
    # headers = {"Accept":"","Accept-Encoding":"gzip","User-Agent":"com.ss.android.ugc.aweme/750 (Linux; U; Android 6.0.1; zh_CN; Redmi 4A; Build/MMB29M; Cronet/58.0.2991.0)","Cookie": "install_id=82603796791; ttreq=1$1cd2da467f7f9f07d5de1aa593622dc8e06a7c93; odin_tt=cbe4c5411d373bcc1c3d572c33fbd7cd291508f9058b0c999dd5a432c031db5692d05ebc521e96dfce5ebb02201f1934a7bb907f7d37d6a30f308f580d97c3da; qh[360]=1","sdk-version":"1","X-Gorgon":"030428800001f0e89460d16043016864424adf199af7a8b2e148","X-Khronos":"1565712299","X-SS-REQ-TICKET":"1565712299115"}
    # headers = {"User-Agent":"com.ss.android.ugc.aweme/750 (Linux; U; Android 6.0.1; zh_CN; Redmi 4A; Build/MMB29M; Cronet/58.0.2991.0)","Cookie": "install_id=82603796791; ttreq=1$1cd2da467f7f9f07d5de1aa593622dc8e06a7c93; odin_tt=cbe4c5411d373bcc1c3d572c33fbd7cd291508f9058b0c999dd5a432c031db5692d05ebc521e96dfce5ebb02201f1934a7bb907f7d37d6a30f308f580d97c3da; qh[360]=1","sdk-version":"1","X-Gorgon":"03e0906d00016fa814d014d60446c7c71713b86fe930f83d8ff5","X-Khronos":"1565778310","X-SS-REQ-TICKET":"1565777837303"}

    # Captured request URLs, kept for reference.
    # jsp("https://aweme.snssdk.com/aweme/v1/user/follower/list/?user_id=80954225804&max_time=1565784692&count=20&retry_type=no_retry&mcc_mnc=&iid=82715258125&device_id=69189073301&ac=wifi&channel=360_new&aid=1128&app_name=aweme&version_code=380&version_name=1.8.0&device_platform=android&ssmix=a&device_type=Redmi+4A&device_brand=Xiaomi&language=zh&os_api=23&os_version=6.0.1&openudid=29f385a1e0b60ece&manifest_version_code=380&resolution=720*1280&dpi=320&update_version_code=3802&_rticket=1565784692555&js_sdk_version=1.6.4&mas=0180c7997d71bdad7fd997615978f4f1c98c8c1c4ccc46868c26ec")
    # jsp("https://aweme.snssdk.com/aweme/v1/user/follower/list/?user_id=127178283690750&max_time=1565919239&count=20&retry_type=no_retry&iid=82803867752&device_id=38693548692&ac=wifi&channel=360_new&aid=1128&app_name=aweme&version_code=168&version_name=1.6.8&device_platform=android&ssmix=a&device_type=M5+Note&device_brand=Meizu&language=zh&os_api=24&os_version=7.0&uuid=865066037588709&openudid=797cadbbed256853&manifest_version_code=210&resolution=1080*1920&dpi=480&update_version_code=2102&_rticket=1565919256352&ts=1565919256&as=a1a5d0056821ed2806&cp=0818d75184625587e1")
    # https://aweme.snssdk.com/aweme/v1/user/follower/list/?user_id=103184660180&max_time=1565923611&count=20&retry_type=no_retry&iid=82879820134&device_id=69189073301&ac=wifi&channel=360_new&aid=1128&app_name=aweme&version_code=210&version_name=2.1.0&device_platform=android&ssmix=a&device_type=Redmi+4A&device_brand=Xiaomi&language=zh&os_api=23&os_version=6.0.1&openudid=29f385a1e0b60ece&manifest_version_code=210&resolution=720*1280&dpi=320&update_version_code=2102&_rticket=1565923611254&ts=1565923609&as=a12521d559b1ad39464355&cp=1a17dc5a9562589be1OaWe&mas=01773fb75f7a5086844863aaf3ed01dcd2acaccc2c6c2c9ccc2686

    user_id = '103184660180'
    # Fixed start cursor; the original note suggests str(int(time.time()))
    # as the alternative.
    max_time = '1565925565'
    murl = _signed_follower_url(user_id, max_time)
    print(murl)
    jsp(murl)


if __name__ == "__main__":
    main()
# Original article: https://www.cnblogs.com/xuan52rock/p/11362902.html