保存在本地,可惜文件名只是论文编号,还需要根据论文题目与编号的映射关系重命名,心累……
# coding=utf-8
#
# Download the IJCAI 2019 proceedings PDFs with a few worker threads.
# Each paper NNNN is fetched from
# https://www.ijcai.org/proceedings/2019/NNNN.pdf and saved in the current
# working directory as NNNN.pdf.
import threading

import requests

# Seconds to wait on the server before giving up on a request. Without a
# timeout a stalled connection would block its worker thread forever.
REQUEST_TIMEOUT = 60

# Inclusive (first, last) paper numbers handled by each worker thread.
# The ranges must not overlap: get_file_content_arange() includes both ends,
# so a shared boundary would be downloaded twice by two threads writing the
# same file at the same time.
DOWNLOAD_RANGES = [(1, 220), (221, 439), (440, 657), (658, 964)]


def get_file_content(num):
    """Download paper number *num* and save it as ``NNNN.pdf``.

    Args:
        num: Paper number; it is zero-padded to four digits in both the URL
            and the local file name.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status. Nothing is written to
            disk in that case.
        OSError: If the output file cannot be written.
    """
    savepath = '%04d.pdf' % (num)
    suburl = 'https://www.ijcai.org/proceedings/2019/%04d.pdf' % (num)
    r = requests.get(suburl, timeout=REQUEST_TIMEOUT)
    # Fail before touching the disk so an error page is never saved as a PDF.
    r.raise_for_status()
    # 'wb': a PDF is binary data, and r.content is the raw response body
    # as bytes.
    with open(savepath, 'wb') as f:
        f.write(r.content)


def get_file_content_arange(min, max):
    """Download every paper numbered from *min* to *max*, both inclusive.

    A paper that fails to download is reported and skipped so that one bad
    request does not abort the rest of the range.

    Args:
        min: First paper number to download.
        max: Last paper number to download (inclusive).
    """
    # NOTE: the parameter names shadow the min()/max() builtins; they are
    # kept unchanged so existing keyword callers keep working.
    for num in range(min, max + 1):
        print('downloading %04d.pdf...' % (num))
        try:
            get_file_content(num)
        except (requests.RequestException, OSError) as err:
            print('failed to download %04d.pdf: %s' % (num, err))


def main():
    """Start one download thread per entry in DOWNLOAD_RANGES and wait."""
    threads = [
        threading.Thread(target=get_file_content_arange, args=bounds)
        for bounds in DOWNLOAD_RANGES
    ]
    for t in threads:
        t.start()
    for t in threads:
        t.join()


if __name__ == '__main__':
    main()
Download IJCAI 2019 papers with Python
原文:https://www.cnblogs.com/wind-chaser/p/11388900.html