# -*- coding: utf-8 -*-
import requests;
import sys;
import io;
#重点:标准解析库
from bs4 import BeautifulSoup;
# Back-office page to fetch using an existing login session.
# NOTE: "域名" is a placeholder; replace it with the real host name.
url = 'http://域名/other/other.comment.wall.php?ac=l&id=&fid=&uid=&title=&source=0&status=0&b_time=&e_time='

# Cookie header value copied from the browser after logging in.
cookie_str = r'PHPSESSID=9f20c6bb676841f38aee8589aceb5c7f; username=zhonghuihong; password=XXX'

# Request headers sent with the GET request (copied from a browser).
headers = {'User-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.76 Mobile Safari/537.36'}

# Attribute values used to locate the table and its cells in the page.
TABLE_CLASS = 'table table-striped table-bordered table-hover'
CELL_STYLE = 'text-align:center;vertical-align:middle;word-break:break-all; word-wrap:break-all;'


def parse_cookie_string(raw):
    """Convert a ``Cookie`` header value into a ``{name: value}`` dict.

    Args:
        raw: Text of the form ``"name1=value1; name2=value2"``.

    Returns:
        A dict mapping each cookie name to its value. Whitespace around
        names and values is stripped, empty segments (for example after a
        trailing ``;``) are skipped, and a segment without ``=`` maps to an
        empty string. Only the first ``=`` separates name from value, so
        values may themselves contain ``=``.
    """
    parsed = {}
    for pair in raw.split(';'):
        name, _, value = pair.partition('=')
        name = name.strip()
        if not name:
            continue
        parsed[name] = value.strip()
    return parsed


def main():
    """Fetch the page with the saved cookies and print the matching cells."""
    # Force UTF-8 on standard output so non-ASCII text prints correctly
    # regardless of the console's default encoding.
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')

    cookies = parse_cookie_string(cookie_str)

    # Send the GET request with the headers and cookies; the timeout keeps
    # the script from hanging forever on an unresponsive server.
    resp = requests.get(url, headers=headers, cookies=cookies, timeout=30)
    html_resp = resp.content.decode('UTF-8')

    soup = BeautifulSoup(html_resp, 'html.parser')
    table = soup.find(attrs={'class': TABLE_CLASS})
    if table is None:
        # find() returns None when nothing matches; fail with a readable
        # message instead of an AttributeError on the next line.
        raise SystemExit('Expected table not found in the response '
                         '(HTTP status %s).' % resp.status_code)

    cells = table.find_all(attrs={'style': CELL_STYLE})
    # To print only the text of each cell, loop over `cells` and print
    # `cell.string` instead.
    print(cells)


if __name__ == '__main__':
    main()

# Source: https://www.cnblogs.com/shenyexiaoqingxin/p/10370261.html