这算是实现的第一个flag吧,批量获取网站的banner,并写入csv中。获取的info包括:状态码、title、server、content-type。
# -*- coding: utf-8 -*-
import requests
import re
import csv
import chardet
import threading
# Module-level state shared by the worker thread.
targets = []          # de-duplicated URLs read from urls.txt
csv_file = 'jd.csv'   # output CSV path; change to suit (original used curly quotes, a syntax error)
def get_banner_export_csv():
    """Fetch banner info for every URL in urls.txt and append rows to csv_file.

    For each target, records: status code, URL, page title, redirect
    Location (3xx only), Content-Type, and Server header. Per-URL failures
    are printed and skipped so one bad host does not abort the run.
    """
    # Read targets, de-duplicating while preserving file order.
    with open('urls.txt', 'r') as infile:
        for line in infile:
            target = line.strip()
            if target and target not in targets:
                targets.append(target)
    # Compile once, outside the loop; re.S lets titles span multiple lines.
    title_pattern = re.compile(r'<title>(.*?)</title>', re.S)
    for url in targets:
        # Prepend a scheme when missing. (The original test
        # `if 'http' or 'https' not in url` was always true, so bare
        # hostnames were never fixed up.)
        if not url.startswith(('http://', 'https://')):
            url = 'http://' + url
        try:
            # One request per target; redirects disabled so the 3xx
            # Location header can be recorded.
            req = requests.get(url, timeout=(5, 20), verify=False,
                               allow_redirects=False)
            # If the server did not declare a charset, sniff one from the
            # body so req.text decodes correctly.
            if 'charset' not in req.headers.get('Content-Type', ' '):
                req.encoding = chardet.detect(req.content).get('encoding')
            # Guard against pages with no <title> (original indexed [0]
            # unconditionally and raised IndexError).
            matches = title_pattern.findall(req.text)
            title = matches[0] if matches else ''
            stat_code = str(req.status_code)
            # 3xx responses carry the redirect target; use a numeric range
            # instead of the original substring test ('30' in '230' is True),
            # and .get() in case Location is absent.
            if 300 <= req.status_code < 400:
                location = req.headers.get('Location', '')
            else:
                location = ''
            server = str(req.headers.get('Server', ''))
            # Renamed from `type` to avoid shadowing the builtin.
            content_type = str(req.headers.get('Content-Type', ''))
            # Original format strings had one placeholder too few, silently
            # dropping `server` from the console output.
            if location:
                print("{} {} {} {} {} {}".format(
                    stat_code, url, title, location, content_type, server))
            else:
                print("{} {} {} {} {}".format(
                    stat_code, url, title, content_type, server))
            # Append immediately so an interrupted run still keeps its rows.
            with open(csv_file, 'a+', encoding='utf-8', newline='') as outfile:
                csv.writer(outfile).writerow(
                    [stat_code, url, title, location, content_type, server])
        except Exception as e:
            # Best-effort: report the failure and move on to the next URL.
            print(url + ' ' + str(e))
def main():
    """Write the CSV header row, then fetch banners on a worker thread."""
    # Write the header BEFORE starting the worker: the original wrote it
    # after thread.start(), racing the worker's data rows so the header
    # could land mid-file. Also fixes the 'loaction' typo.
    with open(csv_file, 'a+', encoding='utf-8', newline='') as f:
        csv.writer(f).writerow(
            ['stat_code', 'url', 'title', 'location', 'type', 'server'])
    worker = threading.Thread(target=get_banner_export_csv)
    worker.start()


# Guard the entry point so importing this module does not kick off a scan.
if __name__ == '__main__':
    main()
使用方法:
1.在当前目录下的urls.txt中放入需要获取banner的url(可有http前缀,也可没有)。
2.csv文件名/路径需要自己修改
原文:https://www.cnblogs.com/P1g3/p/10735233.html