首页 > 其他 > 详细

第一个爬虫和测试

时间:2020-05-13 18:57:42      阅读:44      评论:0      收藏:0      [点我收藏+]

1.完善并测试球赛程序

from random import random
#第一阶段
def printIntro():
    """Print the three-line banner that introduces the simulation."""
    banner = (
        "学号尾数是37",
        "这个程序模拟两个选手A和B的羽毛球比赛",
        "程序运行需要A和B的能力值(以0到1之间的小数表示)",
    )
    for line in banner:
        print(line)
# Smoke test: the banner must print without raising.
try:
    printIntro()
except Exception:  # not a bare except, so Ctrl-C / SystemExit still propagate
    print("printIntro Error")
def getInputs():
    """Prompt on the console for both ability values and the number of games.

    Returns:
        A tuple ``(a, b, n)``: ``a`` and ``b`` are floats, ``n`` is an int.

    Raises:
        ValueError: if an entry is not a plain number.
    """
    # Parse with float()/int() rather than eval(): text typed at the prompt
    # must never be executed as Python code.
    a = float(input("请输入选手A的能力值(0-1): "))
    b = float(input("请输入选手B的能力值(0-1): "))
    n = int(input("模拟比赛的场次: "))
    return a, b, n
# Smoke test: prompts on the console once; the returned values are discarded.
try:
    getInputs()
except Exception:  # not a bare except, so Ctrl-C at the prompt still aborts
    print("getInputs Error")
def simNGames(n, probA, probB):
    """Simulate n games and return how many each player won as (winsA, winsB).

    A game counts for A only when A's final score is strictly higher;
    every other outcome counts for B.
    """
    tally = {"A": 0, "B": 0}
    for _ in range(n):  # n games are simply n independent single-game runs
        ptsA, ptsB = simOneGame(probA, probB)
        tally["A" if ptsA > ptsB else "B"] += 1
    return tally["A"], tally["B"]
# Smoke test for simNGames.
# NOTE: simOneGame is defined further down the file, so at this point the
# call raises NameError and the error message below is what gets printed.
try:
    simNGames(1000, 0.7, 0.6)
except Exception:  # not a bare except, so Ctrl-C / SystemExit still propagate
    print("simNGames Error")
def printSummary(winsA, winsB):
    """Print the number of simulated games and each player's wins and win share.

    Args:
        winsA: number of games won by player A.
        winsB: number of games won by player B.

    When no games were played (both counts are zero) the shares are reported
    as 0.0% instead of raising ZeroDivisionError.
    """
    n = winsA + winsB
    shareA = winsA / n if n else 0.0
    shareB = winsB / n if n else 0.0
    print("羽毛球比赛分析开始,共模拟{}场比赛".format(n))
    print("选手A获胜{}场比赛,占比{:0.1%}".format(winsA, shareA))
    print("选手B获胜{}场比赛,占比{:0.1%}".format(winsB, shareB))
# Smoke test: print a summary for a fixed 652 / 348 result.
try:
    printSummary(652, 348)
except Exception:  # not a bare except, so Ctrl-C / SystemExit still propagate
    print("Error")
def main():
    """Run the whole program: banner, prompts, simulation, then summary."""
    printIntro()
    abilityA, abilityB, games = getInputs()
    outcome = simNGames(games, abilityA, abilityB)
    printSummary(*outcome)

#第二阶段    
def simOneGame(probA, probB):
    """Play one game rally by rally and return the final (scoreA, scoreB).

    A serves first. The server scores when a random draw falls below that
    player's probability; otherwise the serve passes to the opponent and
    nobody scores. The loop runs until gameOver() reports the game finished.
    """
    points = {"A": 0, "B": 0}
    chance = {"A": probA, "B": probB}
    opponent = {"A": "B", "B": "A"}
    server = "A"
    while not gameOver(points["A"], points["B"]):
        if random() < chance[server]:
            points[server] += 1
        else:
            server = opponent[server]
    return points["A"], points["B"]
# Smoke test for simOneGame.
# NOTE: gameOver is defined further down the file, so at this point the call
# raises NameError and the error message below is what gets printed.
try:
    simOneGame(0.7, 0.6)
except Exception:  # not a bare except, so Ctrl-C / SystemExit still propagate
    print("simOneGame Error")
#第三阶段
def gameOver(a, b):
    """Return True when a badminton game with scores a and b is finished.

    The game ends when either side reaches 30 points, or when a side has at
    least 21 points and leads by two or more. Before this fix, a game was
    never reported finished unless both scores were at least 20, so a
    21-10 result kept playing.

    Args:
        a: current score of player A.
        b: current score of player B.
    """
    if a == 30 or b == 30:
        # Hard cap: whoever reaches 30 first wins, even by a single point.
        return True
    return max(a, b) >= 21 and abs(a - b) >= 2
# Smoke test: gameOver must accept a pair of scores without raising.
try:
    gameOver(30,27)
except Exception:  # not a bare except, so Ctrl-C / SystemExit still propagate
    print("Error")
# Run the interactive simulation.
main()

  

2.采用requests库的get函数访问360搜索网页

import requests
# Request the 360 Search home page 20 times in a row; only the last response
# is inspected below. The timeout keeps a stalled connection from hanging.
for i in range(20):
    r = requests.get("http://www.so.com/", timeout=30)
# Decode the body as UTF-8 before reading r.text (plain ASCII quotes here:
# the typographic quotes in the original are a syntax error).
r.encoding = "utf-8"
print(r.text)
print(type(r.text))
print(len(r.text))      # length in characters
print(type(r.content))
print(len(r.content))   # length in bytes

 结果显示:

<!DOCTYPE html><!--[if lt IE 7 ]><html class="ie6"><![endif]--><!--[if IE 7 ]><html class="ie7"><![endif]--><!--[if IE 8 ]><html class="ie8"><![endif]--><!--[if IE 9 ]><html class="ie9"><![endif]--><!--[if (gt IE 9)|!(IE)]><!--><html class="w3c"><!--<![endif]--><head><meta charset="utf-8"><title>360搜索</title><link rel="dns-prefetch" href="//s0.qhimg.com"><link rel="dns-prefetch" href="//s1.qhimg.com"><link rel="dns-prefetch" href="//p0.qhimg.com"><link rel="dns-prefetch" href="//p1.qhimg.com">
<meta name="keywords" content="360搜索,360搜索,网页搜索,视频搜索,图片搜索,音乐搜索,新闻搜索,软件搜索,学术搜索">
<meta name="description" content="360搜索是安全、精准、可信赖的新一代搜索引擎,依托于360母品牌的安全优势,全面拦截各类钓鱼欺诈等恶意网站,提供更放心的搜索服务。 360搜索 so靠谱。">
<style type="text/css">html,body{height:100%}html,body,form,input,span,p,img,ul,ol,li,dl,dt,dd{margin:0;padding:0;border:0}ul,ol{list-style:none}body{background:#fff;font:12px/1.5 arial,sans-serif;text-align:center}a{text-decoration:none}a:hover{text-decoration:underline}.page-wrap{position:relative;min-height:100%;_height:100%}#main{width:700px;margin:0 auto;padding:100px 0}#logo{margin:0 auto 55px;position:relative;left:-10px}#logo div{width:203px;height:72px;background-image:url(http://p0.qhimg.com/t01cbf97e6893738583.png);background-image:-webkit-image-set(url(http://p1.qhimg.com/t01cbf97e6893738583.png) 1x,url(http://p0.qhimg.com/t015dc0adab50c8e912.png) 2x);margin:0 auto}#so-nav-tabs{font-size:14px;text-align:left;padding-left:75px;position:relative;z-index:999}#so-nav-tabs a,#so-nav-tabs strong{height:31px;line-height:31px;display:inline-block;margin-right:18px;*margin-right:15px;color:#666}#so-nav-tabs a:hover{color:#3eaf0e;text-decoration:none}#so-nav-tabs strong{color:#3eaf0e}#search-box{padding-left:40px;text-align:left}#input-container{width:500px;height:36px;display:inline-block;border:1px solid #bbb;box-shadow:0 2px 1px #f0f0f0;position:relative;z-index:1}#suggest-align{height:32px;position:relative}#input{width:485px;height:22px;margin:7px 0 5px 8px;outline:0;background:#fff;font-size:16px;line-height:22px;vertical-align:top}#search-button{width:100px;height:38px;_height:40px;display:inline-block;margin-left:5px;outline:0;border:1px solid #3eaf0e;*border:0;box-shadow:0 1px 1px rgba(0,0,0,0.2);-webkit-box-shadow:0 1px 1px rgba(0,0,0,0.2);-moz-box-shadow:0 1px 1px rgba(0,0,0,0.2);background:url(http://p1.qhimg.com/d/_onebox/btn-98-114.png) no-repeat #3eaf0e;color:#fff;font:bold 16px arial,sans-serif;vertical-align:top;cursor:pointer}#search-button.hover{border:1px solid #4bbe11;*border:0;background-position:0 -38px}#search-button.mousedown{border:1px solid #4bbe11;*border:0;background-position:0 
-76px}#footer{width:100%;height:36px;line-height:36px;text-align:left;color:#eaeaea;position:absolute;left:0;bottom:0}#footer p{margin:0 20px}#footer a{color:#959595;margin:0 5px 0 3px}#footer span{float:right;font-style:normal;color:#959595}</style>
</head>

<body>
<div class="page-wrap">
    <div id="main">
        <div id="logo"><div></div></div>
        <div id="so-nav-tabs">
            <a href="http://sh.qihoo.com/?src=tab_web">新闻</a>
            <strong>网页</strong>
            <a href="http://wenda.so.com/?src=tab_web">问答</a>
            <a href="http://video.so.com/?src=tab_web">视频</a>
            <a href="http://image.so.com/?src=tab_web">图片</a>
            <a href="http://music.so.com/?src=tab_web">音乐</a>
            <a href="http://map.so.com/?src=tab_web">地图</a>
            <a href="http://baike.so.com/?src=tab_web">百科</a>
            <a href="http://ly.so.com/?src=tab_web">良医</a>
        </div>
        <div id="search-box">
            <form action="/s">
                <span id="input-container">
                    <input type="hidden" name="ie" value="utf-8">
                    <input type="hidden" name="shb" value="1">
                    <input type="hidden" name="src" id="from" value="noscript_home">                    
                    <div id="suggest-align">
                        <input type="text" name="q" id="input"  suggestWidth="501px" autocomplete="off" x-webkit-speech><cite id="suggest-tp"></cite>
                    </div>
                    <div id="sug-arrow"><span id="sug-new"></span></div>
                </span><input type="submit" id="search-button" value="搜一下">
            </form>
        </div>
    </div>
    <div id="footer">
        <p>
            <span>Copyright&nbsp;&copy;&nbsp;360.CN&nbsp;&nbsp;京ICP备08010314号-19&nbsp;&nbsp;京公网安备110000000006号</span>
            <a href="http://info.so.com/feedback.html">意见反馈</a>|
            <a href="http://zhanzhang.so.com">站长平台</a>|
            <a href="http://info.so.com/about.html">关于我们</a>|
            <a href="http://www.so.com/help/help_1_1.html">使用帮助</a>|
            <a href="http://www.so.com/help/help_iduty.html">使用前必读</a>|
            <a href="http://e.360.cn?src=srp">推广合作</a>
        </p>
    </div>
</div>
<img src="//s.qhupdate.com/so/click.gif?pro=so&pid=home&mod=noscriptpage" style="display:none">
</body>
</html>
<class 'str'>
4986
<class 'bytes'>
5294

3.用HTML完成计算要求
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>菜鸟教程(runoob.com)39</title>

</head>

<body>
<h1>我的第一个标题</h1>
<p id="first">我的第一个段落。</p>
<!-- The table belongs inside <body>; markup after </body> is invalid HTML. -->
<table border="1">
<tr>
	<td> row 1,cell 1</td>
	<td> row 1,cell 2</td>
</tr>
<tr>
	<td> row 2,cell 1</td>
	<td> row 2,cell 2</td>
</tr>
</table>
</body>
</html>

  结果显示

  技术分享图片

4.爬中国大学排名网站内容
import requests
from bs4 import BeautifulSoup
# Module-level accumulator: fillUnivList appends one list of cell strings per
# table row, and printUnivList reads the rows back by index.
allUniv = []
def getHTMLText(url):
    """Fetch url and return the response body decoded as UTF-8.

    Returns an empty string when the request fails: connection error,
    timeout after 30 seconds, or an HTTP error status.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = "utf-8"
        return r.text
    except requests.RequestException:
        # Best effort: callers treat "" as "nothing fetched".
        return ""
def fillUnivList(soup):
    """Append one list of cell strings to allUniv for each table row in soup.

    Rows without any <td> cell are skipped. Each cell contributes its
    ``.string`` value.
    """
    for tr in soup.find_all("tr"):
        ltd = tr.find_all("td")
        if not ltd:
            continue
        allUniv.append([td.string for td in ltd])
def printUnivList(num):
    """Print a header line followed by at most num rows from allUniv.

    Columns are centred and padded with the full-width space chr(12288) so
    that Chinese text lines up. If fewer than num rows were collected (for
    example because the download failed), only the available rows are
    printed instead of raising IndexError.
    """
    print("{1:^2}{2:{0}^10}{3:{0}^6}{4:{0}^4}{5:{0}^10}".format(chr(12288),"排名","学校名称","省市","总分","培养规模"))
    for u in allUniv[:num]:
        # float() rather than eval(): the score is text scraped from a web
        # page and must never be executed as Python code.
        print("{1:^4}{2:{0}^10}{3:{0}^5}{4:{0}^8.1f}{5:{0}^10}".format(chr(12288),u[0],u[1],u[2],float(u[3]),u[6]))
def main(num):
    """Download the 2018 ranking page, collect its table rows and print num of them."""
    page = getHTMLText("http://www.zuihaodaxue.com/zuihaodaxuepaiming2018.html")
    fillUnivList(BeautifulSoup(page, "html.parser"))
    printUnivList(num)
# Run the scraper and print the first 10 collected rows.
main(10)

  结果显示

排名   学校名称   省市      总分    培养规模   
1    清华大学    北京    95.3      1182145  
2    北京大学    北京    78.6      665616  
3    浙江大学    浙江    73.9    452414  
4   上海交通大学   上海    73.1    226279  
5    复旦大学    上海    66.0    257468  
6  中国科学技术大学  安徽    61.9    63406   
7    南京大学    江苏    59.8    84668   
8   华中科技大学   湖北    59.1    29666   
9    中山大学    广东    58.6    148621  
10  哈尔滨工业大学  黑龙江       57.4       58319  

 




 

第一个爬虫和测试

原文:https://www.cnblogs.com/Mindf/p/12883942.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!