wb_id is the id of the Weibo post; in the page's HTML it is stored in the mid attribute of the post's div.
# image area, multiple images
self.multi_media_xpath = "//div[@mid='{}' and not(@minfo)]//div[@class='WB_detail']//div[@class='media_box']/ul/li/img/@src"
# image area, single image
self.single_media_xpath = "//div[@mid='{}' and not(@minfo)]//div[@class='WB_detail']//div[@class='media_box']/ul/li//img/@src"
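The two templates imply the markup shape the spider expects: a post div carrying the mid attribute, a WB_detail block, and a media_box list of img nodes. As a quick illustration (not from the original post; the mid value and image URLs below are invented), a minimal fragment parsed with lxml shows what the multi-image selector returns:

# Illustration only: a tiny HTML fragment shaped like the markup the XPath
# templates expect; the mid value and image URLs are made up for the example.
from lxml import etree

sample_html = """
<div mid="4236498270458910">
  <div class="WB_detail">
    <div class="media_box">
      <ul>
        <li><img src="//wx1.sinaimg.cn/thumb150/abc.jpg"/></li>
        <li><img src="//wx1.sinaimg.cn/thumb150/def.jpg"/></li>
      </ul>
    </div>
  </div>
</div>
"""
root = etree.HTML(sample_html)
xpath = "//div[@mid='{}' and not(@minfo)]//div[@class='WB_detail']//div[@class='media_box']/ul/li/img/@src"
print(root.xpath(xpath.format("4236498270458910")))
# -> ['//wx1.sinaimg.cn/thumb150/abc.jpg', '//wx1.sinaimg.cn/thumb150/def.jpg']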
@decorator
def get_img_list(self, root, wb_id):
    # check whether the post has a single image or several
    imgurllist = []
    single_img_node_list = root.xpath(self.single_media_xpath.format(wb_id))
    # multi-image list; does not include 360 long images
    multi_img_node_list = root.xpath(self.multi_media_xpath.format(wb_id))
    if len(multi_img_node_list) > 1:
        # multi-image posts link thumb150 thumbnails; swap in the larger mw690 size
        imgurllist = ["http:" + i.replace("thumb150", "mw690") for i in multi_img_node_list]
        return imgurllist
    elif single_img_node_list:
        # single-image posts use the orj360 size in the link
        imgurllist = ["http:" + i.replace("orj360", "mw690") for i in single_img_node_list]
    else:
        print("This post has no images")
    return imgurllist
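A hypothetical call site, only for illustration: the methods are assumed to live on a spider class (named WeiboImageSpider here, which is not a name taken from the original post), and html_text is assumed to hold an already-fetched Weibo page.

# Hypothetical usage; WeiboImageSpider, html_text and the mid value are assumptions.
from lxml import etree

root = etree.HTML(html_text)   # html_text: page HTML fetched elsewhere
spider = WeiboImageSpider()
img_urls = spider.get_img_list(root, "4236498270458910")
print(img_urls)                # e.g. ['http://wx1.sinaimg.cn/mw690/abc.jpg', ...]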
def save_imge(self, url, id_path, retry=1):
    if retry > 3:
        print("Retried more than three times; giving up on this image")
        return None
    filepath = id_path
    urlname = url.split('/')[-1]
    filename = os.path.join(filepath, urlname)
    if not os.path.exists(filepath):
        os.makedirs(filepath)
    if not os.path.exists(filename):
        try:
            ir = requests.get(url, timeout=10)
            print("Downloading url", url, "id", id_path)
            with open(filename, "wb") as fs:
                fs.write(ir.content)
        except Exception:
            # on failure, back off and retry recursively (at most three retries)
            time.sleep(3)
            print("Image download timed out, retrying; attempt", retry)
            self.save_imge(url, id_path, retry + 1)
    else:
        print("Image already exists")
Original article: https://www.cnblogs.com/c-x-a/p/9146192.html