首页 > 其他 > 详细

node小爬虫

时间:2017-12-13 21:29:19      阅读:195      评论:0      收藏:0      [点我收藏+]
const http = require('http');
const fs = require('fs');
const cheerio = require('cheerio');
// Listing-page URLs to crawl: pages 1 through 33 of the course list.
const urlArr = [];
// Declared next to urlArr; none of the code visible here reads or writes it.
const imgArr = [];
for (let i = 1; i <= 33; i++) {
    urlArr.push('http://www.imooc.com/course/list?page=' + i);
}

/**
 * Fetch every listing page in `urlArr` and, for each `.course-card` element
 * on a page, save its banner image (via `save_img`) and a text file holding
 * the course title followed by its link (via `save_title`).
 *
 * Requests for all pages are issued immediately, one per entry in `urlArr`;
 * results are written as each response finishes. Nothing is returned.
 */
function getImg() {
    urlArr.forEach(function(url) {
        http.get(url, function(res) {
            // Must start as an empty string: appending to an uninitialised
            // variable would prefix the markup with the text "undefined".
            let html = '';
            // Let the stream decode UTF-8 so a multi-byte character that
            // straddles two chunks is not corrupted by per-chunk conversion.
            res.setEncoding('utf8');
            res.on('data', function(data) {
                html += data;
            });
            res.on('end', function() {
                const $ = cheerio.load(html);
                $('.course-card').each(function(index, ele) {
                    const title = $(ele).find('.course-card-name').text();
                    const imgUrl = $(ele).find('.course-banner').attr('src');
                    // NOTE(review): reads href from the card element itself,
                    // so presumably `.course-card` is the <a> tag — confirm.
                    const videoUrl = 'http://www.imooc.com'+$(ele).attr('href');
                    const text = title + videoUrl;
                    // Skip the download when the card has no banner src;
                    // otherwise the request URL would be "http:undefined".
                    // The 'http:' prefix assumes a protocol-relative src
                    // ("//host/path") — TODO confirm against the live markup.
                    if (imgUrl) {
                        save_img('http:'+imgUrl, title);
                    }
                    save_title(title, text);
                });
            });
        }).on('error', function(err) {
            // Without a listener a failed page request is an uncaught
            // exception that aborts the whole crawl.
            console.log(err);
        });
    });
}

/**
 * Download the resource at `url` and write it to
 * "node_download/img/<title>.jpg".
 *
 * The target directory is not created here; a missing directory surfaces as
 * a write error. Failures (request error, non-200 status, response stream
 * error, write error) are logged with console.log and do not throw.
 *
 * @param {string} url   Absolute http URL of the image.
 * @param {string} title Used verbatim as the file name (without extension).
 */
function save_img(url, title) {
    http.get(url, function(res){
        if (res.statusCode !== 200) {
            console.log(new Error("Request for " + url + " failed with status " + res.statusCode));
            // Drain the body so the socket is released.
            res.resume();
            return;
        }
        // Collect raw Buffers; no string decoding is applied to image bytes.
        const chunks = [];
        res.on("data", function(chunk){
            chunks.push(chunk);
        });
        res.on("error", function(err){
            console.log(err);
        });
        res.on("end", function(){
            fs.writeFile("node_download/img/"+title+".jpg", Buffer.concat(chunks), function(err){
                if(err){
                    console.log(err);
                }else {
                    console.log(title);
                }
            });
        });
    }).on("error", function(err){
        // Without this listener a refused or reset connection is an
        // uncaught exception that terminates the process.
        console.log(err);
    });
}

/**
 * Write `text` as UTF-8 to "node_download/txt/<title>.txt".
 *
 * The target directory is not created here; a missing directory surfaces as
 * a write error. On failure the error is logged, on success the title is
 * logged (both via console.log). Nothing is returned.
 *
 * @param {string} title Used verbatim as the file name (without extension).
 * @param {string} text  File contents.
 */
function save_title(title, text) {
    fs.writeFile("node_download/txt/"+title+".txt", text, 'utf8', function(err){
        if(err){
            console.log(err);
        }else {
            console.log(title);
        }
    });
}

// Script entry point: start the crawl when the file is run.
getImg();

 

node小爬虫

原文:http://www.cnblogs.com/huangtonghui/p/8034232.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!