天天看點

nodejs爬取豆瓣影評:用nodejs寫了個爬取電影《心靈奇旅》熱評的功能。

用nodejs寫了個爬取電影《心靈奇旅》熱評的功能。

//爬取豆瓣心靈奇旅影評,包括使用者首頁頭像

let request = require('request')
let fs = require('fs')
const path = require('path');


// Offset of the next comment to crawl; read by req() to number records and
// advanced by scrapyComments() after every page.
let startNum = 0;

/**
 * Issues an HTTP request through the callback-based `request` client and
 * exposes the outcome as a Promise.
 *
 * @param {string} url - Address to fetch.
 * @returns {Promise<{response: object, body: *}>} Fulfils with the response
 *     object and body handed to the callback; rejects with the error the
 *     client reported.
 */
function reqData(url) {
    const executor = (fulfil, fail) => {
        const onFinished = (err, res, payload) => {
            if (!err) {
                fulfil({ response: res, body: payload });
                return;
            }
            fail(err);
        };

        request(url, onFinished);
    };

    return new Promise(executor);
}


/**
 * Downloads one comment-list page and extracts every short comment together
 * with its author's name, profile URL and avatar URL.
 *
 * Record ids continue from the module-level `startNum` offset, so the first
 * record of a page gets `startNum + 1`.
 *
 * @param {string} url - Page to download (passed straight to reqData).
 * @returns {Promise<Array<{id: number, userName: string, userHome: string,
 *     userIMage: string, comment: string}>>} The extracted records.
 * @throws {Error} (as a rejection) when no comment/user pair is found.
 */
async function req(url) {
    const { body } = await reqData(url);

    // Fresh literals on every call: /g regexes keep `lastIndex` between
    // exec() calls, so sharing them across pages would skip matches.
    // Comment text.
    const commentPattern = /<span class="short">(.*?)<\/span>/igs;
    // User name (title), profile link (href) and avatar (img src).
    const userPattern = /<a title="(.*?)" href="(.*?)".*?<img src="(.*?)".*?<\/a>/igs;

    const users = [];
    let id = startNum;
    let commentMatch;
    let userMatch;

    // The n-th comment is paired with the n-th user anchor; the loop stops as
    // soon as either pattern runs out of matches.
    // NOTE(review): assumes both appear in the same order on the page - confirm.
    while ((commentMatch = commentPattern.exec(body)) && (userMatch = userPattern.exec(body))) {
        id += 1;
        users.push({
            id,
            userName: userMatch[1],
            userHome: userMatch[2],
            userIMage: userMatch[3],
            comment: commentMatch[1],
        });
    }

    if (users.length === 0) {
        // Throw a real Error (not a bare string) so callers get a stack trace.
        throw new Error("爬取結果為空!");
    }

    return users;
}


/**
 * Crawls the comment list page by page (20 comments per request, 200 in
 * total), advancing the module-level `startNum` offset after each page, and
 * stores everything that was collected in "/心靈奇旅/comments.json".
 *
 * All pages are gathered into one array and handed to writeData in a single
 * call: writing one serialized array per page would leave the file as
 * `[...][...]...`, which is not valid JSON.
 *
 * @returns {Promise<void>} Resolves after all pages were crawled; rejects
 *     with the error from req() if a page fails (records collected up to
 *     that point are still written).
 */
async function scrapyComments() {
    const pageSize = 20;
    const totalComments = 20 * 10;
    const collected = [];

    try {
        while (startNum < totalComments) {
            const goalUrl = `https://movie.douban.com/subject/24733428/comments?start=${startNum}&limit=${pageSize}&status=P&sort=new_score`;

            const users = await req(goalUrl);
            collected.push(...users);

            startNum += pageSize;
        }
    } finally {
        // Runs on failure too, so a page that errors out does not discard
        // the pages that were already fetched.
        if (collected.length > 0) {
            writeData("/心靈奇旅/comments.json", JSON.stringify(collected));
        }
    }

    console.log("爬取成功!");
}

// Entry point. The returned promise must not be left floating: a failed crawl
// is reported and turned into a non-zero exit code instead of an unhandled
// rejection.
scrapyComments().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});

/**
 * Appends `data` to a file addressed relative to this script's directory,
 * creating the parent directory first when it is missing.
 *
 * The work is done synchronously so the directory is guaranteed to exist
 * before the write starts and successive calls append in call order. Failures
 * are logged with console.error rather than thrown.
 *
 * @param {string} fileName - Path of the target file, joined onto __dirname.
 * @param {string|Buffer} data - Content to append.
 * @returns {void}
 */
function writeData(fileName, data) {
    const filePath = path.join(__dirname, fileName);
    const dirPath = path.dirname(filePath);

    try {
        // `recursive: true` makes this a no-op when the directory already
        // exists, so no separate existence check is needed.
        fs.mkdirSync(dirPath, { recursive: true });
        fs.appendFileSync(filePath, data);
    } catch (err) {
        console.error(err);
    }
}
           

注:代碼小白,寫得不好,請多指教。