puppeteer(木偶人爬虫)
最近在公司大佬的带领下了解了一下谷歌的木偶人,发现JS无所不能啊。
直接上菜吧(获取bing页面的壁纸到本地)
const puppeteer = require('puppeteer');
const https = require('https');
const fs = require('fs');
// Entry point: launch a browser, open the wallpaper gallery, and save every
// image the page requests while it is scrolled to the bottom.
(async () => {
    // A launch failure propagates to the .catch() at the bottom. The previous
    // inline `.catch(() => browse.close)` referenced `browse` before it was
    // initialized, which replaced the real launch error with a ReferenceError.
    const browse = await puppeteer.launch({
        headless: false,
        devtools: false
    });
    try {
        const page = await browse.newPage();
        await page.setViewport({ width: 1280, height: 800 });
        await page.setRequestInterception(true);
        page.on('request', request => {
            // Watch every request; image requests are additionally downloaded to disk.
            if (request.resourceType() === 'image') {
                try {
                    downLoadAndSave(request.url());
                } catch (err) {
                    // A failed download must not prevent the request from being continued.
                    console.log(`download failed for ${request.url()}: ${err}`);
                }
            } else {
                console.log("continue");
            }
            // With interception enabled, a request that is never continued stays pending.
            request.continue();
        });
        await page.goto('https://bing.ioliu.cn/');
        await autoScroll(page);
    } finally {
        // Always release the browser so the Node process can exit once the
        // pending downloads have finished.
        await browse.close();
    }
})().catch(err => {
    console.error('crawler failed:', err);
    process.exitCode = 1;
});
// Simulate a human scrolling through the page.
/**
 * Scroll the page down in fixed steps until the accumulated distance reaches
 * the document body's scroll height, logging when scrolling starts and ends.
 *
 * @param {object} page - Object exposing `evaluate(fn, ...args)`; called with a Puppeteer page in this script.
 * @param {number} [distance=100] - Pixels scrolled per step; must be greater than 0.
 * @param {number} [delay=200] - Milliseconds between two steps.
 * @returns {Promise<void>} Resolves once the bottom has been reached.
 * @throws {RangeError} If `distance` is not a positive number (the loop would never end).
 */
async function autoScroll(page, distance = 100, delay = 200) {
    if (!(distance > 0)) {
        throw new RangeError(`distance must be a positive number, got ${distance}`);
    }
    console.log('auto scroll start');
    // The callback is executed by page.evaluate, so the step and interval are
    // handed over as arguments instead of being captured from this scope.
    await page.evaluate((step, interval) => new Promise(resolve => {
        let totalHeight = 0;
        const timer = setInterval(() => {
            // Read the height on every tick so that growth of the page while
            // scrolling is taken into account.
            const scrollHeight = document.body.scrollHeight;
            window.scrollBy(0, step);
            totalHeight += step;
            if (totalHeight >= scrollHeight) {
                clearInterval(timer);
                resolve();
            }
        }, interval);
    }), distance, delay);
    console.log('auto scroll done');
}
// Save a file.
// Sequence number appended to file names so that images finishing within the
// same millisecond do not overwrite each other.
let savedImageCount = 0;

/**
 * Download one image over HTTPS and write it to `image/img_<timestamp>_<seq>.jpeg`.
 *
 * Fire-and-forget: nothing is returned and failures are only logged, so one
 * broken image does not stop the crawl. URLs that are not `https:` (for
 * example `data:` or `http:`) are skipped because `https.get` throws on them.
 *
 * @param {string} url - Address of the image to download.
 * @returns {void}
 */
function downLoadAndSave(url) {
    let protocol;
    try {
        protocol = new URL(url).protocol;
    } catch (err) {
        console.log(`skip invalid url ${url}: ${err}`);
        return;
    }
    if (protocol !== 'https:') {
        return;
    }
    // The callback of https.get receives the response only.
    const request = https.get(url, res => {
        if (res.statusCode < 200 || res.statusCode >= 300) {
            // Drain the body so the socket is released, but do not save an
            // error page or redirect body as an image.
            res.resume();
            console.log(`skip ${url}: HTTP ${res.statusCode}`);
            return;
        }
        const chunks = [];
        res.on('data', chunk => {
            chunks.push(chunk);
        });
        res.on('error', err => {
            console.log(`download failed for ${url}: ${err}`);
        });
        res.on('end', () => {
            savedImageCount += 1;
            const file = `image/img_${Date.now()}_${savedImageCount}.jpeg`;
            // Create the target directory on demand so a fresh checkout works.
            fs.mkdir('image', { recursive: true }, mkdirErr => {
                if (mkdirErr) {
                    console.log('保存出错' + mkdirErr);
                    return;
                }
                fs.writeFile(file, Buffer.concat(chunks), err => {
                    if (err) {
                        console.log('保存出错' + err);
                    }
                });
            });
        });
    });
    // Without this listener a connection error would be thrown as an
    // unhandled 'error' event and terminate the process.
    request.on('error', err => {
        console.log(`download failed for ${url}: ${err}`);
    });
}
package.json
{
"name": "MuourenTest",
"version": "1.0.0",
"description": "木偶人测试",
"main": "index.js",
"scripts": {
"test:jest": "jest"
},
"author": "xiaohuwei",
"license": "ISC",
"devDependencies": {
"chromedriver": "^75.1.0",
"nightwatch": "^1.1.13"
},
"dependencies": {
"puppeteer": "^1.18.1"
}
}
执行流程和效果
npm i
node request.js
PS:需要在同级目录新建 image 文件夹哦
请问博主的是什么音乐插件,方便发一下插件链接吗
YoduBGM
666
js只差没写操作系统了,真是无所不能,但我不太喜欢
是的
爬这些图片也用不到啊。
这个不是目的@(哈哈)
JS牛逼……感觉都没必要折腾python了……@(惊讶)
js无所不能