安装node(注意:下文脚本使用了 async/await,需要 Node 7.6 及以上版本;请将下面命令中的版本号替换为较新的版本)
cd /usr/local/src/
wget http://nodejs.org/dist/v0.10.24/node-v0.10.24.tar.gz
tar zxvf node-v0.10.24.tar.gz
cd node-v0.10.24
./configure --prefix=/usr/local/node/0.10.24
make
make install
node -v
安装完node之后npm就安装好了
vagrant@homestead:~/code/Ecc3.0_System01$ npm -v
切换cnpm国内源
╰$ npm install -g cnpm --registry=https://registry.npm.taobao.org
安装puppeteer
╰$ cnpm i puppeteer
测试脚本
const puppeteer = require('puppeteer');
/**
 * Smoke test for Puppeteer: open Baidu, capture the landing page as PNG and
 * PDF, type a query, click search, then capture the results page.
 *
 * Output files are written under ./output/ ; this script does not create
 * that directory. Any error is logged, and the browser is always closed.
 */
(async () => {
  // Plain timer-based pause, used instead of page.waitFor(ms), which is
  // deprecated and no longer exists in later Puppeteer releases.
  const sleep = (ms) => new Promise((done) => setTimeout(done, ms));

  let browser;
  try {
    browser = await puppeteer.launch();
    const page = await browser.newPage();
    await page.goto('https://www.baidu.com/');
    // Pause before capturing the page.
    await sleep(1000 * 2);
    // Screenshot of the landing page.
    await page.screenshot({ path: './output/baidu.png', fullPage: true });
    // PDF of the landing page.
    await page.pdf({ path: './output/baidu.pdf', format: 'A4', printBackground: true });
    // Type the query; `delay` is the pause between key presses in
    // milliseconds (the original passed `true`, which is not a duration).
    await page.type('#kw', 'Python', { delay: 100 });
    // Trigger the search.
    await page.click('#su');
    // Pause before capturing the results.
    await sleep(1000 * 5);
    await page.setViewport({
      width: 1920,
      height: 1080,
    });
    // Screenshot of the results page.
    await page.screenshot({ path: './output/baidu_python.png', fullPage: true });
    // PDF of the results page.
    await page.pdf({ path: './output/baidu_python.pdf', format: 'A4', printBackground: true });
  } catch (error) {
    console.log(`this is the ${error}`);
  } finally {
    // Close the browser even when a step above failed, so no Chromium
    // process is left running.
    if (browser) {
      await browser.close();
    }
  }
})();
结果
批量下载 ES6 文档
const puppeteer = require("puppeteer");
/**
 * Collect the links matched by `ol li a` on the ES6 tutorial index page and
 * save each linked page as an A4 PDF named after the link text.
 *
 * Errors are logged; the browser is closed in all cases.
 */
(async () => {
  // NOTE(review): machine-specific absolute path kept from the original;
  // adjust it for your environment.
  const OUTPUT_DIR = '/Users/shiyuxiang/develop/www/Ecc3.0_System01/output';

  let browser;
  try {
    browser = await puppeteer.launch({
      headless: true,
      // Chromium switches must start with two ASCII hyphens. The original
      // used an en dash ("–"), so the flags were not recognised.
      args: [
        '--disable-gpu', // GPU hardware acceleration
        '--disable-dev-shm-usage', // shared memory used for temporary files
        '--disable-setuid-sandbox', // setuid sandbox
        '--no-first-run', // skip first-run setup; start on a blank page
        '--no-sandbox', // sandbox mode
        '--no-zygote',
        '--single-process', // run in a single process
      ],
    });

    const page = await browser.newPage();
    await page.goto('http://es6.ruanyifeng.com/#README', {
      timeout: 0, // no navigation timeout
    });

    // Runs inside the page: turn every matching anchor into { href, name }.
    const aTags = await page.evaluate(() =>
      [...document.querySelectorAll('ol li a')].map((a) => ({
        href: a.href.trim(),
        name: a.text,
      }))
    );

    // The loop starts at index 1, as in the original: aTags[0] is not
    // exported here (the original only had a commented-out line for it).
    for (let i = 1; i < aTags.length; i++) {
      const a = aTags[i];
      const pageS = await browser.newPage();
      console.log(`完成个数:${i}`);
      try {
        await pageS.goto(a.href, { timeout: 0 });
        await pageS.pdf({ path: `${OUTPUT_DIR}/${a.name}.pdf`, format: 'a4' });
      } finally {
        // Wait for the tab to close before opening the next one.
        await pageS.close();
      }
    }
    console.log('完成');
  } catch (err) {
    console.log(`this is the ${err}`);
  } finally {
    // Close the browser on both success and failure.
    if (browser) {
      await browser.close();
    }
  }
})();
执行
╰$ node crawl.js
结果
将PDF合并到一块
╰$ sudo apt-get install pdftk
╰$ cnpm i pdf-merge
脚本
const PDFMerge = require('pdf-merge');
const path = require('path');
const fs = require('fs');
/**
 * Build a POSIX-style path anchored at this script's directory.
 *
 * @param {string} dir - First path segment, joined after `__dirname`.
 * @param {string} [dir2=''] - Optional second segment appended after `dir`.
 * @returns {string} The joined path.
 */
function resolve(dir, dir2 = '') {
  const segments = [__dirname, './', dir, dir2];
  return path.posix.join(...segments);
}
// Configuration: input and output directories, relative to this script.
const config = {
  entry: './output/',
  output: './data/',
};

/**
 * Read the numeric prefix of a file name such as "12.foo.pdf".
 *
 * @param {string} name - File name to inspect.
 * @returns {number} The 1-2 digit prefix before the first dot, or
 *   Number.MAX_SAFE_INTEGER when there is none, so such names sort last.
 */
function chapterNumber(name) {
  const found = /^(\d{1,2})\./.exec(name);
  // Use the capture group. Applying unary "+" to the whole match array
  // (as the original did) yields NaN, which left the sort a no-op.
  return found ? Number(found[1]) : Number.MAX_SAFE_INTEGER;
}

const filenameArr = fs.readdirSync(resolve(config.entry));
// Keep only PDF files (the entry directory may hold other files, e.g.
// screenshots), then order them by their numeric prefix.
const sortedFilenameArr = filenameArr
  .filter((name) => path.extname(name).toLowerCase() === '.pdf')
  .sort((str1, str2) => chapterNumber(str1) - chapterNumber(str2));

// Full paths of the PDFs to merge, in merge order.
const files = sortedFilenameArr.map((el) => resolve(`${config.entry}${el}`));
console.log('files', files);

const outputPath = resolve(config.output);
const isExists = fs.existsSync(outputPath);
console.log('isExists', isExists, 'outputPath', outputPath);
/**
 * Create the directory at `outputPath` and print the outcome.
 *
 * An error thrown by `fs.mkdirSync` is caught and logged, not rethrown.
 */
function mkdirOutputpath() {
  const report = [];
  try {
    fs.mkdirSync(outputPath);
    report.push('mkdir is successful!');
  } catch (mkdirError) {
    report.push('mkdir is failed!', mkdirError);
  }
  console.log(...report);
}
// Create the output directory if it does not exist yet.
if (!isExists) {
  mkdirOutputpath();
}
console.log('let\'s start merge...');
// Date.now() in the name gives each run its own output file.
const filename = `ES6 入门教程-${Date.now()}.pdf`;
const output = resolve(`${config.output}${filename}`);
// Merge `files` in order and save the result as a new file at `output`.
PDFMerge(files, {
  output,
})
  .then(() => {
    console.log('merge is successful!');
  })
  .catch((err) => {
    // Report a failed merge instead of leaving the rejection unhandled,
    // and make the process exit with a non-zero code.
    console.error('merge is failed!', err);
    process.exitCode = 1;
  });
本文暂时没有评论,来添加一个吧(●'◡'●)