前置環境:
$ sudo apt-get install nodejs npm xfonts-wqy xfonts-kaname
$ sudo ln -s /usr/bin/nodejs /usr/bin/node
$ mkdir -p job/images && cd job
$ npm install webshot
程式主體:
$ vim build.js
var output_dir = 'images';
var concurrent_limit = 10;
var running_task = 0;
var total_task = [
{ domain: 'tw.yahoo.com', url: 'https://tw.yahoo.com' } ,
{ domain: 'facebook.com', url: 'https://facebook.com' } ,
];
function build_website_snapshot() {
while(total_task.length > 0 && running_task < concurrent_limit) {
var item = total_task.shift();
var url = item.url;
var domain = item.domain;
// https://github.com/brenden/node-webshot
webshot(url, output_dir+'/'+domain+'.png', {
screenSize: {
width: 320,
height: 480,
},
shotSize: {
width: 320,
height: 320,
},
timeout: 20000,
renderDelay: 3000,
userAgent: 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.20 (KHTML, like Gecko) Mobile/7B298g'
}, function(err) {
if(err)
console.log(err);
running_task--;
if (running_task == 0)
console.log('done');
if (total_task.length > 0)
build_website_snapshot();
});
running_task++;
}
}
$ node build.js
如此一來,就稍微搞定批次產出了。若要把產出的東西存進 MySQL DB server,那可以再這樣做:
$ npm install mysql
$ vim import.js
var fs = require('fs');
var path = require('path');
var mysql = require('mysql');
var connection = mysql.createConnection({
host : 'localhost',
user : 'dbuser',
password : 'dbpassword',
database : 'dbname',
});
var scan_source_dir = 'images';
var files = [];
var sql_values = [];
fs.readdirSync(scan_source_dir).filter(function(file){
//console.log(file);
if (fs.statSync(path.join(scan_source_dir, file)).isFile() && file.lastIndexOf('.png') == (file.length - 4)) {
var domain = file.substring(0, file.length - 4);
//files[domain] = fs.readFileSync(path.join(scan_source_dir, file), {encoding: 'binary'});
files[domain] = fs.readFileSync(path.join(scan_source_dir, file));
sql_values.push([domain, files[domain], Math.round(new Date().getTime()/1000), Math.round(new Date().getTime()/1000)]);
}
});
// console.log (sql_values);
/*
CREATE TABLE `snapshot_table ` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`domain` varchar(64) NOT NULL DEFAULT '',
`image` blob,
`createtime` int(11) DEFAULT NULL,
`updatetime` int(11) DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `domain` (`domain`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
*/
var sql = "INSERT INTO snapshot_table (domain, image, createtime, updatetime) VALUES ? ON DUPLICATE KEY UPDATE image=VALUES(image), updatetime=VALUES(updatetime) ";
connection.query(sql, [sql_values], function(err) {
console.log(err);
});
connection.end();
如此一來,就可以自動掃描目錄下符合 *.png 的檔案,並將 binary data 記錄至 db server 中。
沒有留言:
張貼留言