|
|
@ -0,0 +1,74 @@ |
|
|
|
#!/usr/bin/env node |
|
|
|
|
|
|
|
var program = require('commander') |
|
|
|
, read = require('node-readability') |
|
|
|
, he = require('he') |
|
|
|
, pkg = require('../package.json') |
|
|
|
, clc = require('cli-color'); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
program |
|
|
|
.version(pkg.version) |
|
|
|
.option('-c, --color', 'use colors in terminal') |
|
|
|
.option('-f, --format [format]', 'set the format for the result (text or html)', '') |
|
|
|
.parse(process.argv); |
|
|
|
|
|
|
|
var styles = {}; |
|
|
|
styles.h1 = styles.h2 = styles.muted = function(s) { return s; }; |
|
|
|
|
|
|
|
if(program.color) { |
|
|
|
styles.h1 = clc.underline.bold; |
|
|
|
styles.h2 = clc.bold; |
|
|
|
styles.muted = clc.white; |
|
|
|
console.log(clc.reset); |
|
|
|
} |
|
|
|
|
|
|
|
var error = function(msg) { |
|
|
|
if(msg) console.log(clc.red("Error: ")+msg); |
|
|
|
console.log(); |
|
|
|
usage(); |
|
|
|
process.exit(0); |
|
|
|
} |
|
|
|
|
|
|
|
var usage = function() { |
|
|
|
console.log("Usage: readability [url]"); |
|
|
|
console.log(); |
|
|
|
console.log("E.g.: readability http://techcrunch.com/2014/02/12/marc-andreessen-tech-is-still-recovering-from-a-depression"); |
|
|
|
console.log(); |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if(process.argv.length < 2) return error(); |
|
|
|
var url = process.argv[2]; |
|
|
|
|
|
|
|
if(typeof url!='string' || !url.match(/^https?:\/\//)) return error(url+" is an invalid url"); |
|
|
|
|
|
|
|
console.log(url); |
|
|
|
read(url, function(err, article, meta) { |
|
|
|
// The title of the page. |
|
|
|
console.log(); |
|
|
|
console.log(styles.h1(article.title)); |
|
|
|
console.log(); |
|
|
|
// The main body of the page. |
|
|
|
// console.log(article.content); |
|
|
|
var content = article.content; |
|
|
|
|
|
|
|
if(program.format == 'html') return console.log(content); |
|
|
|
|
|
|
|
content = content.replace(/<h[2-9]>([^<])+<\/h[2-9]>/gi, function(match, header) { |
|
|
|
return styles.h2(match); |
|
|
|
}); |
|
|
|
|
|
|
|
content = content.replace(/ {2,*}|\t/g,' '); |
|
|
|
|
|
|
|
var content = content.replace(/<\/?[^>]+(>|$)/g, ""); |
|
|
|
content = he.decode(content); |
|
|
|
console.log(content); |
|
|
|
|
|
|
|
// The raw HTML code of the page |
|
|
|
//console.log(article.html); |
|
|
|
// The document object of the page |
|
|
|
//console.log(article.document); |
|
|
|
|
|
|
|
}); |