|
|
- #!/usr/bin/env node
-
- var program = require('commander')
- , read = require('node-readability')
- , he = require('he')
- , pkg = require('../package.json')
- , clc = require('cli-color');
-
-
-
// Declare the command-line interface: a version flag taken from package.json,
// a boolean colour switch, and an optional output format that defaults to ''.
program.version(pkg.version);
program.option('-c, --color', 'use colors in terminal');
program.option('-f, --format [format]', 'set the format for the result (text or html)', '');

// Parse the real command line; parsed flags become properties on `program`.
program.parse(process.argv);
-
// Table of text formatters used when printing the article.
// Without --color every formatter returns its input unchanged; with --color
// the cli-color formatters are used instead.
var styles;

if (program.color) {
  styles = {
    h1: clc.underline.bold,
    h2: clc.bold,
    muted: clc.white
  };
  // NOTE(review): clc.reset is presumably cli-color's terminal reset sequence;
  // confirm against the cli-color documentation.
  console.log(clc.reset);
} else {
  var plain = function(s) { return s; };
  styles = {
    h1: plain,
    h2: plain,
    muted: plain
  };
}
-
/**
 * Report a fatal usage error and terminate the process.
 *
 * Prints the optional message (prefixed with a red "Error: ") to stderr,
 * shows the usage banner, and exits with status 1 so that shells and
 * scripts can detect the failure.
 *
 * @param {string} [msg] - Description of what went wrong; when omitted only
 *   the usage banner is shown.
 */
var error = function(msg) {
  if (msg) console.error(clc.red("Error: ") + msg);
  console.error();
  usage();
  // A non-zero status is required here: exiting with 0 would report success.
  process.exit(1);
};
-
/**
 * Print the usage banner: the command synopsis followed by an example
 * invocation, each followed by an empty line.
 */
var usage = function() {
  var lines = [
    "Usage: readability [url]",
    "",
    "E.g.: readability http://techcrunch.com/2014/02/12/marc-andreessen-tech-is-still-recovering-from-a-depression",
    ""
  ];

  lines.forEach(function(line) {
    console.log(line);
  });
};
-
-
// Take the URL from the positional arguments left over after option parsing,
// not from the raw argv: `readability -c http://…` must not treat "-c" as
// the URL.
var url = program.args[0];

// error() terminates the process, so no top-level `return` is needed here.
if (typeof url !== 'string') {
  // No URL supplied at all: just show the usage banner.
  error();
} else if (!/^https?:\/\//.test(url)) {
  // Only absolute http/https URLs are accepted.
  error(url + " is an invalid url");
}
-
// Echo the URL being fetched, then download the page and print its readable
// form: the title, followed by the body as raw HTML (--format html) or as
// plain text with the HTML stripped.
console.log(url);
read(url, function(err, article) {
  // A failed fetch/parse must be reported instead of crashing on `article.title`.
  if (err || !article) {
    var reason = err ? (err.message || err) : "no article returned for " + url;
    console.error(clc.red("Error: ") + reason);
    return process.exit(1);
  }

  // NOTE(review): node-readability's README describes `content` as false when
  // extraction fails — confirm for the installed version. Guard it so the
  // string operations below cannot throw.
  var content = article.content;
  if (typeof content !== 'string') {
    console.error(clc.red("Error: ") + "could not extract readable content from " + url);
    return process.exit(1);
  }

  // The title of the page.
  console.log();
  console.log(styles.h1(article.title));
  console.log();

  // HTML output is printed verbatim, without any post-processing.
  if (program.format === 'html') return console.log(content);

  // Emphasise simple sub-headings (no attributes, no nested tags). The wrapper
  // function is needed because String.prototype.replace passes extra arguments
  // (offset, whole string) that must not reach the style function. The tags
  // themselves are removed further down.
  content = content.replace(/<h[2-9]>[^<]+<\/h[2-9]>/gi, function(match) {
    return styles.h2(match);
  });

  // Collapse runs of two or more spaces, and each tab, into a single space.
  content = content.replace(/ {2,}|\t/g, ' ');

  // Strip all HTML tags (including an unterminated tag at the very end),
  // then decode HTML entities such as &amp; into their characters.
  content = content.replace(/<\/?[^>]+(>|$)/g, "");
  content = he.decode(content);
  console.log(content);
});
|