From 2e6ad3afda03e77e0271b5cd3842952c89b1d70d Mon Sep 17 00:00:00 2001 From: Samuel Clay Date: Sat, 13 Apr 2019 15:28:56 -0400 Subject: [PATCH] Adding new node app: original_text. To replace Mercury Reader. Thanks for all the text. --- README.md | 15 +++++---- apps/rss_feeds/text_importer.py | 5 ++- config/haproxy.conf.template | 12 ++++++-- config/monit_node.conf | 5 +++ config/supervisor_node_text.conf | 13 ++++++++ fabfile.py | 4 ++- node/favicons.coffee | 19 ++++++------ node/favicons.js | 22 +++++++------ node/log.coffee | 9 ++++-- node/log.js | 10 +++++- node/original_text.coffee | 32 +++++++++++++++++++ node/original_text.js | 53 ++++++++++++++++++++++++++++++++ node/unread_counts.coffee | 18 +++++++++-- node/unread_counts.js | 21 +++++++++++-- 14 files changed, 200 insertions(+), 38 deletions(-) create mode 100644 config/supervisor_node_text.conf create mode 100644 node/original_text.coffee create mode 100644 node/original_text.js diff --git a/README.md b/README.md index 53969a40a..0479936dc 100755 --- a/README.md +++ b/README.md @@ -32,12 +32,15 @@ used to store stories, read stories, feed/page fetch histories, and proxied sites. * [PostgreSQL](http://www.postgresql.com): Relational database, used to store feeds, subscriptions, and user accounts. - + * [Redis](http://redis.io): Programmer's database, used to assemble stories for the river, store story ids, manage feed fetching schedules, and the minuscule bit of caching that NewsBlur uses. + * [Elasticsearch](http://elasticsearch.org): Search database, use for searching stories. Optional. + ### Client-side and design * [jQuery](http://www.jquery.com): Cross-browser compliant JavaScript code. IE works without effort. * [Underscore.js](http://underscorejs.org/): Functional programming for JavaScript. Indispensible. + * [Backbone.js](http://backbonejs.org/): Framework for the web app. Also indispensible. 
* Miscellaneous jQuery Plugins: Everything from resizable layouts, to progress bars, sortables, date handling, colors, corners, JSON, animations. [See the complete list](https://github.com/samuelclay/NewsBlur/tree/master/media/js). @@ -191,15 +194,15 @@ these after the installation below. If you are on Ubuntu, you can simply use [Fabric](http://docs.fabfile.org/) to install NewsBlur and its many components. NewsBlur is designed to run on three separate servers: an app server, a db server, and assorted task servers. To install everything on a single -machine, read through `fabfile.py` and setup all three servers without repeating the -`setup_common` steps. +machine, read through `fabfile.py` and setup all three servers (app, db, and task) without +repeating the `setup_common` steps. ### Finishing Installation You must perform a few tasks to tie all of the various systems together. - 1. First, copy local_settings.py and fill in your OAuth keys, S3 keys, database names (if not `newsblur`), -task server/broker address (RabbitMQ), and paths: + 1. First, copy local_settings.py and fill in your OAuth keys, S3 keys, database names + (if not `newsblur`), task server/broker address (RabbitMQ), and paths: cp local_settings.py.template local_settings.py @@ -291,7 +294,7 @@ You got the downtime message either through email or SMS. This is the order of o If you don't get a 502 page, then NewsBlur isn't even reachable and you just need to contact [the hosting provider](https://cloudsupport.digitalocean.com/s/createticket) and yell at them. - 2. Check which servers can't be reached on HAProxy stats page. Basic auth can be found in secrets/configs/haproxy.conf. + 2. Check which servers can't be reached on HAProxy stats page. Basic auth can be found in secrets/configs/haproxy.conf. Search the secrets repo for "gimmiestats". Typically it'll be mongo, but any of the redis or postgres servers can be unreachable due to acts of god. Otherwise, a frequent cause is lack of disk space. 
There are monitors on every DB diff --git a/apps/rss_feeds/text_importer.py b/apps/rss_feeds/text_importer.py index 462c9c470..5a472ffc8 100644 --- a/apps/rss_feeds/text_importer.py +++ b/apps/rss_feeds/text_importer.py @@ -188,7 +188,10 @@ class TextImporter: mercury_api_key = getattr(settings, 'MERCURY_PARSER_API_KEY', 'abc123') headers["content-type"] = "application/json" headers["x-api-key"] = mercury_api_key - url = "https://mercury.postlight.com/parser?url=%s" % url + if settings.DEBUG: + url = "http://nb.local.com:4040/rss_feeds/original_text_fetcher?url=%s" % url + else: + url = "https://www.newsblur.com/rss_feeds/original_text_fetcher?url=%s" % url try: r = requests.get(url, headers=headers, verify=False) diff --git a/config/haproxy.conf.template b/config/haproxy.conf.template index 97084f7bb..7e5dcbe7f 100644 --- a/config/haproxy.conf.template +++ b/config/haproxy.conf.template @@ -70,11 +70,17 @@ backend node_socket {{ node_socket }} backend node_favicon - http-check expect rstatus 200|503 - option httpchk GET /rss_feeds/icon/1 - balance roundrobin + http-check expect rstatus 200|503 + option httpchk GET /rss_feeds/icon/1 + balance roundrobin {{ node_favicon }} +backend node_text + http-check expect rstatus 200|503 + option httpchk GET /rss_feeds/original_text_fetcher + balance roundrobin +{{ node_text }} + backend self balance roundrobin option httpchk GET /_nginxchk diff --git a/config/monit_node.conf b/config/monit_node.conf index 09bbb587c..5a66672a1 100644 --- a/config/monit_node.conf +++ b/config/monit_node.conf @@ -11,6 +11,11 @@ check file node-newsblur.log with path /srv/newsblur/logs/unread_counts.log if timestamp > 10 minutes then exec "/srv/newsblur/utils/kill_node.sh" # as uid sclay and gid sclay +# If the original text server has logged nothing in the last 10 minutes, something is wrong +check file node-text.log with path /srv/newsblur/logs/text_server.log + if timestamp > 10 minutes then exec "/srv/newsblur/utils/kill_node.sh" + # as uid sclay and gid sclay +
check system node_server if loadavg (1min) > 12 then exec "/srv/newsblur/utils/kill_node.sh" # as uid sclay and gid sclay diff --git a/config/supervisor_node_text.conf b/config/supervisor_node_text.conf new file mode 100644 index 000000000..e0625f635 --- /dev/null +++ b/config/supervisor_node_text.conf @@ -0,0 +1,13 @@ +[program:node_text] +command=node node/original_text.js +directory=/srv/newsblur +environment=NODE_ENV=production +user=sclay +autostart=true +autorestart=true +#redirect_stderr=True +minfds = 10000 +priority=991 +stopsignal=HUP +stdout_logfile = /srv/newsblur/logs/text_server.log +stderr_logfile = /srv/newsblur/logs/error_text_server.log diff --git a/fabfile.py b/fabfile.py index 956c6d08c..4893ddf06 100644 --- a/fabfile.py +++ b/fabfile.py @@ -790,6 +790,7 @@ def setup_node_app(): # run('curl -L https://npmjs.org/install.sh | sudo sh') # sudo('apt-get install npm') sudo('npm install -g supervisor') + sudo('npm install -g @postlight/mercury-parser') sudo('ufw allow 8888') def config_node(): @@ -797,6 +798,7 @@ def config_node(): put('config/supervisor_node_unread.conf', '/etc/supervisor/conf.d/node_unread.conf', use_sudo=True) put('config/supervisor_node_unread_ssl.conf', '/etc/supervisor/conf.d/node_unread_ssl.conf', use_sudo=True) put('config/supervisor_node_favicons.conf', '/etc/supervisor/conf.d/node_favicons.conf', use_sudo=True) + put('config/supervisor_node_text.conf', '/etc/supervisor/conf.d/node_text.conf', use_sudo=True) sudo('supervisorctl reload') @parallel @@ -897,7 +899,7 @@ def build_haproxy(): maintenance_servers = ['app20'] ignore_servers = [] - for group_type in ['app', 'push', 'work', 'node_socket', 'node_favicon', 'www']: + for group_type in ['app', 'push', 'work', 'node_socket', 'node_favicon', 'node_text', 'www']: group_type_name = group_type if 'node' in group_type: group_type_name = 'node' diff --git a/node/favicons.coffee b/node/favicons.coffee index 4c36a6943..d321463cb 100644 --- a/node/favicons.coffee +++
b/node/favicons.coffee @@ -1,19 +1,20 @@ app = require('express')() server = require('http').Server(app) mongo = require 'mongodb' +log = require './log.js' DEV = process.env.NODE_ENV == 'development' MONGODB_SERVER = if DEV then 'localhost' else 'db_mongo' MONGODB_PORT = parseInt(process.env.MONGODB_PORT or 27017, 10) -console.log " ---> Starting NewsBlur Favicon server..." +log.debug "Starting NewsBlur Favicon server..." if !DEV and !process.env.NODE_ENV - console.log " ---> Specify NODE_ENV=" + log.debug "Specify NODE_ENV=" return else if DEV - console.log " ---> Running as development server" + log.debug "Running as development server" else - console.log " ---> Running as production server" + log.debug "Running as production server" if DEV url = "mongodb://#{MONGODB_SERVER}:#{MONGODB_PORT}/newsblur" @@ -21,25 +22,25 @@ else url = "mongodb://#{MONGODB_SERVER}:#{MONGODB_PORT}/newsblur?replicaSet=nbset&readPreference=secondaryPreferred" mongo.MongoClient.connect url, (err, db) => - console.log " ---> Connected to #{db?.serverConfig.s.host}:#{db?.serverConfig.s.port} / #{err}" + log.debug "Connected to #{db?.serverConfig.s.host}:#{db?.serverConfig.s.port} / #{err}" @collection = db?.collection "feed_icons" app.get /\/rss_feeds\/icon\/(\d+)\/?/, (req, res) => feed_id = parseInt(req.params[0], 10) etag = req.header('If-None-Match') - console.log " ---> Feed: #{feed_id} " + if etag then " / #{etag}" else "" + log.debug "Feed: #{feed_id} " + if etag then " / #{etag}" else "" @collection.findOne _id: feed_id, (err, docs) -> if not err and etag and docs and docs?.color == etag - console.log " ---> Cached: #{feed_id}, etag: #{etag}/#{docs?.color} " + if err then "(err: #{err})" else "" + log.debug "Cached: #{feed_id}, etag: #{etag}/#{docs?.color} " + if err then "(err: #{err})" else "" res.sendStatus 304 else if not err and docs and docs.data - console.log " ---> Req: #{feed_id}, etag: #{etag}/#{docs?.color} " + if err then "(err: #{err})" else "" + log.debug "Req: 
#{feed_id}, etag: #{etag}/#{docs?.color} " + if err then "(err: #{err})" else "" res.header 'etag', docs.color body = new Buffer(docs.data, 'base64') res.set("Content-Type", "image/png") res.status(200).send body else - console.log " ---> Redirect: #{feed_id}, etag: #{etag}/#{docs?.color} " + if err then "(err: #{err})" else "" + log.debug "Redirect: #{feed_id}, etag: #{etag}/#{docs?.color} " + if err then "(err: #{err})" else "" if DEV res.redirect '/media/img/icons/circular/world.png' else diff --git a/node/favicons.js b/node/favicons.js index bb458a544..62b895054 100644 --- a/node/favicons.js +++ b/node/favicons.js @@ -1,6 +1,6 @@ // Generated by CoffeeScript 1.8.0 (function() { - var DEV, MONGODB_PORT, MONGODB_SERVER, app, mongo, server, url; + var DEV, MONGODB_PORT, MONGODB_SERVER, app, log, mongo, server, url; app = require('express')(); @@ -8,21 +8,23 @@ mongo = require('mongodb'); + log = require('./log.js'); + DEV = process.env.NODE_ENV === 'development'; MONGODB_SERVER = DEV ? 'localhost' : 'db_mongo'; MONGODB_PORT = parseInt(process.env.MONGODB_PORT || 27017, 10); - console.log(" ---> Starting NewsBlur Favicon server..."); + log.debug("Starting NewsBlur Favicon server..."); if (!DEV && !process.env.NODE_ENV) { - console.log(" ---> Specify NODE_ENV="); + log.debug("Specify NODE_ENV="); return; } else if (DEV) { - console.log(" ---> Running as development server"); + log.debug("Running as development server"); } else { - console.log(" ---> Running as production server"); + log.debug("Running as production server"); } if (DEV) { @@ -33,7 +35,7 @@ mongo.MongoClient.connect(url, (function(_this) { return function(err, db) { - console.log(" ---> Connected to " + (db != null ? db.serverConfig.s.host : void 0) + ":" + (db != null ? db.serverConfig.s.port : void 0) + " / " + err); + log.debug("Connected to " + (db != null ? db.serverConfig.s.host : void 0) + ":" + (db != null ? 
db.serverConfig.s.port : void 0) + " / " + err); return _this.collection = db != null ? db.collection("feed_icons") : void 0; }; })(this)); @@ -43,22 +45,22 @@ var etag, feed_id; feed_id = parseInt(req.params[0], 10); etag = req.header('If-None-Match'); - console.log((" ---> Feed: " + feed_id + " ") + (etag ? " / " + etag : "")); + log.debug(("Feed: " + feed_id + " ") + (etag ? " / " + etag : "")); return _this.collection.findOne({ _id: feed_id }, function(err, docs) { var body; if (!err && etag && docs && (docs != null ? docs.color : void 0) === etag) { - console.log((" ---> Cached: " + feed_id + ", etag: " + etag + "/" + (docs != null ? docs.color : void 0) + " ") + (err ? "(err: " + err + ")" : "")); + log.debug(("Cached: " + feed_id + ", etag: " + etag + "/" + (docs != null ? docs.color : void 0) + " ") + (err ? "(err: " + err + ")" : "")); return res.sendStatus(304); } else if (!err && docs && docs.data) { - console.log((" ---> Req: " + feed_id + ", etag: " + etag + "/" + (docs != null ? docs.color : void 0) + " ") + (err ? "(err: " + err + ")" : "")); + log.debug(("Req: " + feed_id + ", etag: " + etag + "/" + (docs != null ? docs.color : void 0) + " ") + (err ? "(err: " + err + ")" : "")); res.header('etag', docs.color); body = new Buffer(docs.data, 'base64'); res.set("Content-Type", "image/png"); return res.status(200).send(body); } else { - console.log((" ---> Redirect: " + feed_id + ", etag: " + etag + "/" + (docs != null ? docs.color : void 0) + " ") + (err ? "(err: " + err + ")" : "")); + log.debug(("Redirect: " + feed_id + ", etag: " + etag + "/" + (docs != null ? docs.color : void 0) + " ") + (err ? 
"(err: " + err + ")" : "")); if (DEV) { return res.redirect('/media/img/icons/circular/world.png'); } else { diff --git a/node/log.coffee b/node/log.coffee index 60794fabf..134aa0733 100644 --- a/node/log.coffee +++ b/node/log.coffee @@ -1,5 +1,10 @@ info = (username, message) -> timestamp = new Date().toISOString().replace(/T/, ' ').replace(/\..+/, '') console.log "[#{timestamp}] ---> [#{username}] #{message}" - -exports.info = info \ No newline at end of file + +debug = (message) -> + timestamp = new Date().toISOString().replace(/T/, ' ').replace(/\..+/, '') + console.log "[#{timestamp}] ---> #{message}" + +exports.info = info +exports.debug = debug \ No newline at end of file diff --git a/node/log.js b/node/log.js index 105f427ab..d9dca7e05 100644 --- a/node/log.js +++ b/node/log.js @@ -1,6 +1,6 @@ // Generated by CoffeeScript 1.8.0 (function() { - var info; + var debug, info; info = function(username, message) { var timestamp; @@ -8,6 +8,14 @@ return console.log("[" + timestamp + "] ---> [" + username + "] " + message); }; + debug = function(message) { + var timestamp; + timestamp = new Date().toISOString().replace(/T/, ' ').replace(/\..+/, ''); + return console.log("[" + timestamp + "] ---> " + message); + }; + exports.info = info; + exports.debug = debug; + }).call(this); diff --git a/node/original_text.coffee b/node/original_text.coffee new file mode 100644 index 000000000..8c1b707b0 --- /dev/null +++ b/node/original_text.coffee @@ -0,0 +1,32 @@ +Mercury = require('@postlight/mercury-parser') +app = require('express')() +server = require('http').Server(app) +log = require './log.js' + +DEV = process.env.NODE_ENV == 'development' + +log.debug "Starting NewsBlur Original Text Fetcher / Mercury Parser..." 
+if !DEV and !process.env.NODE_ENV + log.debug "Specify NODE_ENV=" + return +else if DEV + log.debug "Running as development server" +else + log.debug "Running as production server" + +app.get /\/rss_feeds\/original_text_fetcher\/?/, (req, res) => + res.setHeader 'Content-Type', 'application/json' + url = req.query.url + if !url + log.debug "Missing url" + return res.end JSON.stringify error: "Missing `url` query parameter." # return: never fall through to Mercury.parse + api_key = req.header 'x-api-key' + if not DEV and (!api_key or api_key.indexOf("djtXZrSIEfDa3Dex9FQ9AR") == -1) + log.debug "Mismatched API key: #{url}" + return res.end JSON.stringify error: "Invalid API key. You need to set up your own Original Text server." # return: response is already finished + + Mercury.parse(url).then (result) => + log.debug "Fetched: #{url}" + res.end JSON.stringify result + +app.listen 4040 diff --git a/node/original_text.js b/node/original_text.js new file mode 100644 index 000000000..482c40bea --- /dev/null +++ b/node/original_text.js @@ -0,0 +1,53 @@ +// Generated by CoffeeScript 1.8.0 +(function() { + var DEV, Mercury, app, log, server; + + Mercury = require('@postlight/mercury-parser'); + + app = require('express')(); + + server = require('http').Server(app); + + log = require('./log.js'); + + DEV = process.env.NODE_ENV === 'development'; + + log.debug("Starting NewsBlur Original Text Fetcher / Mercury Parser..."); + + if (!DEV && !process.env.NODE_ENV) { + log.debug("Specify NODE_ENV="); + return; + } else if (DEV) { + log.debug("Running as development server"); + } else { + log.debug("Running as production server"); + } + + app.get(/\/rss_feeds\/original_text_fetcher\/?/, (function(_this) { + return function(req, res) { + var api_key, url; + res.setHeader('Content-Type', 'application/json'); + url = req.query.url; + if (!url) { + log.debug("Missing url"); + return res.end(JSON.stringify({ + error: "Missing `url` query parameter."
+ })); + } + api_key = req.header('x-api-key'); + if (!DEV && (!api_key || api_key.indexOf("djtXZrSIEfDa3Dex9FQ9AR") === -1)) { + log.debug("Mismatched API key: " + url); + return res.end(JSON.stringify({ + error: "Invalid API key. You need to set up your own Original Text server." + })); + } + return Mercury.parse(url).then(function(result) { + log.debug("Fetched: " + url); + return res.end(JSON.stringify(result)); + }); + }; + })(this)); + + app.listen(4040); + +}).call(this); diff --git a/node/unread_counts.coffee b/node/unread_counts.coffee index 1b72e2e16..2ea9dd230 100644 --- a/node/unread_counts.coffee +++ b/node/unread_counts.coffee @@ -2,6 +2,7 @@ fs = require 'fs' redis = require 'redis' log = require './log.js' +DEV = process.env.NODE_ENV == 'development' REDIS_SERVER = if process.env.NODE_ENV == 'development' then 'localhost' else 'db_redis_pubsub' SECURE = !!process.env.NODE_SSL # client = redis.createClient 6379, REDIS_SERVER @@ -11,6 +12,17 @@ SECURE = !!process.env.NODE_SSL # rsub = redis.createClient 6379, REDIS_SERVER # rclient = redis.createClient 6379, REDIS_SERVER + +log.debug "Starting NewsBlur unread count server..."
+if !DEV and !process.env.NODE_ENV + log.debug "Specify NODE_ENV=" + return +else if DEV + log.debug "Running as development server" +else + log.debug "Running as production server" + + if SECURE privateKey = fs.readFileSync('/srv/newsblur/config/certificates/newsblur.com.key').toString() certificate = fs.readFileSync('/srv/newsblur/config/certificates/newsblur.com.crt').toString() @@ -22,12 +34,14 @@ if SECURE app = require('https').createServer options io = require('socket.io')(app, path: "/v2/socket.io") app.listen options.port + log.debug "Listening securely on port #{options.port}" else options = port: 8888 app = require('http').createServer() io = require('socket.io')(app, path: "/v2/socket.io") app.listen options.port + log.debug "Listening on port #{options.port}" # io.set('transports', ['websocket']) @@ -48,7 +62,7 @@ io.on 'connection', (socket) -> return socket.on "error", (err) -> - console.log " ---> Error (socket): #{err}" + log.debug "Error (socket): #{err}" socket.subscribe?.quit() socket.subscribe = redis.createClient 6379, REDIS_SERVER socket.subscribe.on "error", (err) => @@ -79,4 +93,4 @@ io.on 'connection', (socket) -> " #{if SECURE then "(SSL)" else "(non-SSL)"}" io.sockets.on 'error', (err) -> - console.log " ---> Error (sockets): #{err}" + log.debug "Error (sockets): #{err}" diff --git a/node/unread_counts.js b/node/unread_counts.js index 67d4e5e66..cc4df3614 100644 --- a/node/unread_counts.js +++ b/node/unread_counts.js @@ -1,6 +1,6 @@ // Generated by CoffeeScript 1.8.0 (function() { - var REDIS_SERVER, SECURE, app, certificate, fs, io, log, options, privateKey, redis; + var DEV, REDIS_SERVER, SECURE, app, certificate, fs, io, log, options, privateKey, redis; fs = require('fs'); @@ -8,10 +8,23 @@ log = require('./log.js'); + DEV = process.env.NODE_ENV === 'development'; + REDIS_SERVER = process.env.NODE_ENV === 'development' ? 
'localhost' : 'db_redis_pubsub'; SECURE = !!process.env.NODE_SSL; + log.debug("Starting NewsBlur unread count server..."); + + if (!DEV && !process.env.NODE_ENV) { + log.debug("Specify NODE_ENV="); + return; + } else if (DEV) { + log.debug("Running as development server"); + } else { + log.debug("Running as production server"); + } + if (SECURE) { privateKey = fs.readFileSync('/srv/newsblur/config/certificates/newsblur.com.key').toString(); certificate = fs.readFileSync('/srv/newsblur/config/certificates/newsblur.com.crt').toString(); @@ -25,6 +38,7 @@ path: "/v2/socket.io" }); app.listen(options.port); + log.debug("Listening securely on port " + options.port); } else { options = { port: 8888 @@ -34,6 +48,7 @@ path: "/v2/socket.io" }); app.listen(options.port); + log.debug("Listening on port " + options.port); } io.on('connection', function(socket) { @@ -48,7 +63,7 @@ return; } socket.on("error", function(err) { - return console.log(" ---> Error (socket): " + err); + return log.debug("Error (socket): " + err); }); if ((_ref = socket.subscribe) != null) { _ref.quit(); @@ -96,7 +111,7 @@ }); io.sockets.on('error', function(err) { - return console.log(" ---> Error (sockets): " + err); + return log.debug("Error (sockets): " + err); }); }).call(this);