Adding new node app: original_text. To replace Mercury Reader. Thanks for all the text.

This commit is contained in:
Samuel Clay 2019-04-13 15:28:56 -04:00
parent 67d5142bbe
commit 2e6ad3afda
14 changed files with 200 additions and 38 deletions

View file

@ -32,12 +32,15 @@
used to store stories, read stories, feed/page fetch histories, and proxied sites.
* [PostgreSQL](http://www.postgresql.com): Relational database, used to store feeds,
subscriptions, and user accounts.
* [Redis](http://redis.io): Programmer's database, used to assemble stories for the river, store story ids, manage feed fetching schedules, and the minuscule bit of caching that NewsBlur uses.
* [Elasticsearch](http://elasticsearch.org): Search database, used for searching stories. Optional.
### Client-side and design
* [jQuery](http://www.jquery.com): Cross-browser compliant JavaScript code. IE works without effort.
* [Underscore.js](http://underscorejs.org/): Functional programming for JavaScript.
Indispensable.
* [Backbone.js](http://backbonejs.org/): Framework for the web app. Also indispensable.
* Miscellaneous jQuery Plugins: Everything from resizable layouts, to progress
bars, sortables, date handling, colors, corners, JSON, animations.
[See the complete list](https://github.com/samuelclay/NewsBlur/tree/master/media/js).
@ -191,15 +194,15 @@ these after the installation below.
If you are on Ubuntu, you can simply use [Fabric](http://docs.fabfile.org/) to install
NewsBlur and its many components. NewsBlur is designed to run on three separate servers:
an app server, a db server, and assorted task servers. To install everything on a single
machine, read through `fabfile.py` and setup all three servers without repeating the
`setup_common` steps.
machine, read through `fabfile.py` and set up all three servers (app, db, and task) without
repeating the `setup_common` steps.
### Finishing Installation
You must perform a few tasks to tie all of the various systems together.
1. First, copy local_settings.py and fill in your OAuth keys, S3 keys, database names (if not `newsblur`),
task server/broker address (RabbitMQ), and paths:
1. First, copy local_settings.py and fill in your OAuth keys, S3 keys, database names
(if not `newsblur`), task server/broker address (RabbitMQ), and paths:
cp local_settings.py.template local_settings.py
@ -291,7 +294,7 @@ You got the downtime message either through email or SMS. This is the order of o
If you don't get a 502 page, then NewsBlur isn't even reachable and you just need to contact [the
hosting provider](https://cloudsupport.digitalocean.com/s/createticket) and yell at them.
2. Check which servers can't be reached on HAProxy stats page. Basic auth can be found in secrets/configs/haproxy.conf.
2. Check which servers can't be reached on HAProxy stats page. Basic auth can be found in secrets/configs/haproxy.conf. Search the secrets repo for "gimmiestats".
Typically it'll be mongo, but any of the redis or postgres servers can be unreachable due to
acts of god. Otherwise, a frequent cause is lack of disk space. There are monitors on every DB

View file

@ -188,7 +188,10 @@ class TextImporter:
mercury_api_key = getattr(settings, 'MERCURY_PARSER_API_KEY', 'abc123')
headers["content-type"] = "application/json"
headers["x-api-key"] = mercury_api_key
url = "https://mercury.postlight.com/parser?url=%s" % url
if settings.DEBUG:
url = "http://nb.local.com:4040/rss_feeds/original_text_fetcher?url=%s" % url
else:
url = "https://www.newsblur.com/rss_feeds/original_text_fetcher?url=%s" % url
try:
r = requests.get(url, headers=headers, verify=False)

View file

@ -70,11 +70,17 @@ backend node_socket
{{ node_socket }}
backend node_favicon
http-check expect rstatus 200|503
option httpchk GET /rss_feeds/icon/1
balance roundrobin
http-check expect rstatus 200|503
option httpchk GET /rss_feeds/icon/1
balance roundrobin
{{ node_favicon }}
backend node_text
# HTTP health check: request the original text fetcher endpoint and treat a
# 200 or 503 response status as passing.
http-check expect rstatus 200|503
option httpchk GET /rss_feeds/original_text_fetcher
balance roundrobin
# Template placeholder. Presumably replaced with this backend's server lines
# when the config is built (see build_haproxy in fabfile.py) -- TODO confirm.
{{ node_text }}
backend self
balance roundrobin
option httpchk GET /_nginxchk

View file

@ -11,6 +11,11 @@ check file node-newsblur.log with path /srv/newsblur/logs/unread_counts.log
if timestamp > 10 minutes then exec "/srv/newsblur/utils/kill_node.sh"
# as uid sclay and gid sclay
# If the original text server has not written to its log in the last 10 minutes,
# something is wrong. The service name must be unique within the Monit config;
# "node-newsblur.log" is already used by the unread_counts.log check.
check file node-text-server.log with path /srv/newsblur/logs/text_server.log
    if timestamp > 10 minutes then exec "/srv/newsblur/utils/kill_node.sh"
    # as uid sclay and gid sclay
check system node_server
if loadavg (1min) > 12 then exec "/srv/newsblur/utils/kill_node.sh"
# as uid sclay and gid sclay

View file

@ -0,0 +1,13 @@
[program:node_text]
# Supervisor program for the Node original text fetcher.
# The script added alongside this config is node/original_text.js (relative to
# `directory` below); the log file names keep the text_server.* prefix because
# the Monit check watches /srv/newsblur/logs/text_server.log.
command=node node/original_text.js
directory=/srv/newsblur
environment=NODE_ENV=production
user=sclay
autostart=true
autorestart=true
#redirect_stderr=True
# NOTE(review): minfds is documented as a [supervisord] section setting;
# confirm whether it has any effect inside a [program:x] section.
minfds = 10000
priority=991
stopsignal=HUP
stdout_logfile = /srv/newsblur/logs/text_server.log
stderr_logfile = /srv/newsblur/logs/error_text_server.log

4
fabfile.py vendored
View file

@ -790,6 +790,7 @@ def setup_node_app():
# run('curl -L https://npmjs.org/install.sh | sudo sh')
# sudo('apt-get install npm')
sudo('npm install -g supervisor')
sudo('npm install -g @postlight/mercury-parser')
sudo('ufw allow 8888')
def config_node():
@ -797,6 +798,7 @@ def config_node():
put('config/supervisor_node_unread.conf', '/etc/supervisor/conf.d/node_unread.conf', use_sudo=True)
put('config/supervisor_node_unread_ssl.conf', '/etc/supervisor/conf.d/node_unread_ssl.conf', use_sudo=True)
put('config/supervisor_node_favicons.conf', '/etc/supervisor/conf.d/node_favicons.conf', use_sudo=True)
put('config/supervisor_node_text.conf', '/etc/supervisor/conf.d/node_text.conf', use_sudo=True)
sudo('supervisorctl reload')
@parallel
@ -897,7 +899,7 @@ def build_haproxy():
maintenance_servers = ['app20']
ignore_servers = []
for group_type in ['app', 'push', 'work', 'node_socket', 'node_favicon', 'www']:
for group_type in ['app', 'push', 'work', 'node_socket', 'node_favicon', 'node_text', 'www']:
group_type_name = group_type
if 'node' in group_type:
group_type_name = 'node'

View file

@ -1,19 +1,20 @@
app = require('express')()
server = require('http').Server(app)
mongo = require 'mongodb'
log = require './log.js'
DEV = process.env.NODE_ENV == 'development'
MONGODB_SERVER = if DEV then 'localhost' else 'db_mongo'
MONGODB_PORT = parseInt(process.env.MONGODB_PORT or 27017, 10)
console.log " ---> Starting NewsBlur Favicon server..."
log.debug "Starting NewsBlur Favicon server..."
if !DEV and !process.env.NODE_ENV
console.log " ---> Specify NODE_ENV=<development,production>"
log.debug "Specify NODE_ENV=<development,production>"
return
else if DEV
console.log " ---> Running as development server"
log.debug "Running as development server"
else
console.log " ---> Running as production server"
log.debug "Running as production server"
if DEV
url = "mongodb://#{MONGODB_SERVER}:#{MONGODB_PORT}/newsblur"
@ -21,25 +22,25 @@ else
url = "mongodb://#{MONGODB_SERVER}:#{MONGODB_PORT}/newsblur?replicaSet=nbset&readPreference=secondaryPreferred"
mongo.MongoClient.connect url, (err, db) =>
console.log " ---> Connected to #{db?.serverConfig.s.host}:#{db?.serverConfig.s.port} / #{err}"
log.debug "Connected to #{db?.serverConfig.s.host}:#{db?.serverConfig.s.port} / #{err}"
@collection = db?.collection "feed_icons"
app.get /\/rss_feeds\/icon\/(\d+)\/?/, (req, res) =>
feed_id = parseInt(req.params[0], 10)
etag = req.header('If-None-Match')
console.log " ---> Feed: #{feed_id} " + if etag then " / #{etag}" else ""
log.debug "Feed: #{feed_id} " + if etag then " / #{etag}" else ""
@collection.findOne _id: feed_id, (err, docs) ->
if not err and etag and docs and docs?.color == etag
console.log " ---> Cached: #{feed_id}, etag: #{etag}/#{docs?.color} " + if err then "(err: #{err})" else ""
log.debug "Cached: #{feed_id}, etag: #{etag}/#{docs?.color} " + if err then "(err: #{err})" else ""
res.sendStatus 304
else if not err and docs and docs.data
console.log " ---> Req: #{feed_id}, etag: #{etag}/#{docs?.color} " + if err then "(err: #{err})" else ""
log.debug "Req: #{feed_id}, etag: #{etag}/#{docs?.color} " + if err then "(err: #{err})" else ""
res.header 'etag', docs.color
body = new Buffer(docs.data, 'base64')
res.set("Content-Type", "image/png")
res.status(200).send body
else
console.log " ---> Redirect: #{feed_id}, etag: #{etag}/#{docs?.color} " + if err then "(err: #{err})" else ""
log.debug "Redirect: #{feed_id}, etag: #{etag}/#{docs?.color} " + if err then "(err: #{err})" else ""
if DEV
res.redirect '/media/img/icons/circular/world.png'
else

View file

@ -1,6 +1,6 @@
// Generated by CoffeeScript 1.8.0
(function() {
var DEV, MONGODB_PORT, MONGODB_SERVER, app, mongo, server, url;
var DEV, MONGODB_PORT, MONGODB_SERVER, app, log, mongo, server, url;
app = require('express')();
@ -8,21 +8,23 @@
mongo = require('mongodb');
log = require('./log.js');
DEV = process.env.NODE_ENV === 'development';
MONGODB_SERVER = DEV ? 'localhost' : 'db_mongo';
MONGODB_PORT = parseInt(process.env.MONGODB_PORT || 27017, 10);
console.log(" ---> Starting NewsBlur Favicon server...");
log.debug("Starting NewsBlur Favicon server...");
if (!DEV && !process.env.NODE_ENV) {
console.log(" ---> Specify NODE_ENV=<development,production>");
log.debug("Specify NODE_ENV=<development,production>");
return;
} else if (DEV) {
console.log(" ---> Running as development server");
log.debug("Running as development server");
} else {
console.log(" ---> Running as production server");
log.debug("Running as production server");
}
if (DEV) {
@ -33,7 +35,7 @@
mongo.MongoClient.connect(url, (function(_this) {
return function(err, db) {
console.log(" ---> Connected to " + (db != null ? db.serverConfig.s.host : void 0) + ":" + (db != null ? db.serverConfig.s.port : void 0) + " / " + err);
log.debug("Connected to " + (db != null ? db.serverConfig.s.host : void 0) + ":" + (db != null ? db.serverConfig.s.port : void 0) + " / " + err);
return _this.collection = db != null ? db.collection("feed_icons") : void 0;
};
})(this));
@ -43,22 +45,22 @@
var etag, feed_id;
feed_id = parseInt(req.params[0], 10);
etag = req.header('If-None-Match');
console.log((" ---> Feed: " + feed_id + " ") + (etag ? " / " + etag : ""));
log.debug(("Feed: " + feed_id + " ") + (etag ? " / " + etag : ""));
return _this.collection.findOne({
_id: feed_id
}, function(err, docs) {
var body;
if (!err && etag && docs && (docs != null ? docs.color : void 0) === etag) {
console.log((" ---> Cached: " + feed_id + ", etag: " + etag + "/" + (docs != null ? docs.color : void 0) + " ") + (err ? "(err: " + err + ")" : ""));
log.debug(("Cached: " + feed_id + ", etag: " + etag + "/" + (docs != null ? docs.color : void 0) + " ") + (err ? "(err: " + err + ")" : ""));
return res.sendStatus(304);
} else if (!err && docs && docs.data) {
console.log((" ---> Req: " + feed_id + ", etag: " + etag + "/" + (docs != null ? docs.color : void 0) + " ") + (err ? "(err: " + err + ")" : ""));
log.debug(("Req: " + feed_id + ", etag: " + etag + "/" + (docs != null ? docs.color : void 0) + " ") + (err ? "(err: " + err + ")" : ""));
res.header('etag', docs.color);
body = new Buffer(docs.data, 'base64');
res.set("Content-Type", "image/png");
return res.status(200).send(body);
} else {
console.log((" ---> Redirect: " + feed_id + ", etag: " + etag + "/" + (docs != null ? docs.color : void 0) + " ") + (err ? "(err: " + err + ")" : ""));
log.debug(("Redirect: " + feed_id + ", etag: " + etag + "/" + (docs != null ? docs.color : void 0) + " ") + (err ? "(err: " + err + ")" : ""));
if (DEV) {
return res.redirect('/media/img/icons/circular/world.png');
} else {

View file

@ -1,5 +1,10 @@
# Print a log line tagged with a user name.
# Output format: "[YYYY-MM-DD HH:MM:SS] ---> [username] message", where the
# timestamp is the ISO-8601 string with the "T" replaced by a space and
# everything from the first "." onwards removed.
info = (username, message) ->
    iso = new Date().toISOString()
    timestamp = iso.replace(/T/, ' ').replace(/\..+/, '')
    line = "[#{timestamp}] ---> [#{username}] #{message}"
    console.log line
exports.info = info
# Print a log line that is not tied to a user.
# Output format: "[YYYY-MM-DD HH:MM:SS] ---> message", where the timestamp is
# the ISO-8601 string with the "T" replaced by a space and everything from the
# first "." onwards removed.
debug = (message) ->
    iso = new Date().toISOString()
    timestamp = iso.replace(/T/, ' ').replace(/\..+/, '')
    line = "[#{timestamp}] ---> #{message}"
    console.log line
exports.info = info
exports.debug = debug

View file

@ -1,6 +1,6 @@
// Generated by CoffeeScript 1.8.0
(function() {
var info;
var debug, info;
info = function(username, message) {
var timestamp;
@ -8,6 +8,14 @@
return console.log("[" + timestamp + "] ---> [" + username + "] " + message);
};
debug = function(message) {
var timestamp;
timestamp = new Date().toISOString().replace(/T/, ' ').replace(/\..+/, '');
return console.log("[" + timestamp + "] ---> " + message);
};
exports.info = info;
exports.debug = debug;
}).call(this);

32
node/original_text.coffee Normal file
View file

@ -0,0 +1,32 @@
# NewsBlur Original Text Fetcher.
#
# Small Express app that exposes GET /rss_feeds/original_text_fetcher?url=<url>
# and answers with the JSON produced by Mercury Parser for that URL. Errors are
# reported as a JSON object with an `error` key; the HTTP status is left at the
# default so the load balancer health check (which requests this path without a
# `url`) keeps receiving a 200.
Mercury = require('@postlight/mercury-parser')
app = require('express')()
server = require('http').Server(app)
log = require './log.js'

DEV = process.env.NODE_ENV == 'development'

# Outside of development, the `x-api-key` request header must contain this value.
API_KEY_FRAGMENT = "djtXZrSIEfDa3Dex9FQ9AR"

log.debug "Starting NewsBlur Original Text Fetcher / Mercury Parser..."
if !DEV and !process.env.NODE_ENV
    log.debug "Specify NODE_ENV=<development,production>"
    return
else if DEV
    log.debug "Running as development server"
else
    log.debug "Running as production server"

app.get /\/rss_feeds\/original_text_fetcher\/?/, (req, res) =>
    res.setHeader 'Content-Type', 'application/json'

    url = req.query.url
    if !url
        log.debug "Missing url"
        # Return here: the response is finished and there is nothing to parse.
        return res.end JSON.stringify error: "Missing `url` query parameter."

    api_key = req.header 'x-api-key'
    # The parentheses around the indexOf argument are required. Without them
    # CoffeeScript parses `indexOf "..." == -1` as `indexOf("..." == -1)`,
    # which always yields -1 (truthy) and rejects every request.
    if not DEV and (!api_key or api_key.indexOf(API_KEY_FRAGMENT) == -1)
        log.debug "Mismatched API key: #{url}"
        return res.end JSON.stringify error: "Invalid API key. You need to set up your own Original Text server."

    on_parsed = (result) ->
        log.debug "Fetched: #{url}"
        res.end JSON.stringify result

    # Without a rejection handler a failed parse would leave the request
    # unanswered until the client gives up.
    on_failed = (err) ->
        log.debug "Failed to fetch: #{url} / #{err}"
        res.end JSON.stringify error: "Error fetching original text: #{err}"

    Mercury.parse(url).then(on_parsed).catch(on_failed)

app.listen 4040

53
node/original_text.js Normal file
View file

@ -0,0 +1,53 @@
// Generated by CoffeeScript 1.8.0
//
// NewsBlur Original Text Fetcher (compiled counterpart of original_text.coffee;
// keep the two files in step).
//
// Small Express app that exposes GET /rss_feeds/original_text_fetcher?url=<url>
// and answers with the JSON produced by Mercury Parser for that URL. Errors are
// reported as a JSON object with an `error` key; the HTTP status is left at the
// default so the load balancer health check (which requests this path without a
// `url`) keeps receiving a 200.
(function() {
  var API_KEY_FRAGMENT, DEV, Mercury, app, log, server;

  Mercury = require('@postlight/mercury-parser');

  app = require('express')();

  server = require('http').Server(app);

  log = require('./log.js');

  DEV = process.env.NODE_ENV === 'development';

  // Outside of development, the `x-api-key` request header must contain this value.
  API_KEY_FRAGMENT = "djtXZrSIEfDa3Dex9FQ9AR";

  log.debug("Starting NewsBlur Original Text Fetcher / Mercury Parser...");

  if (!DEV && !process.env.NODE_ENV) {
    log.debug("Specify NODE_ENV=<development,production>");
    return;
  } else if (DEV) {
    log.debug("Running as development server");
  } else {
    log.debug("Running as production server");
  }

  app.get(/\/rss_feeds\/original_text_fetcher\/?/, (function(_this) {
    return function(req, res) {
      var api_key, on_failed, on_parsed, url;
      res.setHeader('Content-Type', 'application/json');
      url = req.query.url;
      if (!url) {
        log.debug("Missing url");
        // Return here: the response is finished and there is nothing to parse.
        return res.end(JSON.stringify({
          error: "Missing `url` query parameter."
        }));
      }
      api_key = req.header('x-api-key');
      // Compare the result of indexOf with -1. The previous form passed the
      // boolean `"..." === -1` to indexOf, which always yields -1 (truthy)
      // and rejected every non-development request.
      if (!DEV && (!api_key || api_key.indexOf(API_KEY_FRAGMENT) === -1)) {
        log.debug("Mismatched API key: " + url);
        return res.end(JSON.stringify({
          error: "Invalid API key. You need to set up your own Original Text server."
        }));
      }
      on_parsed = function(result) {
        log.debug("Fetched: " + url);
        return res.end(JSON.stringify(result));
      };
      // Without a rejection handler a failed parse would leave the request
      // unanswered until the client gives up.
      on_failed = function(err) {
        log.debug("Failed to fetch: " + url + " / " + err);
        return res.end(JSON.stringify({
          error: "Error fetching original text: " + err
        }));
      };
      return Mercury.parse(url).then(on_parsed)["catch"](on_failed);
    };
  })(this));

  app.listen(4040);

}).call(this);

View file

@ -2,6 +2,7 @@ fs = require 'fs'
redis = require 'redis'
log = require './log.js'
DEV = process.env.NODE_ENV == 'development'
REDIS_SERVER = if process.env.NODE_ENV == 'development' then 'localhost' else 'db_redis_pubsub'
SECURE = !!process.env.NODE_SSL
# client = redis.createClient 6379, REDIS_SERVER
@ -11,6 +12,17 @@ SECURE = !!process.env.NODE_SSL
# rsub = redis.createClient 6379, REDIS_SERVER
# rclient = redis.createClient 6379, REDIS_SERVER
log.debug "Starting NewsBlur unread count server..."
if !DEV and !process.env.NODE_ENV
log.debug "Specify NODE_ENV=<development,production>"
return
else if DEV
log.debug "Running as development server"
else
log.debug "Running as production server"
if SECURE
privateKey = fs.readFileSync('/srv/newsblur/config/certificates/newsblur.com.key').toString()
certificate = fs.readFileSync('/srv/newsblur/config/certificates/newsblur.com.crt').toString()
@ -22,12 +34,14 @@ if SECURE
app = require('https').createServer options
io = require('socket.io')(app, path: "/v2/socket.io")
app.listen options.port
log.debug "Listening securely on port #{options.port}"
else
options =
port: 8888
app = require('http').createServer()
io = require('socket.io')(app, path: "/v2/socket.io")
app.listen options.port
log.debug "Listening on port #{options.port}"
# io.set('transports', ['websocket'])
@ -48,7 +62,7 @@ io.on 'connection', (socket) ->
return
socket.on "error", (err) ->
console.log " ---> Error (socket): #{err}"
log.debug "Error (socket): #{err}"
socket.subscribe?.quit()
socket.subscribe = redis.createClient 6379, REDIS_SERVER
socket.subscribe.on "error", (err) =>
@ -79,4 +93,4 @@ io.on 'connection', (socket) ->
" #{if SECURE then "(SSL)" else "(non-SSL)"}"
io.sockets.on 'error', (err) ->
console.log " ---> Error (sockets): #{err}"
log.debug "Error (sockets): #{err}"

View file

@ -1,6 +1,6 @@
// Generated by CoffeeScript 1.8.0
(function() {
var REDIS_SERVER, SECURE, app, certificate, fs, io, log, options, privateKey, redis;
var DEV, REDIS_SERVER, SECURE, app, certificate, fs, io, log, options, privateKey, redis;
fs = require('fs');
@ -8,10 +8,23 @@
log = require('./log.js');
DEV = process.env.NODE_ENV === 'development';
REDIS_SERVER = process.env.NODE_ENV === 'development' ? 'localhost' : 'db_redis_pubsub';
SECURE = !!process.env.NODE_SSL;
log.debug("Starting NewsBlur unread count server...");
if (!DEV && !process.env.NODE_ENV) {
log.debug("Specify NODE_ENV=<development,production>");
return;
} else if (DEV) {
log.debug("Running as development server");
} else {
log.debug("Running as production server");
}
if (SECURE) {
privateKey = fs.readFileSync('/srv/newsblur/config/certificates/newsblur.com.key').toString();
certificate = fs.readFileSync('/srv/newsblur/config/certificates/newsblur.com.crt').toString();
@ -25,6 +38,7 @@
path: "/v2/socket.io"
});
app.listen(options.port);
log.debug("Listening securely on port " + options.port);
} else {
options = {
port: 8888
@ -34,6 +48,7 @@
path: "/v2/socket.io"
});
app.listen(options.port);
log.debug("Listening on port " + options.port);
}
io.on('connection', function(socket) {
@ -48,7 +63,7 @@
return;
}
socket.on("error", function(err) {
return console.log(" ---> Error (socket): " + err);
return log.debug("Error (socket): " + err);
});
if ((_ref = socket.subscribe) != null) {
_ref.quit();
@ -96,7 +111,7 @@
});
io.sockets.on('error', function(err) {
return console.log(" ---> Error (sockets): " + err);
return log.debug("Error (sockets): " + err);
});
}).call(this);