Adding discover to folders of feeds. Holy moly this is nearly done.

This commit is contained in:
Samuel Clay 2024-07-12 18:16:11 -04:00
parent 92f881e54b
commit 7e68b00d13
9 changed files with 101 additions and 39 deletions

View file

@ -8,13 +8,13 @@
- ../env_vars/base.yml - ../env_vars/base.yml
roles: roles:
- { role: "base", tags: "base" } # - { role: "base", tags: "base" }
- { role: "ufw", tags: "ufw" } # - { role: "ufw", tags: "ufw" }
- { role: "docker", tags: "docker" } # - { role: "docker", tags: "docker" }
- { role: "repo", tags: ["repo", "pull"] } # - { role: "repo", tags: ["repo", "pull"] }
- { role: "dnsmasq", tags: "dnsmasq" } # - { role: "dnsmasq", tags: "dnsmasq" }
- { role: "consul", tags: "consul" } # - { role: "consul", tags: "consul" }
- { role: "consul-client", tags: "consul" } # - { role: "consul-client", tags: "consul" }
- { role: "mongo", tags: "mongo" } - { role: "mongo", tags: "mongo" }
- { role: "node-exporter", tags: ["node-exporter", "metrics"] } - { role: "node-exporter", tags: ["node-exporter", "metrics"] }
- { role: "mongo-exporter", tags: ["mongo-exporter", "metrics"] } - { role: "mongo-exporter", tags: ["mongo-exporter", "metrics"] }

View file

@ -95,6 +95,7 @@
- name: Block for mongo volume on hetzner - name: Block for mongo volume on hetzner
block: block:
- name: Create backup directory - name: Create backup directory
become: yes
file: file:
path: "/srv/newsblur/docker/volumes/mongo/backup" path: "/srv/newsblur/docker/volumes/mongo/backup"
state: directory state: directory

View file

@ -1056,17 +1056,17 @@ class Feed(models.Model):
end=" ", end=" ",
) )
def count_similar_feeds(self, force=False, csv_path=None): def count_similar_feeds(self, feed_ids=None, force=False):
if not force and self.similar_feeds.count(): if not force and self.similar_feeds.count():
logging.debug(f"Found {self.similar_feeds.count()} cached similar feeds for {self}")
return self.similar_feeds.all() return self.similar_feeds.all()
content_vector = SearchFeed.fetch_feed_content_vector(self.pk) if not feed_ids:
if not content_vector: feed_ids = [self.pk]
content_vector = SearchFeed.generate_feed_content_vector(self.pk) if self.pk not in feed_ids:
results = SearchFeed.vector_query(content_vector) feed_ids.append(self.pk)
logging.debug(
f"Found {len(results)} recommendations for feed {self}: {r['_source']['title'] for r in results}" results = self.find_similar_feeds(feed_ids=feed_ids)
)
self.similar_feeds.clear() self.similar_feeds.clear()
for result in results: for result in results:
@ -1076,8 +1076,19 @@ class Feed(models.Model):
except IntegrityError: except IntegrityError:
logging.debug(f" ---> ~FRIntegrity error adding similar feed: {feed_id}") logging.debug(f" ---> ~FRIntegrity error adding similar feed: {feed_id}")
pass pass
return self.similar_feeds.all() return self.similar_feeds.all()
@classmethod
def find_similar_feeds(cls, feed_ids=None):
    """Return search hits for feeds similar to the given group of feeds.

    The content vectors of ``feed_ids`` are combined into a single query
    vector by ``SearchFeed.generate_combined_feed_content_vector`` and run
    through ``SearchFeed.vector_query``; the input feeds themselves are
    excluded from the results.

    Args:
        feed_ids: iterable of feed ids to find recommendations for.

    Returns:
        The list of hits returned by ``SearchFeed.vector_query`` (each hit
        is read here as a dict with a ``_source`` dict containing
        ``title``), or an empty list when no feed ids were supplied.
    """
    # Nothing to compare against: avoid building a vector from zero feeds.
    if not feed_ids:
        return []

    combined_content_vector = SearchFeed.generate_combined_feed_content_vector(feed_ids)
    results = SearchFeed.vector_query(combined_content_vector, feed_ids_to_exclude=feed_ids)

    # Build a real list: a bare generator expression inside an f-string is
    # rendered as "<generator object ...>" rather than the titles.
    titles = [r["_source"]["title"] for r in results]
    logging.debug(f"Found {len(results)} recommendations for feeds {feed_ids}: {titles}")

    return results
def _split_favicon_color(self, color=None): def _split_favicon_color(self, color=None):
if not color: if not color:
color = self.favicon_color color = self.favicon_color

View file

@ -26,5 +26,6 @@ urlpatterns = [
url(r"^original_text", views.original_text, name="original-text"), url(r"^original_text", views.original_text, name="original-text"),
url(r"^original_story", views.original_story, name="original-story"), url(r"^original_story", views.original_story, name="original-story"),
url(r"^story_changes", views.story_changes, name="story-changes"), url(r"^story_changes", views.story_changes, name="story-changes"),
url(r"^discover/(?P<feed_id>\d+)/?$", views.discover_feeds, name="discover-feeds"), url(r"^discover/(?P<feed_id>\d+)/?$", views.discover_feeds, name="discover-feed"),
url(r"^discover/feeds/?$", views.discover_feeds, name="discover-feeds"),
] ]

View file

@ -651,13 +651,20 @@ def story_changes(request):
@ajax_login_required @ajax_login_required
@json.json_view @json.json_view
def discover_feeds(request, feed_id=None): def discover_feeds(request, feed_id=None):
feed_ids = request.GET.getlist("feed_id") or request.GET.getlist("feed_id[]") if request.method == "GET" and feed_id:
if not feed_ids: similar_feed_ids = (
feed_ids = Feed.get_by_id(feed_id).count_similar_feeds(force=True).values_list("pk", flat=True) Feed.get_by_id(feed_id).count_similar_feeds(force=True).values_list("pk", flat=True)
feeds = Feed.objects.filter(pk__in=feed_ids) )
elif request.method == "POST":
feed_ids = request.POST.getlist("feed_ids")
similar_feeds = Feed.find_similar_feeds(feed_ids=feed_ids)
similar_feed_ids = [result["_source"]["feed_id"] for result in similar_feeds]
else:
return {"code": -1, "message": "Missing feed_ids.", "discover_feeds": None, "failed": True}
feeds = Feed.objects.filter(pk__in=similar_feed_ids)
discover_feeds = defaultdict(dict) discover_feeds = defaultdict(dict)
for feed in feeds: for feed in feeds:
discover_feeds[feed.pk]["feed"] = feed.canonical(include_favicon=False) discover_feeds[feed.pk]["feed"] = feed.canonical(include_favicon=False)
discover_feeds[feed.pk]["stories"] = feed.get_stories(limit=5) discover_feeds[feed.pk]["stories"] = feed.get_stories(limit=5)
logging.user(request, "~FCDiscovering similar feeds: ~SB%s" % feed_ids) logging.user(request, "~FCDiscovering similar feeds: ~SB%s" % similar_feed_ids)
return {"discover_feeds": discover_feeds} return {"discover_feeds": discover_feeds}

View file

@ -690,21 +690,30 @@ class SearchFeed:
return results["hits"]["hits"] return results["hits"]["hits"]
@classmethod @classmethod
def vector_query(cls, query_vector, max_results=10): def vector_query(cls, query_vector, max_results=10, feed_ids_to_exclude=None):
try: try:
cls.ES().indices.flush(index=cls.index_name()) cls.ES().indices.flush(index=cls.index_name())
except elasticsearch.exceptions.NotFoundError as e: except elasticsearch.exceptions.NotFoundError as e:
logging.debug(f" ***> ~FRNo search server available: {e}") logging.debug(f" ***> ~FRNo search server available: {e}")
return [] return []
must_not_clauses = []
if feed_ids_to_exclude:
must_not_clauses.append({"terms": {"feed_id": feed_ids_to_exclude}})
body = { body = {
"query": { "query": {
"script_score": { "bool": {
"query": {"match_all": {}}, "must": {
"script": { "script_score": {
"source": "cosineSimilarity(params.query_vector, 'content_vector') + 1.0", "query": {"match_all": {}},
"params": {"query_vector": query_vector}, "script": {
"source": "cosineSimilarity(params.query_vector, 'content_vector') + 1.0",
"params": {"query_vector": query_vector},
},
}
}, },
"must_not": must_not_clauses,
} }
}, },
"size": max_results, "size": max_results,
@ -748,6 +757,20 @@ class SearchFeed:
return [] return []
return results["hits"]["hits"][0]["_source"]["content_vector"] return results["hits"]["hits"][0]["_source"]["content_vector"]
@classmethod
def generate_combined_feed_content_vector(cls, feed_ids):
    """Average the content vectors of several feeds into one unit vector.

    For each feed id the stored vector is fetched with
    ``cls.fetch_feed_content_vector``; when that comes back empty the
    vector is regenerated with ``cls.generate_feed_content_vector``. Feeds
    that still have no vector are skipped so they cannot distort (or
    break) the mean.

    Args:
        feed_ids: iterable of feed ids; ``None`` or empty is allowed.

    Returns:
        A numpy array holding the L2-normalized mean of the usable
        vectors. An empty array is returned when no usable vector exists,
        and the un-normalized mean is returned when its norm is zero
        (normalizing would only produce NaNs).

    NOTE(review): assumes every non-empty vector has the same length;
    ``np.mean`` will fail on ragged input -- confirm against the index
    mapping.
    """
    vectors = []
    for feed_id in feed_ids or []:
        vector = cls.fetch_feed_content_vector(feed_id)
        # len() rather than truthiness so array-like vectors are handled too.
        if vector is None or len(vector) == 0:
            vector = cls.generate_feed_content_vector(feed_id)
        if vector is None or len(vector) == 0:
            # No content to vectorize for this feed; leave it out of the mean.
            continue
        vectors.append(vector)

    if not vectors:
        return np.array([])

    combined_vector = np.mean(vectors, axis=0)
    norm = np.linalg.norm(combined_vector)
    if not norm:
        # Zero-length mean: dividing would yield NaN/inf components.
        return combined_vector
    return combined_vector / norm
@classmethod @classmethod
def generate_feed_content_vector(cls, feed_id): def generate_feed_content_vector(cls, feed_id):
from apps.rss_feeds.models import Feed from apps.rss_feeds.models import Feed

View file

@ -13,16 +13,14 @@ NEWSBLUR.Collections.DiscoverFeeds = Backbone.Collection.extend({
model: NEWSBLUR.Models.DiscoverFeed, model: NEWSBLUR.Models.DiscoverFeed,
url: function () { url: function () {
// if (!this.feed_ids || this.feed_ids.length === 0) { if (this.similar_to_feed_id) {
// throw new Error("feed_ids are required to fetch the data"); var url = '/rss_feeds/discover/' + this.similar_to_feed_id + '/';
// } if (this.feed_ids && this.feed_ids.length > 0) {
url += '?feed_id=' + this.feed_ids.join("&feed_id=");
// Assuming your base endpoint is /api/feed }
var url = '/rss_feeds/discover/' + this.similar_to_feed_id + '/'; } else if (this.similar_to_feed_ids) {
if (this.feed_ids && this.feed_ids.length > 0) { var url = '/rss_feeds/discover/feeds/';
url += '?feed_id=' + this.feed_ids.join("&feed_id=");
} }
return url; return url;
}, },

View file

@ -36,15 +36,21 @@ NEWSBLUR.DiscoverFeedsPopover = NEWSBLUR.ReaderPopover.extend({
fetchData: function () { fetchData: function () {
var self = this; var self = this;
var feed = this.model.get_feed(this.options.feed_id); if (this.options.feed_id) {
this.discover_feeds_model.feed_ids = feed.get("similar_feeds");; var feed = this.model.get_feed(this.options.feed_id);
this.discover_feeds_model.similar_to_feed_id = feed.get("id");; // this.discover_feeds_model.feed_ids = feed.get("similar_feeds"); // Let the server include this
this.discover_feeds_model.similar_to_feed_id = feed.get("id");
} else if (this.options.feed_ids) {
this.discover_feeds_model.similar_to_feed_ids = this.options.feed_ids;
}
NEWSBLUR.ReaderPopover.prototype.render.call(this); NEWSBLUR.ReaderPopover.prototype.render.call(this);
this.showLoading(); this.showLoading();
try { try {
this.discover_feeds_model.fetch({ this.discover_feeds_model.fetch({
type: this.discover_feeds_model.similar_to_feed_ids ? 'POST' : 'GET',
data: { feed_ids: this.discover_feeds_model.similar_to_feed_ids },
success: function () { success: function () {
self.hideLoading(); self.hideLoading();
self.render(); self.render();

View file

@ -19,6 +19,7 @@ NEWSBLUR.Views.Folder = Backbone.View.extend({
"click .NB-feedbar-mark-feed-read-expand": "expand_mark_read", "click .NB-feedbar-mark-feed-read-expand": "expand_mark_read",
"click .NB-feedbar-mark-feed-read-time": "mark_folder_as_read_days", "click .NB-feedbar-mark-feed-read-time": "mark_folder_as_read_days",
"click .NB-feedbar-options": "open_options_popover", "click .NB-feedbar-options": "open_options_popover",
"click .NB-feedbar-discover-container": "open_discover_popover",
"click .NB-story-title-indicator": "show_hidden_story_titles", "click .NB-story-title-indicator": "show_hidden_story_titles",
"mousedown .folder_title": "highlight_feeds", "mousedown .folder_title": "highlight_feeds",
"mouseenter": "add_hover_inverse", "mouseenter": "add_hover_inverse",
@ -175,6 +176,12 @@ NEWSBLUR.Views.Folder = Backbone.View.extend({
<div class="NB-story-title-indicator-count"></div>\ <div class="NB-story-title-indicator-count"></div>\
<span class="NB-story-title-indicator-text">show hidden stories</span>\ <span class="NB-story-title-indicator-text">show hidden stories</span>\
</div>\ </div>\
<% if (show_discover) { %>\
<div class="NB-feedbar-discover-container">\
<div class="NB-feedbar-discover-icon"></div>\
<div class="NB-icon"></div>\
</div>\
<% } %>\
<% } %>\ <% } %>\
<div class="NB-folder-icon">\ <div class="NB-folder-icon">\
<% if (feedbar) { %>\ <% if (feedbar) { %>\
@ -199,6 +206,7 @@ NEWSBLUR.Views.Folder = Backbone.View.extend({
is_collapsed: this.options.collapsed && !this.options.feed_chooser, is_collapsed: this.options.collapsed && !this.options.feed_chooser,
root: this.options.root, root: this.options.root,
feedbar: this.options.feedbar, feedbar: this.options.feedbar,
show_discover: NEWSBLUR.assets.preference("show_discover"),
list_type: this.options.feedbar ? 'div' : 'li' list_type: this.options.feedbar ? 'div' : 'li'
}); });
@ -563,6 +571,13 @@ NEWSBLUR.Views.Folder = Backbone.View.extend({
show_hidden_story_titles: function () { show_hidden_story_titles: function () {
NEWSBLUR.app.story_titles_header.show_hidden_story_titles(); NEWSBLUR.app.story_titles_header.show_hidden_story_titles();
},
open_discover_popover: function () {
NEWSBLUR.DiscoverFeedsPopover.create({
anchor: this.$(".NB-feedbar-discover-container"),
feed_ids: this.model.feed_ids_in_folder()
});
} }
}); });