Adding discover to folders of feeds. Holy moly this is nearly done.

This commit is contained in:
Samuel Clay 2024-07-12 18:16:11 -04:00
parent 92f881e54b
commit 7e68b00d13
9 changed files with 101 additions and 39 deletions

View file

@ -8,13 +8,13 @@
- ../env_vars/base.yml
roles:
- { role: "base", tags: "base" }
- { role: "ufw", tags: "ufw" }
- { role: "docker", tags: "docker" }
- { role: "repo", tags: ["repo", "pull"] }
- { role: "dnsmasq", tags: "dnsmasq" }
- { role: "consul", tags: "consul" }
- { role: "consul-client", tags: "consul" }
# - { role: "base", tags: "base" }
# - { role: "ufw", tags: "ufw" }
# - { role: "docker", tags: "docker" }
# - { role: "repo", tags: ["repo", "pull"] }
# - { role: "dnsmasq", tags: "dnsmasq" }
# - { role: "consul", tags: "consul" }
# - { role: "consul-client", tags: "consul" }
- { role: "mongo", tags: "mongo" }
- { role: "node-exporter", tags: ["node-exporter", "metrics"] }
- { role: "mongo-exporter", tags: ["mongo-exporter", "metrics"] }

View file

@ -95,6 +95,7 @@
- name: Block for mongo volume on hetzner
block:
- name: Create backup directory
become: yes
file:
path: "/srv/newsblur/docker/volumes/mongo/backup"
state: directory

View file

@ -1056,17 +1056,17 @@ class Feed(models.Model):
end=" ",
)
def count_similar_feeds(self, force=False, csv_path=None):
def count_similar_feeds(self, feed_ids=None, force=False):
if not force and self.similar_feeds.count():
logging.debug(f"Found {self.similar_feeds.count()} cached similar feeds for {self}")
return self.similar_feeds.all()
content_vector = SearchFeed.fetch_feed_content_vector(self.pk)
if not content_vector:
content_vector = SearchFeed.generate_feed_content_vector(self.pk)
results = SearchFeed.vector_query(content_vector)
logging.debug(
f"Found {len(results)} recommendations for feed {self}: {r['_source']['title'] for r in results}"
)
if not feed_ids:
feed_ids = [self.pk]
if self.pk not in feed_ids:
feed_ids.append(self.pk)
results = self.find_similar_feeds(feed_ids=feed_ids)
self.similar_feeds.clear()
for result in results:
@ -1076,8 +1076,19 @@ class Feed(models.Model):
except IntegrityError:
logging.debug(f" ---> ~FRIntegrity error adding similar feed: {feed_id}")
pass
return self.similar_feeds.all()
@classmethod
def find_similar_feeds(cls, feed_ids=None):
    """Find feeds similar to the combined content of ``feed_ids``.

    Builds a single combined content vector for the given feeds and runs a
    vector query against the search index, excluding the input feeds
    themselves from the results.

    Args:
        feed_ids: Feed ids to base the recommendations on.

    Returns:
        Whatever ``SearchFeed.vector_query`` returns for the combined
        vector, or an empty list when no feed ids are given.
    """
    # With the default of None (or an empty list) there is nothing to build
    # a vector from, so skip the search entirely.
    if not feed_ids:
        return []

    combined_content_vector = SearchFeed.generate_combined_feed_content_vector(feed_ids)
    results = SearchFeed.vector_query(combined_content_vector, feed_ids_to_exclude=feed_ids)

    # A bare generator expression inside an f-string renders as
    # "<generator object ...>", so materialize the titles before logging.
    titles = [r["_source"]["title"] for r in results]
    logging.debug(f"Found {len(results)} recommendations for feeds {feed_ids}: {titles}")
    return results
def _split_favicon_color(self, color=None):
if not color:
color = self.favicon_color

View file

@ -26,5 +26,6 @@ urlpatterns = [
url(r"^original_text", views.original_text, name="original-text"),
url(r"^original_story", views.original_story, name="original-story"),
url(r"^story_changes", views.story_changes, name="story-changes"),
url(r"^discover/(?P<feed_id>\d+)/?$", views.discover_feeds, name="discover-feeds"),
url(r"^discover/(?P<feed_id>\d+)/?$", views.discover_feeds, name="discover-feed"),
url(r"^discover/feeds/?$", views.discover_feeds, name="discover-feeds"),
]

View file

@ -651,13 +651,20 @@ def story_changes(request):
@ajax_login_required
@json.json_view
def discover_feeds(request, feed_id=None):
    """Return feeds similar to one feed (GET) or to a set of feeds (POST).

    GET with ``feed_id`` in the URL recomputes the similar feeds for that
    feed. POST reads the list of feed ids from the request body and looks up
    feeds similar to their combined content.

    Returns:
        ``{"discover_feeds": {feed_pk: {"feed": ..., "stories": ...}}}`` on
        success, or a failure payload with ``code`` -1 when no feed ids were
        supplied.
    """
    missing_feed_ids = {
        "code": -1,
        "message": "Missing feed_ids.",
        "discover_feeds": None,
        "failed": True,
    }

    if request.method == "GET" and feed_id:
        similar_feed_ids = (
            Feed.get_by_id(feed_id).count_similar_feeds(force=True).values_list("pk", flat=True)
        )
    elif request.method == "POST":
        # Array params can arrive with a "[]" suffix on the key (e.g. from
        # jQuery's non-traditional array serialization), so accept both.
        feed_ids = request.POST.getlist("feed_ids") or request.POST.getlist("feed_ids[]")
        if not feed_ids:
            # An empty POST has nothing to compare against; report it the
            # same way as a request without any feed id.
            return missing_feed_ids
        similar_feeds = Feed.find_similar_feeds(feed_ids=feed_ids)
        similar_feed_ids = [result["_source"]["feed_id"] for result in similar_feeds]
    else:
        return missing_feed_ids

    feeds = Feed.objects.filter(pk__in=similar_feed_ids)
    # Named differently from the view so the function name is not shadowed.
    discovered = defaultdict(dict)
    for feed in feeds:
        discovered[feed.pk]["feed"] = feed.canonical(include_favicon=False)
        discovered[feed.pk]["stories"] = feed.get_stories(limit=5)

    logging.user(request, "~FCDiscovering similar feeds: ~SB%s" % similar_feed_ids)
    return {"discover_feeds": discovered}

View file

@ -690,21 +690,30 @@ class SearchFeed:
return results["hits"]["hits"]
@classmethod
def vector_query(cls, query_vector, max_results=10):
def vector_query(cls, query_vector, max_results=10, feed_ids_to_exclude=None):
try:
cls.ES().indices.flush(index=cls.index_name())
except elasticsearch.exceptions.NotFoundError as e:
logging.debug(f" ***> ~FRNo search server available: {e}")
return []
must_not_clauses = []
if feed_ids_to_exclude:
must_not_clauses.append({"terms": {"feed_id": feed_ids_to_exclude}})
body = {
"query": {
"script_score": {
"query": {"match_all": {}},
"script": {
"source": "cosineSimilarity(params.query_vector, 'content_vector') + 1.0",
"params": {"query_vector": query_vector},
"bool": {
"must": {
"script_score": {
"query": {"match_all": {}},
"script": {
"source": "cosineSimilarity(params.query_vector, 'content_vector') + 1.0",
"params": {"query_vector": query_vector},
},
}
},
"must_not": must_not_clauses,
}
},
"size": max_results,
@ -748,6 +757,20 @@ class SearchFeed:
return []
return results["hits"]["hits"][0]["_source"]["content_vector"]
@classmethod
def generate_combined_feed_content_vector(cls, feed_ids):
    """Average the content vectors of several feeds into one unit-length vector.

    For each feed the stored vector is fetched; when none is stored, one is
    generated. Feeds that still yield no vector are left out of the average.

    Args:
        feed_ids: Iterable of feed ids whose vectors should be combined.

    Returns:
        The mean of the collected vectors scaled to unit length, or an empty
        list when no feed produced a usable vector. A mean with zero norm is
        returned unscaled to avoid dividing by zero.
    """
    vectors = []
    for feed_id in feed_ids or []:
        vector = cls.fetch_feed_content_vector(feed_id)
        if not vector:
            vector = cls.generate_feed_content_vector(feed_id)
        # NOTE(review): assumes a failed generation yields None or an empty
        # sequence - confirm against generate_feed_content_vector.
        if vector is None or len(vector) == 0:
            continue
        vectors.append(vector)

    # np.mean over an empty list would produce NaN, so report "no vector".
    if not vectors:
        return []

    combined_vector = np.mean(vectors, axis=0)
    norm = np.linalg.norm(combined_vector)
    if norm == 0:
        return combined_vector
    return combined_vector / norm
@classmethod
def generate_feed_content_vector(cls, feed_id):
from apps.rss_feeds.models import Feed

View file

@ -13,16 +13,14 @@ NEWSBLUR.Collections.DiscoverFeeds = Backbone.Collection.extend({
model: NEWSBLUR.Models.DiscoverFeed,
url: function () {
// if (!this.feed_ids || this.feed_ids.length === 0) {
// throw new Error("feed_ids are required to fetch the data");
// }
// Assuming your base endpoint is /api/feed
var url = '/rss_feeds/discover/' + this.similar_to_feed_id + '/';
if (this.feed_ids && this.feed_ids.length > 0) {
url += '?feed_id=' + this.feed_ids.join("&feed_id=");
if (this.similar_to_feed_id) {
var url = '/rss_feeds/discover/' + this.similar_to_feed_id + '/';
if (this.feed_ids && this.feed_ids.length > 0) {
url += '?feed_id=' + this.feed_ids.join("&feed_id=");
}
} else if (this.similar_to_feed_ids) {
var url = '/rss_feeds/discover/feeds/';
}
return url;
},

View file

@ -36,15 +36,21 @@ NEWSBLUR.DiscoverFeedsPopover = NEWSBLUR.ReaderPopover.extend({
fetchData: function () {
var self = this;
var feed = this.model.get_feed(this.options.feed_id);
this.discover_feeds_model.feed_ids = feed.get("similar_feeds");;
this.discover_feeds_model.similar_to_feed_id = feed.get("id");;
if (this.options.feed_id) {
var feed = this.model.get_feed(this.options.feed_id);
// this.discover_feeds_model.feed_ids = feed.get("similar_feeds"); // Let the server include this
this.discover_feeds_model.similar_to_feed_id = feed.get("id");
} else if (this.options.feed_ids) {
this.discover_feeds_model.similar_to_feed_ids = this.options.feed_ids;
}
NEWSBLUR.ReaderPopover.prototype.render.call(this);
this.showLoading();
try {
this.discover_feeds_model.fetch({
type: this.discover_feeds_model.similar_to_feed_ids ? 'POST' : 'GET',
data: { feed_ids: this.discover_feeds_model.similar_to_feed_ids },
success: function () {
self.hideLoading();
self.render();

View file

@ -19,6 +19,7 @@ NEWSBLUR.Views.Folder = Backbone.View.extend({
"click .NB-feedbar-mark-feed-read-expand": "expand_mark_read",
"click .NB-feedbar-mark-feed-read-time": "mark_folder_as_read_days",
"click .NB-feedbar-options": "open_options_popover",
"click .NB-feedbar-discover-container": "open_discover_popover",
"click .NB-story-title-indicator": "show_hidden_story_titles",
"mousedown .folder_title": "highlight_feeds",
"mouseenter": "add_hover_inverse",
@ -175,6 +176,12 @@ NEWSBLUR.Views.Folder = Backbone.View.extend({
<div class="NB-story-title-indicator-count"></div>\
<span class="NB-story-title-indicator-text">show hidden stories</span>\
</div>\
<% if (show_discover) { %>\
<div class="NB-feedbar-discover-container">\
<div class="NB-feedbar-discover-icon"></div>\
<div class="NB-icon"></div>\
</div>\
<% } %>\
<% } %>\
<div class="NB-folder-icon">\
<% if (feedbar) { %>\
@ -199,6 +206,7 @@ NEWSBLUR.Views.Folder = Backbone.View.extend({
is_collapsed: this.options.collapsed && !this.options.feed_chooser,
root: this.options.root,
feedbar: this.options.feedbar,
show_discover: NEWSBLUR.assets.preference("show_discover"),
list_type: this.options.feedbar ? 'div' : 'li'
});
@ -563,6 +571,13 @@ NEWSBLUR.Views.Folder = Backbone.View.extend({
show_hidden_story_titles: function () {
NEWSBLUR.app.story_titles_header.show_hidden_story_titles();
},
open_discover_popover: function () {
NEWSBLUR.DiscoverFeedsPopover.create({
anchor: this.$(".NB-feedbar-discover-container"),
feed_ids: this.model.feed_ids_in_folder()
});
}
});