mirror of
https://github.com/samuelclay/NewsBlur.git
synced 2025-08-05 16:58:59 +00:00
Running scikit-surprise
This commit is contained in:
parent
663b5f979a
commit
2feaf087e7
7 changed files with 40 additions and 16 deletions
13
.vscode/settings.json
vendored
13
.vscode/settings.json
vendored
|
@ -1,7 +1,18 @@
|
|||
{
|
||||
"black-formatter.args": [
|
||||
"--line-length 110"
|
||||
],
|
||||
"isort.args": [
|
||||
"--profile",
|
||||
"black"
|
||||
],
|
||||
"editor.formatOnSave": true,
|
||||
"editor.codeActionsOnSave": {
|
||||
"source.organizeImports": true
|
||||
},
|
||||
"python.linting.enabled": true,
|
||||
"python.linting.pylintEnabled": false,
|
||||
"python.linting.flake8Enabled": true,
|
||||
"python.linting.flake8Enabled": false,
|
||||
"python.linting.pylamaEnabled": false,
|
||||
"python.linting.flake8Args": [
|
||||
"--ignore=E501,W293,W503,W504,E302,E722,E226,E221,E402,E401"
|
||||
|
|
6
Makefile
6
Makefile
|
@ -69,6 +69,10 @@ jekyll:
|
|||
cd blog && bundle exec jekyll serve
|
||||
jekyll_drafts:
|
||||
cd blog && bundle exec jekyll serve --drafts
|
||||
lint:
|
||||
docker exec -it newsblur_web isort --profile black .
|
||||
docker exec -it newsblur_web black --line-length 110 .
|
||||
docker exec -it newsblur_web flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude=venv
|
||||
|
||||
# runs tests
|
||||
test:
|
||||
|
@ -198,7 +202,7 @@ mongorestore:
|
|||
cp -fr docker/volumes/mongodump docker/volumes/db_mongo/
|
||||
docker exec -it db_mongo mongorestore --port 29019 -d newsblur /data/db/mongodump/newsblur
|
||||
pgrestore:
|
||||
docker exec -it db_postgres bash -c "psql -U newsblur -c 'CREATE DATABASE newsblur_prod;'; pg_restore -U newsblur --role=newsblur --dbname=newsblur_prod /var/lib/postgresql/data/backup_postgresql_2023-10-09-04-00.sql.sql"
|
||||
docker exec -it db_postgres bash -c "psql -U newsblur -c 'CREATE DATABASE newsblur_prod;'; pg_restore -U newsblur --role=newsblur --dbname=newsblur_prod /var/lib/postgresql/data/backup_postgresql_2023-10-10-04-00.sql.sql"
|
||||
index_feeds:
|
||||
docker exec -it newsblur_web ./manage.py index_feeds
|
||||
index_stories:
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
from django.conf import settings
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from apps.recommendations.models import CollaborativelyFilteredRecommendation
|
||||
|
||||
|
||||
|
@ -15,7 +17,7 @@ class Command(BaseCommand):
|
|||
|
||||
def handle(self, *args, **options):
|
||||
# Store user feed data to file
|
||||
file_name = "user_feed_data.csv"
|
||||
file_name = f"{settings.SURPRISE_DATA_FOLDER}/user_feed_data_2.csv"
|
||||
CollaborativelyFilteredRecommendation.store_user_feed_data_to_file(file_name)
|
||||
|
||||
# Load data and get the trained model
|
||||
|
|
|
@ -1,15 +1,17 @@
|
|||
import tempfile
|
||||
import mongoengine as mongo
|
||||
from surprise import SVD
|
||||
from surprise.model_selection import train_test_split
|
||||
from surprise import Reader, Dataset
|
||||
from django.db import models
|
||||
from django.contrib.auth.models import User
|
||||
from apps.rss_feeds.models import Feed
|
||||
from apps.reader.models import UserSubscription, UserSubscriptionFolders
|
||||
from utils import json_functions as json
|
||||
from collections import defaultdict
|
||||
|
||||
import mongoengine as mongo
|
||||
from django.contrib.auth.models import User
|
||||
from django.core.paginator import Paginator
|
||||
from django.db import models
|
||||
from surprise import SVD, Dataset, Reader
|
||||
from surprise.model_selection import train_test_split
|
||||
|
||||
from apps.reader.models import UserSubscription, UserSubscriptionFolders
|
||||
from apps.rss_feeds.models import Feed
|
||||
from utils import json_functions as json
|
||||
|
||||
|
||||
class RecommendedFeed(models.Model):
|
||||
feed = models.ForeignKey(Feed, related_name="recommendations", on_delete=models.CASCADE)
|
||||
|
@ -80,15 +82,16 @@ class MFeedFolder(mongo.Document):
|
|||
|
||||
class CollaborativelyFilteredRecommendation(models.Model):
|
||||
@classmethod
|
||||
def store_user_feed_data_to_file(cls, file_name="user_feed_data.csv"):
|
||||
def store_user_feed_data_to_file(cls, file_name):
|
||||
temp_file = open(file_name, "w+")
|
||||
users = User.objects.all()
|
||||
users = User.objects.all().order_by("pk")
|
||||
paginator = Paginator(users, 1000)
|
||||
for page_num in paginator.page_range:
|
||||
users = paginator.page(page_num)
|
||||
for user in users:
|
||||
# Only include feeds with num_subscribers >= 5
|
||||
subs = UserSubscription.objects.filter(user=user, feed__num_subscribers__gte=5)
|
||||
# print(f"User {user} has {subs.count()} feeds")
|
||||
for sub in subs:
|
||||
temp_file.write(f"{user.id},{sub.feed_id},1\n")
|
||||
print(f"Page {page_num} of {paginator.num_pages} saved to {file_name}")
|
||||
|
|
|
@ -103,7 +103,7 @@ pyzmq==22.0.3
|
|||
raven==6.10.0
|
||||
# readability-lxml==0.8.1.1 # Was vendorized due to noscript # Vendorized again due to 0.8.1.1 not out yet
|
||||
redis>=4,<5
|
||||
requests==2.25.0
|
||||
requests>=2.25.0,<3
|
||||
requests-oauthlib==1.3.0
|
||||
scipy==1.5.4
|
||||
sentry-sdk>=1,<2
|
||||
|
@ -116,7 +116,7 @@ sqlparse==0.4.1
|
|||
stevedore==3.3.0
|
||||
stripe==2.55.1
|
||||
subdomains==3.0.1
|
||||
surprise==1.1.3
|
||||
scikit-surprise~=1.1.3
|
||||
text-unidecode==1.3
|
||||
tiktoken~=0.4.0
|
||||
toml==0.10.2
|
||||
|
|
|
@ -11,6 +11,7 @@ services:
|
|||
user: "${CURRENT_UID}:${CURRENT_GID}"
|
||||
environment:
|
||||
- DOCKERBUILD=True
|
||||
- SURPRISE_DATA_FOLDER=/srv/newsblur/docker/volumes/suprise
|
||||
- RUNWITHMAKEBUILD=${RUNWITHMAKEBUILD?Use the `make` command instead of docker CLI}
|
||||
stdin_open: true
|
||||
tty: true
|
||||
|
@ -154,6 +155,7 @@ services:
|
|||
- ${PWD}:/srv/newsblur
|
||||
environment:
|
||||
- DOCKERBUILD=True
|
||||
- SURPRISE_DATA_FOLDER=/srv/newsblur/docker/volumes/suprise
|
||||
|
||||
haproxy:
|
||||
container_name: haproxy
|
||||
|
|
|
@ -811,6 +811,8 @@ REDIS_PUBSUB_POOL = redis.ConnectionPool(host=REDIS_PUBSUB['host'], por
|
|||
# celeryapp.autodiscover_tasks(INSTALLED_APPS)
|
||||
accept_content = ['pickle', 'json', 'msgpack', 'yaml']
|
||||
|
||||
SURPRISE_DATA_FOLDER = os.getenv("SURPRISE_DATA_FOLDER", "/srv/newsblur/docker/volumes/surprise")
|
||||
|
||||
# ==========
|
||||
# = Assets =
|
||||
# ==========
|
||||
|
|
Loading…
Add table
Reference in a new issue