diff --git a/.vscode/settings.json b/.vscode/settings.json index 7a6739045..9dbb87c00 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,7 +1,18 @@ { + "black-formatter.args": [ + "--line-length=110" + ], + "isort.args": [ + "--profile", + "black" + ], + "editor.formatOnSave": true, + "editor.codeActionsOnSave": { + "source.organizeImports": true + }, "python.linting.enabled": true, "python.linting.pylintEnabled": false, - "python.linting.flake8Enabled": true, + "python.linting.flake8Enabled": false, "python.linting.pylamaEnabled": false, "python.linting.flake8Args": [ "--ignore=E501,W293,W503,W504,E302,E722,E226,E221,E402,E401" diff --git a/Makefile b/Makefile index b9d65130b..1967b5873 100644 --- a/Makefile +++ b/Makefile @@ -69,6 +69,10 @@ jekyll: cd blog && bundle exec jekyll serve jekyll_drafts: cd blog && bundle exec jekyll serve --drafts +lint: + docker exec -it newsblur_web isort --profile black . + docker exec -it newsblur_web black --line-length 110 . + docker exec -it newsblur_web flake8 . 
--count --select=E9,F63,F7,F82 --show-source --statistics --exclude=venv # runs tests test: @@ -198,7 +202,7 @@ mongorestore: cp -fr docker/volumes/mongodump docker/volumes/db_mongo/ docker exec -it db_mongo mongorestore --port 29019 -d newsblur /data/db/mongodump/newsblur pgrestore: - docker exec -it db_postgres bash -c "psql -U newsblur -c 'CREATE DATABASE newsblur_prod;'; pg_restore -U newsblur --role=newsblur --dbname=newsblur_prod /var/lib/postgresql/data/backup_postgresql_2023-10-09-04-00.sql.sql" + docker exec -it db_postgres bash -c "psql -U newsblur -c 'CREATE DATABASE newsblur_prod;'; pg_restore -U newsblur --role=newsblur --dbname=newsblur_prod /var/lib/postgresql/data/backup_postgresql_2023-10-10-04-00.sql.sql" index_feeds: docker exec -it newsblur_web ./manage.py index_feeds index_stories: diff --git a/apps/recommendations/management/commands/train_collab.py b/apps/recommendations/management/commands/train_collab.py index 9feedb674..a5a738273 100644 --- a/apps/recommendations/management/commands/train_collab.py +++ b/apps/recommendations/management/commands/train_collab.py @@ -1,4 +1,6 @@ +from django.conf import settings from django.core.management.base import BaseCommand + from apps.recommendations.models import CollaborativelyFilteredRecommendation @@ -15,7 +17,7 @@ class Command(BaseCommand): def handle(self, *args, **options): # Store user feed data to file - file_name = "user_feed_data.csv" + file_name = f"{settings.SURPRISE_DATA_FOLDER}/user_feed_data_2.csv" CollaborativelyFilteredRecommendation.store_user_feed_data_to_file(file_name) # Load data and get the trained model diff --git a/apps/recommendations/models.py b/apps/recommendations/models.py index 3ae5d2a32..8291a63c1 100644 --- a/apps/recommendations/models.py +++ b/apps/recommendations/models.py @@ -1,15 +1,17 @@ import tempfile -import mongoengine as mongo -from surprise import SVD -from surprise.model_selection import train_test_split -from surprise import Reader, Dataset -from 
django.db import models -from django.contrib.auth.models import User -from apps.rss_feeds.models import Feed -from apps.reader.models import UserSubscription, UserSubscriptionFolders -from utils import json_functions as json from collections import defaultdict +import mongoengine as mongo +from django.contrib.auth.models import User +from django.core.paginator import Paginator +from django.db import models +from surprise import SVD, Dataset, Reader +from surprise.model_selection import train_test_split + +from apps.reader.models import UserSubscription, UserSubscriptionFolders +from apps.rss_feeds.models import Feed +from utils import json_functions as json + class RecommendedFeed(models.Model): feed = models.ForeignKey(Feed, related_name="recommendations", on_delete=models.CASCADE) @@ -80,15 +82,16 @@ class MFeedFolder(mongo.Document): class CollaborativelyFilteredRecommendation(models.Model): @classmethod - def store_user_feed_data_to_file(cls, file_name="user_feed_data.csv"): + def store_user_feed_data_to_file(cls, file_name): temp_file = open(file_name, "w+") - users = User.objects.all() + users = User.objects.all().order_by("pk") paginator = Paginator(users, 1000) for page_num in paginator.page_range: users = paginator.page(page_num) for user in users: # Only include feeds with num_subscribers >= 5 subs = UserSubscription.objects.filter(user=user, feed__num_subscribers__gte=5) + # print(f"User {user} has {subs.count()} feeds") for sub in subs: temp_file.write(f"{user.id},{sub.feed_id},1\n") print(f"Page {page_num} of {paginator.num_pages} saved to {file_name}") diff --git a/config/requirements.txt b/config/requirements.txt index 64b91f7d8..acf44282d 100644 --- a/config/requirements.txt +++ b/config/requirements.txt @@ -103,7 +103,7 @@ pyzmq==22.0.3 raven==6.10.0 # readability-lxml==0.8.1.1 # Was vendorized due to noscript # Vendorized again due to 0.8.1.1 not out yet redis>=4,<5 -requests==2.25.0 +requests>=2.25.0,<3 requests-oauthlib==1.3.0 scipy==1.5.4 
sentry-sdk>=1,<2 @@ -116,7 +116,7 @@ sqlparse==0.4.1 stevedore==3.3.0 stripe==2.55.1 subdomains==3.0.1 -surprise==1.1.3 +scikit-surprise~=1.1.3 text-unidecode==1.3 tiktoken~=0.4.0 toml==0.10.2 diff --git a/docker-compose.yml b/docker-compose.yml index 0482e9835..ce34d2938 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,6 +11,7 @@ services: user: "${CURRENT_UID}:${CURRENT_GID}" environment: - DOCKERBUILD=True + - SURPRISE_DATA_FOLDER=/srv/newsblur/docker/volumes/surprise - RUNWITHMAKEBUILD=${RUNWITHMAKEBUILD?Use the `make` command instead of docker CLI} stdin_open: true tty: true @@ -154,6 +155,7 @@ services: - ${PWD}:/srv/newsblur environment: - DOCKERBUILD=True + - SURPRISE_DATA_FOLDER=/srv/newsblur/docker/volumes/surprise haproxy: container_name: haproxy diff --git a/newsblur_web/settings.py b/newsblur_web/settings.py index f44d7e76a..8d4a62587 100644 --- a/newsblur_web/settings.py +++ b/newsblur_web/settings.py @@ -811,6 +811,8 @@ REDIS_PUBSUB_POOL = redis.ConnectionPool(host=REDIS_PUBSUB['host'], por # celeryapp.autodiscover_tasks(INSTALLED_APPS) accept_content = ['pickle', 'json', 'msgpack', 'yaml'] +SURPRISE_DATA_FOLDER = os.getenv("SURPRISE_DATA_FOLDER", "/srv/newsblur/docker/volumes/surprise") + # ========== # = Assets = # ==========