mirror of
https://github.com/samuelclay/NewsBlur.git
synced 2025-08-05 16:58:59 +00:00
Running scikit-surprise
This commit is contained in:
parent
663b5f979a
commit
2feaf087e7
7 changed files with 40 additions and 16 deletions
13
.vscode/settings.json
vendored
13
.vscode/settings.json
vendored
|
@ -1,7 +1,18 @@
|
||||||
{
|
{
|
||||||
|
"black-formatter.args": [
|
||||||
|
"--line-length 110"
|
||||||
|
],
|
||||||
|
"isort.args": [
|
||||||
|
"--profile",
|
||||||
|
"black"
|
||||||
|
],
|
||||||
|
"editor.formatOnSave": true,
|
||||||
|
"editor.codeActionsOnSave": {
|
||||||
|
"source.organizeImports": true
|
||||||
|
},
|
||||||
"python.linting.enabled": true,
|
"python.linting.enabled": true,
|
||||||
"python.linting.pylintEnabled": false,
|
"python.linting.pylintEnabled": false,
|
||||||
"python.linting.flake8Enabled": true,
|
"python.linting.flake8Enabled": false,
|
||||||
"python.linting.pylamaEnabled": false,
|
"python.linting.pylamaEnabled": false,
|
||||||
"python.linting.flake8Args": [
|
"python.linting.flake8Args": [
|
||||||
"--ignore=E501,W293,W503,W504,E302,E722,E226,E221,E402,E401"
|
"--ignore=E501,W293,W503,W504,E302,E722,E226,E221,E402,E401"
|
||||||
|
|
6
Makefile
6
Makefile
|
@ -69,6 +69,10 @@ jekyll:
|
||||||
cd blog && bundle exec jekyll serve
|
cd blog && bundle exec jekyll serve
|
||||||
jekyll_drafts:
|
jekyll_drafts:
|
||||||
cd blog && bundle exec jekyll serve --drafts
|
cd blog && bundle exec jekyll serve --drafts
|
||||||
|
lint:
|
||||||
|
docker exec -it newsblur_web isort --profile black .
|
||||||
|
docker exec -it newsblur_web black --line-length 110 .
|
||||||
|
docker exec -it newsblur_web flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude=venv
|
||||||
|
|
||||||
# runs tests
|
# runs tests
|
||||||
test:
|
test:
|
||||||
|
@ -198,7 +202,7 @@ mongorestore:
|
||||||
cp -fr docker/volumes/mongodump docker/volumes/db_mongo/
|
cp -fr docker/volumes/mongodump docker/volumes/db_mongo/
|
||||||
docker exec -it db_mongo mongorestore --port 29019 -d newsblur /data/db/mongodump/newsblur
|
docker exec -it db_mongo mongorestore --port 29019 -d newsblur /data/db/mongodump/newsblur
|
||||||
pgrestore:
|
pgrestore:
|
||||||
docker exec -it db_postgres bash -c "psql -U newsblur -c 'CREATE DATABASE newsblur_prod;'; pg_restore -U newsblur --role=newsblur --dbname=newsblur_prod /var/lib/postgresql/data/backup_postgresql_2023-10-09-04-00.sql.sql"
|
docker exec -it db_postgres bash -c "psql -U newsblur -c 'CREATE DATABASE newsblur_prod;'; pg_restore -U newsblur --role=newsblur --dbname=newsblur_prod /var/lib/postgresql/data/backup_postgresql_2023-10-10-04-00.sql.sql"
|
||||||
index_feeds:
|
index_feeds:
|
||||||
docker exec -it newsblur_web ./manage.py index_feeds
|
docker exec -it newsblur_web ./manage.py index_feeds
|
||||||
index_stories:
|
index_stories:
|
||||||
|
|
|
@ -1,4 +1,6 @@
|
||||||
|
from django.conf import settings
|
||||||
from django.core.management.base import BaseCommand
|
from django.core.management.base import BaseCommand
|
||||||
|
|
||||||
from apps.recommendations.models import CollaborativelyFilteredRecommendation
|
from apps.recommendations.models import CollaborativelyFilteredRecommendation
|
||||||
|
|
||||||
|
|
||||||
|
@ -15,7 +17,7 @@ class Command(BaseCommand):
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
# Store user feed data to file
|
# Store user feed data to file
|
||||||
file_name = "user_feed_data.csv"
|
file_name = f"{settings.SURPRISE_DATA_FOLDER}/user_feed_data_2.csv"
|
||||||
CollaborativelyFilteredRecommendation.store_user_feed_data_to_file(file_name)
|
CollaborativelyFilteredRecommendation.store_user_feed_data_to_file(file_name)
|
||||||
|
|
||||||
# Load data and get the trained model
|
# Load data and get the trained model
|
||||||
|
|
|
@ -1,15 +1,17 @@
|
||||||
import tempfile
|
import tempfile
|
||||||
import mongoengine as mongo
|
|
||||||
from surprise import SVD
|
|
||||||
from surprise.model_selection import train_test_split
|
|
||||||
from surprise import Reader, Dataset
|
|
||||||
from django.db import models
|
|
||||||
from django.contrib.auth.models import User
|
|
||||||
from apps.rss_feeds.models import Feed
|
|
||||||
from apps.reader.models import UserSubscription, UserSubscriptionFolders
|
|
||||||
from utils import json_functions as json
|
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
|
import mongoengine as mongo
|
||||||
|
from django.contrib.auth.models import User
|
||||||
|
from django.core.paginator import Paginator
|
||||||
|
from django.db import models
|
||||||
|
from surprise import SVD, Dataset, Reader
|
||||||
|
from surprise.model_selection import train_test_split
|
||||||
|
|
||||||
|
from apps.reader.models import UserSubscription, UserSubscriptionFolders
|
||||||
|
from apps.rss_feeds.models import Feed
|
||||||
|
from utils import json_functions as json
|
||||||
|
|
||||||
|
|
||||||
class RecommendedFeed(models.Model):
|
class RecommendedFeed(models.Model):
|
||||||
feed = models.ForeignKey(Feed, related_name="recommendations", on_delete=models.CASCADE)
|
feed = models.ForeignKey(Feed, related_name="recommendations", on_delete=models.CASCADE)
|
||||||
|
@ -80,15 +82,16 @@ class MFeedFolder(mongo.Document):
|
||||||
|
|
||||||
class CollaborativelyFilteredRecommendation(models.Model):
|
class CollaborativelyFilteredRecommendation(models.Model):
|
||||||
@classmethod
|
@classmethod
|
||||||
def store_user_feed_data_to_file(cls, file_name="user_feed_data.csv"):
|
def store_user_feed_data_to_file(cls, file_name):
|
||||||
temp_file = open(file_name, "w+")
|
temp_file = open(file_name, "w+")
|
||||||
users = User.objects.all()
|
users = User.objects.all().order_by("pk")
|
||||||
paginator = Paginator(users, 1000)
|
paginator = Paginator(users, 1000)
|
||||||
for page_num in paginator.page_range:
|
for page_num in paginator.page_range:
|
||||||
users = paginator.page(page_num)
|
users = paginator.page(page_num)
|
||||||
for user in users:
|
for user in users:
|
||||||
# Only include feeds with num_subscribers >= 5
|
# Only include feeds with num_subscribers >= 5
|
||||||
subs = UserSubscription.objects.filter(user=user, feed__num_subscribers__gte=5)
|
subs = UserSubscription.objects.filter(user=user, feed__num_subscribers__gte=5)
|
||||||
|
# print(f"User {user} has {subs.count()} feeds")
|
||||||
for sub in subs:
|
for sub in subs:
|
||||||
temp_file.write(f"{user.id},{sub.feed_id},1\n")
|
temp_file.write(f"{user.id},{sub.feed_id},1\n")
|
||||||
print(f"Page {page_num} of {paginator.num_pages} saved to {file_name}")
|
print(f"Page {page_num} of {paginator.num_pages} saved to {file_name}")
|
||||||
|
|
|
@ -103,7 +103,7 @@ pyzmq==22.0.3
|
||||||
raven==6.10.0
|
raven==6.10.0
|
||||||
# readability-lxml==0.8.1.1 # Was vendorized due to noscript # Vendorized again due to 0.8.1.1 not out yet
|
# readability-lxml==0.8.1.1 # Was vendorized due to noscript # Vendorized again due to 0.8.1.1 not out yet
|
||||||
redis>=4,<5
|
redis>=4,<5
|
||||||
requests==2.25.0
|
requests>=2.25.0,<3
|
||||||
requests-oauthlib==1.3.0
|
requests-oauthlib==1.3.0
|
||||||
scipy==1.5.4
|
scipy==1.5.4
|
||||||
sentry-sdk>=1,<2
|
sentry-sdk>=1,<2
|
||||||
|
@ -116,7 +116,7 @@ sqlparse==0.4.1
|
||||||
stevedore==3.3.0
|
stevedore==3.3.0
|
||||||
stripe==2.55.1
|
stripe==2.55.1
|
||||||
subdomains==3.0.1
|
subdomains==3.0.1
|
||||||
surprise==1.1.3
|
scikit-surprise~=1.1.3
|
||||||
text-unidecode==1.3
|
text-unidecode==1.3
|
||||||
tiktoken~=0.4.0
|
tiktoken~=0.4.0
|
||||||
toml==0.10.2
|
toml==0.10.2
|
||||||
|
|
|
@ -11,6 +11,7 @@ services:
|
||||||
user: "${CURRENT_UID}:${CURRENT_GID}"
|
user: "${CURRENT_UID}:${CURRENT_GID}"
|
||||||
environment:
|
environment:
|
||||||
- DOCKERBUILD=True
|
- DOCKERBUILD=True
|
||||||
|
- SURPRISE_DATA_FOLDER=/srv/newsblur/docker/volumes/suprise
|
||||||
- RUNWITHMAKEBUILD=${RUNWITHMAKEBUILD?Use the `make` command instead of docker CLI}
|
- RUNWITHMAKEBUILD=${RUNWITHMAKEBUILD?Use the `make` command instead of docker CLI}
|
||||||
stdin_open: true
|
stdin_open: true
|
||||||
tty: true
|
tty: true
|
||||||
|
@ -154,6 +155,7 @@ services:
|
||||||
- ${PWD}:/srv/newsblur
|
- ${PWD}:/srv/newsblur
|
||||||
environment:
|
environment:
|
||||||
- DOCKERBUILD=True
|
- DOCKERBUILD=True
|
||||||
|
- SURPRISE_DATA_FOLDER=/srv/newsblur/docker/volumes/suprise
|
||||||
|
|
||||||
haproxy:
|
haproxy:
|
||||||
container_name: haproxy
|
container_name: haproxy
|
||||||
|
|
|
@ -811,6 +811,8 @@ REDIS_PUBSUB_POOL = redis.ConnectionPool(host=REDIS_PUBSUB['host'], por
|
||||||
# celeryapp.autodiscover_tasks(INSTALLED_APPS)
|
# celeryapp.autodiscover_tasks(INSTALLED_APPS)
|
||||||
accept_content = ['pickle', 'json', 'msgpack', 'yaml']
|
accept_content = ['pickle', 'json', 'msgpack', 'yaml']
|
||||||
|
|
||||||
|
SURPRISE_DATA_FOLDER = os.getenv("SURPRISE_DATA_FOLDER", "/srv/newsblur/docker/volumes/surprise")
|
||||||
|
|
||||||
# ==========
|
# ==========
|
||||||
# = Assets =
|
# = Assets =
|
||||||
# ==========
|
# ==========
|
||||||
|
|
Loading…
Add table
Reference in a new issue