diff --git a/.vscode/settings.json b/.vscode/settings.json index 65f56c0d5..6d324acf3 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,23 +1,16 @@ { - "black-formatter.args": [ - "--line-length 110" - ], "isort.args": [ "--profile", "black" ], - "editor.formatOnSave": true, - "editor.codeActionsOnSave": { - "source.organizeImports": "explicit" - }, - "python.linting.enabled": true, - "python.linting.pylintEnabled": false, - "python.linting.flake8Enabled": true, - "python.linting.pylamaEnabled": false, - "python.linting.flake8Args": [ - "--ignore=E501,W293,W503,W504,E302,E722,E226,E221,E402,E401" - ], - "python.pythonPath": "~/.virtualenvs/newsblur3/bin/python", + // "python.linting.enabled": true, + // "python.linting.pylintEnabled": false, + // "python.linting.flake8Enabled": true, + // "python.linting.pylamaEnabled": false, + // "python.linting.flake8Args": [ + // "--ignore=E501,W293,W503,W504,E302,E722,E226,E221,E402,E401" + // ], + // "python.pythonPath": "~/.virtualenvs/newsblur/bin/python", "editor.bracketPairColorization.enabled": true, "editor.guides.bracketPairs": "active", "git.ignoreLimitWarning": true, @@ -38,15 +31,12 @@ "docker/volumes": true, "requirements.txt": true, // It's just a symlink to config/requirements.txt, which has git history }, - "python.formatting.blackArgs": [ - "--line-length=110", - "--skip-string-normalization" - ], + // "python.formatting.blackArgs": [ + // "--line-length=110", + // "--skip-string-normalization" + // ], "files.associations": { "*.yml": "ansible" }, - "nrf-connect.toolchain.path": "${nrf-connect.toolchain:1.9.1}", - "C_Cpp.default.configurationProvider": "nrf-connect", - "editor.formatOnSave": false, "ansible.python.interpreterPath": "/opt/homebrew/bin/python3", } diff --git a/config/requirements.txt b/config/requirements.txt index 769a0f810..d2a3e1fe7 100644 --- a/config/requirements.txt +++ b/config/requirements.txt @@ -8,11 +8,11 @@ billiard==3.6.4.0 bleach==3.2.1 boto3==1.18.12 botocore==1.21.12 +black~=23.1.0 celery==4.4.7 certifi==2020.12.5 cffi==1.14.5 chardet==3.0.4 -click==7.1.2 ConfigArgParse==1.4 cryptography==3.4.7 cssutils==1.0.2 @@ -40,13 +40,10 @@ factory-boy==3.2.0 Faker==8.8.2 feedparser>=6,<7 filelock==3.0.12 -Flask==1.1.2 +Flask==3.0.2 Flask-BasicAuth==0.2.0 future==0.18.2 -gevent==21.1.2 -geventhttpclient==1.4.4 -greenlet==1.1.0 -gunicorn==20.1.0 +gunicorn==21.2.0 h2==2.6.2 hiredis==1.1.0 hpack==3.0.0 @@ -57,24 +54,21 @@ idna==2.10 image==1.5.33 iniconfig==1.1.1 isodate==0.6.0 -itsdangerous==1.1.0 -Jinja2==2.11.3 +Jinja2==3.1.3 jmespath==0.10.0 jsonpickle==2.0.0 kombu==4.6.11 locust==1.4.3 -lxml==4.6.2 -MarkupSafe==1.1.1 +lxml==5.1.0 mock==4.0.2 mongoengine==0.21.0 msgpack==1.0.2 ndg-httpsclient==0.5.1 nose==1.3.7 nose-exclude==0.5.0 -numpy==1.19.4 +numpy==1.26.4 oauth2==1.9.0.post1 oauthlib==3.1.0 -packaging==20.9 paypalrestsdk==1.13.1 pbr==5.6.0 Pillow==8.0.1 @@ -104,7 +98,7 @@ raven==6.10.0 redis>=4,<5 requests==2.25.0 requests-oauthlib==1.3.0 -scipy==1.5.4 +scipy==1.12.0 sentry-sdk>=1,<2 sgmllib3k==1.0.0 simplejson==3.17.2 @@ -125,7 +119,6 @@ virtualenv==20.4.6 virtualenv-clone==0.5.4 virtualenvwrapper==4.8.4 webencodings==0.5.1 -Werkzeug==1.0.1 XlsxWriter==1.3.7 zope.event==4.5.0 zope.interface==5.4.0 diff --git a/utils/feed_fetcher.py b/utils/feed_fetcher.py index b4cca52e9..c6b804418 100644 --- a/utils/feed_fetcher.py +++ b/utils/feed_fetcher.py @@ -19,9 +19,7 @@ import random import re import xml.sax -import dateutil.parser import feedparser -import isodate import pymongo import redis import requests @@ -58,6 +56,7 @@ from utils.feed_functions import TimeoutError, timelimit from utils.json_fetcher import JSONFetcher from utils.story_functions import linkify, pre_process_story, strip_tags from utils.twitter_fetcher import TwitterFetcher +from utils.youtube_fetcher import YoutubeFetcher # from utils.feed_functions import mail_feed_error_to_admin @@ -131,10 +130,7 @@ class FetchFeed: return FEED_OK, self.fpf if 'youtube.com' in address: - try: - youtube_feed = self.fetch_youtube(address) - except (requests.adapters.ConnectionError): - youtube_feed = None + youtube_feed = self.fetch_youtube() if not youtube_feed: logging.debug( ' ***> [%-30s] ~FRYouTube fetch failed: %s.' % (self.feed.log_title[:30], address) @@ -313,162 +309,9 @@ class FetchFeed: json_fetcher = JSONFetcher(self.feed, self.options) return json_fetcher.fetch(address, headers) - def fetch_youtube(self, address): - username = None - channel_id = None - list_id = None - - if 'gdata.youtube.com' in address: - try: - username_groups = re.search(r'gdata.youtube.com/feeds/\w+/users/(\w+)/', address) - if not username_groups: - return - username = username_groups.group(1) - except IndexError: - return - elif 'youtube.com/@' in address: - try: - username = address.split('youtube.com/@')[1] - except IndexError: - return - elif 'youtube.com/feeds/videos.xml?user=' in address: - try: - username = urllib.parse.parse_qs(urllib.parse.urlparse(address).query)['user'][0] - except IndexError: - return - elif 'youtube.com/feeds/videos.xml?channel_id=' in address: - try: - channel_id = urllib.parse.parse_qs(urllib.parse.urlparse(address).query)['channel_id'][0] - except (IndexError, KeyError): - return - elif 'youtube.com/playlist' in address: - try: - list_id = urllib.parse.parse_qs(urllib.parse.urlparse(address).query)['list'][0] - except IndexError: - return - elif 'youtube.com/feeds/videos.xml?playlist_id' in address: - try: - list_id = urllib.parse.parse_qs(urllib.parse.urlparse(address).query)['playlist_id'][0] - except IndexError: - return - - if channel_id: - video_ids_xml = requests.get( - "https://www.youtube.com/feeds/videos.xml?channel_id=%s" % channel_id - ) - channel_json = requests.get( - "https://www.googleapis.com/youtube/v3/channels?part=snippet&id=%s&key=%s" - % (channel_id, settings.YOUTUBE_API_KEY) - ) - channel = json.decode(channel_json.content) - try: - username = channel['items'][0]['snippet']['title'] - description = channel['items'][0]['snippet']['description'] - except (IndexError, KeyError): - return - elif list_id: - playlist_json = requests.get( - "https://www.googleapis.com/youtube/v3/playlists?part=snippet&id=%s&key=%s" - % (list_id, settings.YOUTUBE_API_KEY) - ) - playlist = json.decode(playlist_json.content) - try: - username = playlist['items'][0]['snippet']['title'] - description = playlist['items'][0]['snippet']['description'] - except (IndexError, KeyError): - return - channel_url = "https://www.youtube.com/playlist?list=%s" % list_id - elif username: - video_ids_xml = requests.get("https://www.youtube.com/feeds/videos.xml?user=%s" % username) - description = "YouTube videos uploaded by %s" % username - else: - return - - if list_id: - playlist_json = requests.get( - "https://www.googleapis.com/youtube/v3/playlistItems?part=snippet&playlistId=%s&key=%s" - % (list_id, settings.YOUTUBE_API_KEY) - ) - playlist = json.decode(playlist_json.content) - try: - video_ids = [video['snippet']['resourceId']['videoId'] for video in playlist['items']] - except (IndexError, KeyError): - return - else: - if video_ids_xml.status_code != 200: - return - video_ids_soup = BeautifulSoup(video_ids_xml.content, features="lxml") - channel_url = video_ids_soup.find('author').find('uri').getText() - video_ids = [] - for video_id in video_ids_soup.findAll('yt:videoid'): - video_ids.append(video_id.getText()) - - videos_json = requests.get( - "https://www.googleapis.com/youtube/v3/videos?part=contentDetails%%2Csnippet&id=%s&key=%s" - % (','.join(video_ids), settings.YOUTUBE_API_KEY) - ) - videos = json.decode(videos_json.content) - if 'error' in videos: - logging.debug(" ***> ~FRYoutube returned an error: ~FM~SB%s" % (videos)) - return - - data = {} - data['title'] = "%s's YouTube Videos" % username if 'Uploads' not in username else username - data['link'] = channel_url - data['description'] = description - data['lastBuildDate'] = datetime.datetime.utcnow() - data['generator'] = 'NewsBlur YouTube API v3 Decrapifier - %s' % settings.NEWSBLUR_URL - data['docs'] = None - data['feed_url'] = address - rss = feedgenerator.Atom1Feed(**data) - - for video in videos['items']: - thumbnail = video['snippet']['thumbnails'].get('maxres') - if not thumbnail: - thumbnail = video['snippet']['thumbnails'].get('high') - if not thumbnail: - thumbnail = video['snippet']['thumbnails'].get('medium') - duration_sec = isodate.parse_duration(video['contentDetails']['duration']).seconds - duration_min, seconds = divmod(duration_sec, 60) - hours, minutes = divmod(duration_min, 60) - if hours >= 1: - duration = "%s:%s:%s" % ( - hours, - '{0:02d}'.format(minutes), - '{0:02d}'.format(seconds), - ) - else: - duration = "%s:%s" % (minutes, '{0:02d}'.format(seconds)) - content = """