From d39dacb5929d94a593bd4e01271e0b6ff0e5b79c Mon Sep 17 00:00:00 2001 From: Samuel Clay Date: Thu, 20 May 2010 15:13:25 -0400 Subject: [PATCH] Cleanup in prep for move from urllib to eventlets. --- apps/rss_feeds/models.py | 36 ++++++++++++++++++------------------ utils/feed_fetcher.py | 9 ++------- 2 files changed, 20 insertions(+), 25 deletions(-) diff --git a/apps/rss_feeds/models.py b/apps/rss_feeds/models.py index 3249bbc70..3bb348845 100644 --- a/apps/rss_feeds/models.py +++ b/apps/rss_feeds/models.py @@ -1,25 +1,17 @@ +import time +import settings +import difflib +import datetime +import hashlib from django.db import models from django.db import IntegrityError -from django.contrib.auth.models import User -from django.contrib.contenttypes.models import ContentType -from django.core import serializers from django.core.cache import cache -from utils import feedparser, object_manager, json -from utils.dateutil.parser import parse as dateutil_parse -from utils.feed_functions import encode, prints, mtime, levenshtein_distance -import time, datetime, random -from django.utils.http import urlquote -from django.utils.safestring import mark_safe +from utils import json +from utils.feed_functions import levenshtein_distance from utils.story_functions import format_story_link_date__short from utils.story_functions import format_story_link_date__long from utils.story_functions import pre_process_story from utils.compressed_textfield import StoryField -from django.db.models import Q -import settings -import logging -import difflib -import datetime -import hashlib from utils.diff import HTMLDiff USER_AGENT = 'Protopub v1.0 - protopub.com' @@ -102,7 +94,7 @@ class Feed(models.Model): existing_story, story_has_changed = self._exists_story(story, story_content, existing_stories) story_author, _ = self._save_story_author(story.get('author')) if existing_story is None: - pub_date = datetime.datetime.timetuple(story.get('published')) + # pub_date = datetime.datetime.timetuple(story.get('published')) # logging.debug('- New story: %s %s' % (pub_date, story.get('title'))) s = Story(story_feed = self, @@ -332,7 +324,15 @@ class StoryAuthor(models.Model): def __unicode__(self): return '%s - %s' % (self.feed, self.author_name) - + +class FeedPage(models.Model): + feed = models.OneToOneField(Feed, related_name="feed_page") + page_data = StoryField(null=True, blank=True) + +class FeedXML(models.Model): + feed = models.OneToOneField(Feed, related_name="feed_xml") + rss_xml = StoryField(null=True, blank=True) + class Story(models.Model): '''A feed item''' story_feed = models.ForeignKey(Feed, related_name="stories") @@ -372,7 +372,7 @@ class FeedUpdateHistory(models.Model): def __unicode__(self): return "[%s] %s feeds: %s seconds" % ( - fetch_date.strftime('%F %d'), + self.fetch_date.strftime('%F %d'), self.number_of_feeds, self.seconds_taken, ) diff --git a/utils/feed_fetcher.py b/utils/feed_fetcher.py index 3c1449c2a..a44789f08 100644 --- a/utils/feed_fetcher.py +++ b/utils/feed_fetcher.py @@ -1,21 +1,17 @@ from apps.rss_feeds.models import Feed, Story, FeedUpdateHistory from django.core.cache import cache -from apps.reader.models import UserSubscription, UserStory +from apps.reader.models import UserSubscription from apps.rss_feeds.importer import PageImporter from utils import feedparser from django.db import transaction from django.db.models import Q -from utils.dateutil.parser import parse as dateutil_parse from utils.story_functions import pre_process_story import sys import time import logging import datetime -# import threading import traceback import multiprocessing -import Queue -import datetime import random import socket @@ -69,7 +65,7 @@ class FetchFeed: print(log_msg) return FEED_SAME, None - next_scheduled_update = self.set_next_scheduled_update() + self.set_next_scheduled_update() # we check the etag and the modified time to save bandwith and avoid bans try: @@ -205,7 +201,6 @@ class ProcessFeed: # Compare new stories to existing stories, adding and updating - num_entries = len(self.fpf.entries) start_date = datetime.datetime.now() end_date = datetime.datetime.now() story_guids = []