Merge remote-tracking branch 'upstream/master' into defaultview

2025-09-18 21:43:31 +00:00 · 2014-01-15 20:32:39 +00:00 · 2014-01-15 20:32:39 +00:00 · 3992491bb4
commit 3992491bb4
parent 657e31e554 dfdbc6e398
8 changed files with 93 additions and 34 deletions
--- a/apps/reader/views.py
+++ b/apps/reader/views.py
@ -1814,7 +1814,11 @@ def mark_story_as_unstarred(request):
    if not starred_story:
        starred_story = MStarredStory.objects(user_id=request.user.pk, story_hash=story_id)
    if starred_story:
-        logging.user(request, "~FCUnstarring: ~SB%s" % (starred_story[0].story_title[:50]))
+        starred_story = starred_story[0]
+        logging.user(request, "~FCUnstarring: ~SB%s" % (starred_story.story_title[:50]))
+        MActivity.remove_starred_story(user_id=request.user.pk, 
+                                       story_feed_id=starred_story.story_feed_id,
+                                       story_id=starred_story.story_guid)
        starred_story.delete()
        MStarredStoryCounts.count_tags_for_user(request.user.pk)
        starred_counts = MStarredStoryCounts.user_counts(request.user.pk)
--- a/apps/social/models.py
+++ b/apps/social/models.py
@ -2901,7 +2901,18 @@ class MActivity(mongo.Document):
                                  story_feed_id=story_feed_id,
                                  content_id=story_id,
                                  defaults=dict(content=story_title))
-                           
+
+    @classmethod
+    def remove_starred_story(cls, user_id, story_feed_id, story_id):
+        params = {
+            'user_id': user_id,
+            'category': 'star',
+            'story_feed_id': story_feed_id,
+            'content_id': story_id,
+        }
+        original = cls.objects.filter(**params)
+        original.delete()
+    
    @classmethod
    def new_feed_subscription(cls, user_id, feed_id, feed_title):
        params = {
--- a/media/js/newsblur/views/story_detail_view.js
+++ b/media/js/newsblur/views/story_detail_view.js
@ -437,16 +437,21 @@ NEWSBLUR.Views.StoryDetailView = Backbone.View.extend({
        this.model.on('change:images_loaded', _.bind(function() {
            this.resize_starred_tags();
        }, this));
+        var is_truncatable = this.is_truncatable();
        
-        if (!this.is_truncatable()) return;
+        if (!is_truncatable && !this.model.get('starred')) return;
        
        this.truncate_delay = 100;
        this.images_to_load = this.$('img').length;
-        this.truncate_story_height();
-
+        if (is_truncatable) this.truncate_story_height();
        this.$('img').load(_.bind(function() {
            this.images_to_load -= 1;
-            this.truncate_story_height();
+            if (is_truncatable) this.truncate_story_height();
+            if (this.images_to_load <= 0) {
+                this.model.set('images_loaded', true);
+            } else {
+                this.model.set('images_loaded', false);
+            }
        }, this));
    },
    
--- a/media/js/newsblur/views/story_save_view.js
+++ b/media/js/newsblur/views/story_save_view.js
@ -182,6 +182,7 @@ NEWSBLUR.Views.StorySaveView = Backbone.View.extend({
        if (!NEWSBLUR.reader.flags.narrow_content &&
            !options.close && !options.force && new_sideoptions_height >= original_height) {
            // Sideoptions too big, embiggen left side
+            console.log(["Sideoption too big, embiggening", content_height, sideoptions_height, new_sideoptions_height]);
            $story_content.stop(true, true).animate({
                'height': new_sideoptions_height
            }, {
--- a/media/js/newsblur/views/story_title_view.js
+++ b/media/js/newsblur/views/story_title_view.js
@ -76,7 +76,7 @@ NEWSBLUR.Views.StoryTitleView = Backbone.View.extend({
            });
            this.text_view.fetch_and_render(this.model, temporary_text);
            this.$(".NB-story-detail").html(this.text_view.$el);
-            this.story_detail.render_starred_tags();
+            this.text_view.story_detail.render_starred_tags();
        } else {
            this.story_detail = new NEWSBLUR.Views.StoryDetailView({
                model: this.model,
--- a/media/js/newsblur/views/text_tab_view.js
+++ b/media/js/newsblur/views/text_tab_view.js
@ -40,7 +40,7 @@ NEWSBLUR.Views.TextTabView = Backbone.View.extend({
        if (this.story == story) return;
        
        this.story = story;
-        this.$el.html(new NEWSBLUR.Views.StoryDetailView({
+        this.story_detail = new NEWSBLUR.Views.StoryDetailView({
            model: this.story,
            collection: this.story.collection,
            show_feed_title: true,
@ -48,7 +48,8 @@ NEWSBLUR.Views.TextTabView = Backbone.View.extend({
            text_view: true,
            tagName: 'div',
            inline_story_title: this.options.inline_story_title
-        }).render().el);
+        }).render();
+        this.$el.html(this.story_detail.el);
        this.$el.scrollTop(0);
        this.show_loading();
        NEWSBLUR.assets.fetch_original_text(story.get('id'), story.get('story_feed_id'), 
@ -72,6 +73,7 @@ NEWSBLUR.Views.TextTabView = Backbone.View.extend({
        } else {
            $content.html(this.story.get('original_text'));
            NEWSBLUR.reader.make_story_titles_pane_counter();
+            this.resize_starred_tags();
        }
        $content.css('opacity', 0);
        $content.show();
@ -142,6 +144,13 @@ NEWSBLUR.Views.TextTabView = Backbone.View.extend({
        this.$el.append($empty);
    },
    
+    resize_starred_tags: function() {
+        if (this.story.get('starred')) {
+            this.story_detail.save_view.reset_height({immediate: true});
+        }
+    },
+
+    
    // ==========
    // = Events =
    // ==========
--- a/vendor/readability/htmls.py
+++ b/vendor/readability/htmls.py
@ -3,20 +3,19 @@ from encoding import get_encoding
 from lxml.html import tostring
 import logging
 import lxml.html
-import re
-
-logging.getLogger().setLevel(logging.DEBUG)
+import re, sys

 utf8_parser = lxml.html.HTMLParser(encoding='utf-8')

 def build_doc(page):
    if isinstance(page, unicode):
+        enc = None
        page_unicode = page
    else:
-        enc = get_encoding(page)
+        enc = get_encoding(page) or 'utf-8'
        page_unicode = page.decode(enc, 'replace')
    doc = lxml.html.document_fromstring(page_unicode.encode('utf-8', 'replace'), parser=utf8_parser)
-    return doc
+    return doc, enc

 def js_re(src, pattern, flags, repl):
    """Replace every match of ``pattern`` in ``src`` using JS-style group refs.

    :param src: the string to search and substitute in.
    :param pattern: regular expression source, compiled with ``flags``.
    :param flags: ``re`` module flags passed to ``re.compile``.
    :param repl: replacement text; each ``$`` is rewritten to a backslash, so a
        JavaScript-style group reference such as ``$1`` becomes ``\\1``.
    :returns: ``src`` with all matches of ``pattern`` replaced by ``repl``.
    """
    # Pattern.sub() takes (repl, string), in that order: the replacement comes
    # first and the text being searched second.
    return re.compile(pattern, flags).sub(repl.replace('$', '\\'), src)
@ -57,7 +56,7 @@ def add_match(collection, text, orig):

 def shorten_title(doc):
    title = doc.find('.//title')
-    if title is None or len(title.text) == 0:
+    if title is None or title.text is None or len(title.text) == 0:
        return ''

    title = orig = norm_title(title.text)
@ -111,5 +110,5 @@ def get_body(doc):
        #BeautifulSoup(cleaned) #FIXME do we really need to try loading it?
        return cleaned
    except Exception: #FIXME find the equivalent lxml error
-        logging.error("cleansing broke html content: %s\n---------\n%s" % (raw_html, cleaned))
-        return raw_html
+        #logging.error("cleansing broke html content: %s\n---------\n%s" % (raw_html, cleaned))
+        return raw_html
--- a/vendor/readability/readability.py
+++ b/vendor/readability/readability.py
@ -76,13 +76,23 @@ def clean(text):
 def text_length(i):
    """Return the length of ``i``'s text content after it is run through ``clean``.

    A falsy ``text_content()`` result is treated as the empty string.
    """
    content = i.text_content() or ""
    return len(clean(content))

+regexp_type = type(re.compile('hello, world'))
+
+def compile_pattern(elements):
+    if not elements:
+        return None
+    if isinstance(elements, regexp_type):
+        return elements
+    if isinstance(elements, basestring):
+        elements = elements.split(',')
+    return re.compile(u'|'.join([re.escape(x.lower()) for x in elements]), re.U)

 class Document:
    """Class to build a etree document out of html."""
    TEXT_LENGTH_THRESHOLD = 25
    RETRY_LENGTH = 250

-    def __init__(self, input, **options):
+    def __init__(self, input, positive_keywords=None, negative_keywords=None, **options):
        """Generate the document

        :param input: string of the html content.
@ -93,11 +103,16 @@ class Document:
            - min_text_length:
            - retry_length:
            - url: will allow adjusting links to be absolute
-
+            - positive_keywords: the list of positive search patterns in classes and ids, for example: ["news-item", "block"]
+            - negative_keywords: the list of negative search patterns in classes and ids, for example: ["mysidebar", "related", "ads"]
+            Also positive_keywords and negative_keywords could be a regexp.
        """
        self.input = input
        self.options = options
        self.html = None
+        self.encoding = None
+        self.positive_keywords = compile_pattern(positive_keywords)
+        self.negative_keywords = compile_pattern(negative_keywords)

    def _html(self, force=False):
        if force or self.html is None:
@ -105,7 +120,7 @@ class Document:
        return self.html

    def _parse(self, input):
-        doc = build_doc(input)
+        doc, self.encoding = build_doc(input)
        doc = html_cleaner.clean_html(doc)
        base_href = self.options.get('url', None)
        if base_href:
@ -123,6 +138,9 @@ class Document:
    def short_title(self):
        return shorten_title(self._html(True))

+    def get_clean_html(self):
+         return clean_attributes(tounicode(self.html))
+
    def summary(self, html_partial=False):
        """Generate the summary of the html docuemnt

@ -308,19 +326,25 @@ class Document:

    def class_weight(self, e):
        weight = 0
-        if e.get('class', None):
-            if REGEXES['negativeRe'].search(e.get('class')):
-                weight -= 25
+        for feature in [e.get('class', None), e.get('id', None)]:
+            if feature:
+                if REGEXES['negativeRe'].search(feature):
+                    weight -= 25

-            if REGEXES['positiveRe'].search(e.get('class')):
-                weight += 25
+                if REGEXES['positiveRe'].search(feature):
+                    weight += 25

-        if e.get('id', None):
-            if REGEXES['negativeRe'].search(e.get('id')):
-                weight -= 25
+                if self.positive_keywords and self.positive_keywords.search(feature):
+                    weight += 25

-            if REGEXES['positiveRe'].search(e.get('id')):
-                weight += 25
+                if self.negative_keywords and self.negative_keywords.search(feature):
+                    weight -= 25
+
+        if self.positive_keywords and self.positive_keywords.match('tag-'+e.tag):
+            weight += 25
+
+        if self.negative_keywords and self.negative_keywords.match('tag-'+e.tag):
+            weight -= 25

        return weight

@ -530,7 +554,8 @@ class Document:
                #el.attrib = {} #FIXME:Checkout the effects of disabling this
                pass

-        return clean_attributes(tounicode(node))
+        self.html = node
+        return self.get_clean_html()


 class HashableElement():
@ -565,6 +590,8 @@ def main():
    parser = OptionParser(usage="%prog: [options] [file]")
    parser.add_option('-v', '--verbose', action='store_true')
    parser.add_option('-u', '--url', default=None, help="use URL instead of a local file")
+    parser.add_option('-p', '--positive-keywords', default=None, help="positive keywords (separated with comma)", action='store')
+    parser.add_option('-n', '--negative-keywords', default=None, help="negative keywords (separated with comma)", action='store')
    (options, args) = parser.parse_args()

    if not (len(args) == 1 or options.url):
@ -577,13 +604,16 @@ def main():
        file = urllib.urlopen(options.url)
    else:
        file = open(args[0], 'rt')
-    enc = sys.__stdout__.encoding or 'utf-8'
+    enc = sys.__stdout__.encoding or 'utf-8' # XXX: this hack could not always work, better to set PYTHONIOENCODING
    try:
        print Document(file.read(),
            debug=options.verbose,
-            url=options.url).summary().encode(enc, 'replace')
+            url=options.url,
+            positive_keywords = options.positive_keywords,
+            negative_keywords = options.negative_keywords,
+        ).summary().encode(enc, 'replace')
    finally:
        file.close()

 if __name__ == '__main__':
-    main()
+    main()