mirror of
https://github.com/viq/NewsBlur.git
synced 2025-09-18 21:43:31 +00:00
Upgrading readability 0.3.0.5.
This commit is contained in:
parent
2ce9ad8f9c
commit
6d69401887
1 changed file with 3 additions and 1 deletion
4
vendor/readability/readability.py
vendored
4
vendor/readability/readability.py
vendored
|
@@ -160,6 +160,7 @@ class Document:
|
|||
self.remove_unlikely_candidates()
|
||||
self.transform_misused_divs_into_paragraphs()
|
||||
candidates = self.score_paragraphs()
|
||||
|
||||
best_candidate = self.select_best_candidate(candidates)
|
||||
|
||||
if best_candidate:
|
||||
|
@@ -273,7 +274,7 @@ class Document:
|
|||
self.TEXT_LENGTH_THRESHOLD)
|
||||
candidates = {}
|
||||
ordered = []
|
||||
for elem in self.tags(self._html(), "p", "pre", "td", "section", "article"):
|
||||
for elem in self.tags(self._html(), "p", "pre", "td"):
|
||||
parent_node = elem.getparent()
|
||||
if parent_node is None:
|
||||
continue
|
||||
|
@@ -451,6 +452,7 @@ class Document:
|
|||
for kind in ['p', 'img', 'li', 'a', 'embed', 'input']:
|
||||
counts[kind] = len(el.findall('.//%s' % kind))
|
||||
counts["li"] -= 100
|
||||
counts["input"] -= len(el.findall('.//input[@type="hidden"]'))
|
||||
|
||||
# Count the text length excluding any surrounding whitespace
|
||||
content_length = text_length(el)
|
||||
|
|
Loading…
Add table
Reference in a new issue