Fixing broken image handling from Mercury Reader that was causing image URLs with a srcset to be concatenated together. This one's for @yesthatjwz.

This commit is contained in:
Samuel Clay 2018-01-17 16:51:06 -08:00
parent 9468c290fc
commit 8421f667d7
2 changed files with 16 additions and 1 deletion

View file

@ -2692,7 +2692,17 @@ class MStory(mongo.Document):
else:
return
self.image_urls = image_urls
if text:
urls = []
for url in image_urls:
if 'http://' in url[1:] or 'https://' in url[1:]:
continue
urls.append(url)
image_urls = urls
if len(image_urls):
self.image_urls = image_urls
return self.image_urls
def fetch_original_text(self, force=False, request=None, debug=False):
@ -2704,6 +2714,7 @@ class MStory(mongo.Document):
original_doc = ti.fetch(return_document=True)
original_text = original_doc.get('content') if original_doc else None
if original_doc and original_doc.get('image', False):
logging.user(request, "~FBReplacing ~FGoriginal (%s) ~FYimage url: %s" % (self.image_urls, original_doc['image']))
self.image_urls = [original_doc['image']]
else:
self.extract_image_urls(force=force, text=True)

View file

@ -78,6 +78,10 @@ class TextImporter:
url = doc['url']
image = doc['lead_image_url']
if 'http://' in image[1:] or 'https://' in image[1:]:
logging.user(self.request, "~SN~FRRemoving broken image from text: %s" % image)
image = None
return self.process_content(text, title, url, image, skip_save=skip_save, return_document=return_document)
def fetch_manually(self, skip_save=False, return_document=False):