From 1ccabe2965c09a7451fde15cfd082da2a981b882 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 8 Apr 2016 18:58:08 +0100 Subject: [PATCH] more PR feedback --- synapse/rest/media/v1/preview_url_resource.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index faa88deb6e..2c86a74c7c 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -124,12 +124,10 @@ class PreviewUrlResource(BaseMediaResource): # first check the memory cache - good to handle all the clients on this # HS thundering away to preview the same URL at the same time. - try: - og = self.cache[url] + og = self.cache.get(url) + if og: respond_with_json_bytes(request, 200, json.dumps(og), send_cors=True) return - except: - pass # then check the URL cache in the DB (which will also provide us with # historical previews, if we have any) @@ -197,6 +195,12 @@ class PreviewUrlResource(BaseMediaResource): og = yield self._calc_og(tree, media_info, requester) except UnicodeDecodeError: # XXX: evil evil bodge + # Empirically, sites like google.com mix Latin-1 and utf-8 + # encodings in the same page. The rogue Latin-1 characters + # cause lxml to choke with a UnicodeDecodeError, so if we + # see this we go and do a manual decode of the HTML before + # handing it to lxml as utf-8 encoding, counter-intuitively, + # which seems to make it happier... file = open(media_info['filename']) body = file.read() file.close()