diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 6a5a57102f..99760d622f 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -381,7 +381,10 @@ def _calc_og(tree, media_uri): if 'og:title' not in og: # do some basic spidering of the HTML title = tree.xpath("(//title)[1] | (//h1)[1] | (//h2)[1] | (//h3)[1]") - og['og:title'] = title[0].text.strip() if title else None + if title and title[0].text is not None: + og['og:title'] = title[0].text.strip() + else: + og['og:title'] = None if 'og:image' not in og: # TODO: extract a favicon failing all else diff --git a/tests/test_preview.py b/tests/test_preview.py index ffa52e5dd4..5bd36c74aa 100644 --- a/tests/test_preview.py +++ b/tests/test_preview.py @@ -215,3 +215,53 @@ class PreviewUrlTestCase(unittest.TestCase): u"og:title": u"Foo", u"og:description": u"Some text." }) + + def test_missing_title(self): + html = u""" + + + Some text. + + + """ + + og = decode_and_calc_og(html, "http://example.com/test.html") + + self.assertEquals(og, { + u"og:title": None, + u"og:description": u"Some text." + }) + + def test_h1_as_title(self): + html = u""" + + + +

Title

+ + + """ + + og = decode_and_calc_og(html, "http://example.com/test.html") + + self.assertEquals(og, { + u"og:title": u"Title", + u"og:description": u"Some text." + }) + + def test_missing_title_and_broken_h1(self): + html = u""" + + +

+ Some text. + + + """ + + og = decode_and_calc_og(html, "http://example.com/test.html") + + self.assertEquals(og, { + u"og:title": None, + u"og:description": u"Some text." + })