From 732bbf6737813b75e0cf9a255cae73f529c981ec Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 13 Oct 2021 07:00:07 -0400 Subject: [PATCH] Be more lenient when parsing the version for oEmbed responses. (#11065) --- changelog.d/11065.misc | 1 + mypy.ini | 1 + synapse/rest/media/v1/oembed.py | 13 +++-- synapse/rest/media/v1/preview_url_resource.py | 2 +- tests/rest/media/v1/test_oembed.py | 51 +++++++++++++++++++ 5 files changed, 60 insertions(+), 8 deletions(-) create mode 100644 changelog.d/11065.misc create mode 100644 tests/rest/media/v1/test_oembed.py diff --git a/changelog.d/11065.misc b/changelog.d/11065.misc new file mode 100644 index 0000000000..c6f37fc52b --- /dev/null +++ b/changelog.d/11065.misc @@ -0,0 +1 @@ +Be more lenient when parsing oEmbed response versions. diff --git a/mypy.ini b/mypy.ini index 22768a037d..93757cd95d 100644 --- a/mypy.ini +++ b/mypy.ini @@ -90,6 +90,7 @@ files = tests/rest/client/test_login.py, tests/rest/client/test_auth.py, tests/rest/media/v1/test_filepath.py, + tests/rest/media/v1/test_oembed.py, tests/storage/test_state.py, tests/storage/test_user_directory.py, tests/util/test_itertools.py, diff --git a/synapse/rest/media/v1/oembed.py b/synapse/rest/media/v1/oembed.py index 78b1603f19..2a59552c20 100644 --- a/synapse/rest/media/v1/oembed.py +++ b/synapse/rest/media/v1/oembed.py @@ -17,7 +17,6 @@ from typing import TYPE_CHECKING, List, Optional import attr -from synapse.http.client import SimpleHttpClient from synapse.types import JsonDict from synapse.util import json_decoder @@ -48,7 +47,7 @@ class OEmbedProvider: requesting/parsing oEmbed content. """ - def __init__(self, hs: "HomeServer", client: SimpleHttpClient): + def __init__(self, hs: "HomeServer"): self._oembed_patterns = {} for oembed_endpoint in hs.config.oembed.oembed_patterns: api_endpoint = oembed_endpoint.api_endpoint @@ -69,7 +68,6 @@ class OEmbedProvider: # Iterate through each URL pattern and point it to the endpoint. for pattern in oembed_endpoint.url_patterns: self._oembed_patterns[pattern] = api_endpoint - self._client = client def get_oembed_url(self, url: str) -> Optional[str]: """ @@ -139,10 +137,11 @@ class OEmbedProvider: # oEmbed responses *must* be UTF-8 according to the spec. oembed = json_decoder.decode(raw_body.decode("utf-8")) - # Ensure there's a version of 1.0. - oembed_version = oembed["version"] - if oembed_version != "1.0": - raise RuntimeError(f"Invalid version: {oembed_version}") + # The version is a required string field, but not always provided, + # or sometimes provided as a float. Be lenient. + oembed_version = oembed.get("version", "1.0") + if oembed_version != "1.0" and oembed_version != 1: + raise RuntimeError(f"Invalid oEmbed version: {oembed_version}") # Ensure the cache age is None or an int. cache_age = oembed.get("cache_age") diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 1fe0fc8aa9..5bddd21ef1 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -140,7 +140,7 @@ class PreviewUrlResource(DirectServeJsonResource): self.primary_base_path = media_repo.primary_base_path self.media_storage = media_storage - self._oembed = OEmbedProvider(hs, self.client) + self._oembed = OEmbedProvider(hs) # We run the background jobs if we're the instance specified (or no # instance is specified, where we assume there is only one instance diff --git a/tests/rest/media/v1/test_oembed.py b/tests/rest/media/v1/test_oembed.py new file mode 100644 index 0000000000..048d0ca44a --- /dev/null +++ b/tests/rest/media/v1/test_oembed.py @@ -0,0 +1,51 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json + +from twisted.test.proto_helpers import MemoryReactor + +from synapse.rest.media.v1.oembed import OEmbedProvider +from synapse.server import HomeServer +from synapse.types import JsonDict +from synapse.util import Clock + +from tests.unittest import HomeserverTestCase + + +class OEmbedTests(HomeserverTestCase): + def prepare(self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer): + self.oembed = OEmbedProvider(homeserver) + + def parse_response(self, response: JsonDict): + return self.oembed.parse_oembed_response( + "https://test", json.dumps(response).encode("utf-8") + ) + + def test_version(self): + """Accept versions that are similar to 1.0 as a string or int (or missing).""" + for version in ("1.0", 1.0, 1): + result = self.parse_response({"version": version, "type": "link"}) + # An empty Open Graph response is an error, ensure the URL is included. + self.assertIn("og:url", result.open_graph_result) + + # A missing version should be treated as 1.0. + result = self.parse_response({"type": "link"}) + self.assertIn("og:url", result.open_graph_result) + + # Invalid versions should be rejected. + for version in ("2.0", "1", 1.1, 0, None, {}, []): + result = self.parse_response({"version": version, "type": "link"}) + # An empty Open Graph response is an error, ensure the URL is included. + self.assertEqual({}, result.open_graph_result)