From f96b85eca8cf14530f26b678dbb4900c54fb6a59 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 31 Mar 2022 11:49:49 +0200 Subject: [PATCH] Ensure the type of URL attributes is always str when matching against preview blacklist (#12333) --- changelog.d/12333.bugfix | 1 + synapse/rest/media/v1/preview_url_resource.py | 9 +++- tests/rest/media/v1/test_url_preview.py | 43 ++++++++++++++++++- 3 files changed, 49 insertions(+), 4 deletions(-) create mode 100644 changelog.d/12333.bugfix diff --git a/changelog.d/12333.bugfix b/changelog.d/12333.bugfix new file mode 100644 index 0000000000..2c073a77d5 --- /dev/null +++ b/changelog.d/12333.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug affecting URL previews that would generate a 500 response instead of a 403 if the previewed URL includes a port that isn't allowed by the relevant blacklist. diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index d47af8ead6..50383bdbd1 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -200,12 +200,17 @@ class PreviewUrlResource(DirectServeJsonResource): match = False continue + # Some attributes might not be parsed as strings by urlsplit (such as the + # port, which is parsed as an int). Because we use match functions that + # expect strings, we want to make sure that's what we give them. + value_str = str(value) + if pattern.startswith("^"): - if not re.match(pattern, getattr(url_tuple, attrib)): + if not re.match(pattern, value_str): match = False continue else: - if not fnmatch.fnmatch(getattr(url_tuple, attrib), pattern): + if not fnmatch.fnmatch(value_str, pattern): match = False continue if match: diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py index 5148c39874..3b24d0ace6 100644 --- a/tests/rest/media/v1/test_url_preview.py +++ b/tests/rest/media/v1/test_url_preview.py @@ -17,7 +17,7 @@ import json import os import re from typing import Any, Dict, Optional, Sequence, Tuple, Type -from urllib.parse import urlencode +from urllib.parse import quote, urlencode from twisted.internet._resolver import HostResolution from twisted.internet.address import IPv4Address, IPv6Address @@ -69,7 +69,6 @@ class URLPreviewTests(unittest.HomeserverTestCase): "2001:800::/21", ) config["url_preview_ip_range_whitelist"] = ("1.1.1.1",) - config["url_preview_url_blacklist"] = [] config["url_preview_accept_language"] = [ "en-UK", "en-US;q=0.9", @@ -1123,3 +1122,43 @@ class URLPreviewTests(unittest.HomeserverTestCase): os.path.exists(path), f"{os.path.relpath(path, self.media_store_path)} was not deleted", ) + + @unittest.override_config({"url_preview_url_blacklist": [{"port": "*"}]}) + def test_blacklist_port(self) -> None: + """Tests that blacklisting URLs with a port makes previewing such URLs + fail with a 403 error and doesn't impact other previews. + """ + self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")] + + bad_url = quote("http://matrix.org:8888/foo") + good_url = quote("http://matrix.org/foo") + + channel = self.make_request( + "GET", + "preview_url?url=" + bad_url, + shorthand=False, + await_result=False, + ) + self.pump() + self.assertEqual(channel.code, 403, channel.result) + + channel = self.make_request( + "GET", + "preview_url?url=" + good_url, + shorthand=False, + await_result=False, + ) + self.pump() + + client = self.reactor.tcpClients[0][2].buildProtocol(None) + server = AccumulatingProtocol() + server.makeConnection(FakeTransport(client, self.reactor)) + client.makeConnection(FakeTransport(server, self.reactor)) + client.dataReceived( + b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: text/html\r\n\r\n" + % (len(self.end_content),) + + self.end_content + ) + + self.pump() + self.assertEqual(channel.code, 200)