From 170ccc9de5c09a543a60a7d9eada2e02ba9c9980 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 13 Mar 2017 13:50:16 +0000 Subject: [PATCH] Fix routing loop when fetching remote media When we proxy a media request to a remote server, add a query-param, which will tell the remote server to 404 if it doesn't recognise the server_name. This should fix a routing loop where the server keeps forwarding back to itself. Also improves the error handling on remote media fetches, so that we don't always return a rather obscure 502. --- synapse/api/errors.py | 59 +++++++++++++++++++--- synapse/http/matrixfederationclient.py | 15 ++++-- synapse/rest/media/v1/download_resource.py | 12 +++++ synapse/rest/media/v1/media_repository.py | 30 +++++++++-- 4 files changed, 102 insertions(+), 14 deletions(-) diff --git a/synapse/api/errors.py b/synapse/api/errors.py index 921c457738..014bd60b9d 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -15,6 +15,7 @@ """Contains exceptions and error codes.""" +import json import logging logger = logging.getLogger(__name__) @@ -50,12 +51,15 @@ class Codes(object): class CodeMessageException(RuntimeError): - """An exception with integer code and message string attributes.""" + """An exception with integer code and message string attributes. - def __init__(self, code, msg): - super(CodeMessageException, self).__init__("%d: %s" % (code, msg)) + Attributes: + code (int): HTTP error code + response_code_message (str): HTTP reason phrase. None for the default. + """ + def __init__(self, code): + super(CodeMessageException, self).__init__("%d" % code) self.code = code - self.msg = msg self.response_code_message = None def error_dict(self): @@ -70,17 +74,44 @@ class SynapseError(CodeMessageException): Args: code (int): The integer error code (an HTTP response code) msg (str): The human-readable error message. - err (str): The error code e.g 'M_FORBIDDEN' + errcode (str): The synapse error code e.g 'M_FORBIDDEN' """ - super(SynapseError, self).__init__(code, msg) + super(SynapseError, self).__init__(code) + self.msg = msg self.errcode = errcode + def __str__(self): + return "%d: %s %s" % (self.code, self.errcode, self.msg) + def error_dict(self): return cs_error( self.msg, self.errcode, ) + @classmethod + def from_http_response_exception(cls, err): + """Make a SynapseError based on an HTTPResponseException + + Args: + err (HttpResponseException): + + Returns: + SynapseError: + """ + # try to parse the body as json, to get better errcode/msg, but + # default to M_UNKNOWN with the HTTP status as the error text + try: + j = json.loads(err.response) + except ValueError: + j = {} + errcode = j.get('errcode', Codes.UNKNOWN) + errmsg = j.get('error', err.response_code_message) + + res = SynapseError(err.code, errmsg, errcode) + res.response_code_message = err.response_code_message + return res + class RegistrationError(SynapseError): """An error raised when a registration event fails.""" @@ -243,6 +274,20 @@ class FederationError(RuntimeError): class HttpResponseException(CodeMessageException): + """ + Represents an HTTP-level failure of an outbound request + + Attributes: + response (str): body of response + """ def __init__(self, code, msg, response): + """ + + Args: + code (int): HTTP status code + msg (str): reason phrase from HTTP response status line + response (str): body of response + """ + super(HttpResponseException, self).__init__(code) + self.response_code_message = msg self.response = response - super(HttpResponseException, self).__init__(code, msg) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 78b92cef36..82586e3dea 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -108,6 +108,12 @@ class MatrixFederationHttpClient(object): query_bytes=b"", retry_on_dns_fail=True, timeout=None, long_retries=False): """ Creates and sends a request to the given url + + Returns: + Deferred: resolves with the http response object on success. + + Fails with ``HTTPRequestException``: if we get an HTTP response + code >= 300. """ headers_dict[b"User-Agent"] = [self.version_string] headers_dict[b"Host"] = [destination] @@ -408,8 +414,11 @@ class MatrixFederationHttpClient(object): output_stream (file): File to write the response body to. args (dict): Optional dictionary used to create the query string. Returns: - A (int,dict) tuple of the file length and a dict of the response - headers. + Deferred: resolves with an (int,dict) tuple of the file length and + a dict of the response headers. + + Fails with ``HTTPRequestException`` if we get an HTTP response code + >= 300 """ encoded_args = {} @@ -419,7 +428,7 @@ class MatrixFederationHttpClient(object): encoded_args[k] = [v.encode("UTF-8") for v in vs] query_bytes = urllib.urlencode(encoded_args, True) - logger.debug("Query bytes: %s Retry DNS: %s", args, retry_on_dns_fail) + logger.debug("Query bytes: %s Retry DNS: %s", query_bytes, retry_on_dns_fail) def body_callback(method, url_bytes, headers_dict): self.sign_request(destination, method, url_bytes, headers_dict) diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py index dfb87ffd15..6788375e85 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/v1/download_resource.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import synapse.http.servlet from ._base import parse_media_id, respond_with_file, respond_404 from twisted.web.resource import Resource @@ -81,6 +82,17 @@ class DownloadResource(Resource): @defer.inlineCallbacks def _respond_remote_file(self, request, server_name, media_id, name): + # don't forward requests for remote media if allow_remote is false + allow_remote = synapse.http.servlet.parse_boolean( + request, "allow_remote", default=True) + if not allow_remote: + logger.info( + "Rejecting request for remote media %s/%s due to allow_remote", + server_name, media_id, + ) + respond_404(request) + return + media_info = yield self.media_repo.get_remote_media(server_name, media_id) media_type = media_info["media_type"] diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 481ffee200..66464cfe66 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import twisted.internet.error from .upload_resource import UploadResource from .download_resource import DownloadResource @@ -26,7 +27,8 @@ from .thumbnailer import Thumbnailer from synapse.http.matrixfederationclient import MatrixFederationHttpClient from synapse.util.stringutils import random_string -from synapse.api.errors import SynapseError +from synapse.api.errors import SynapseError, HttpResponseException, \ + NotFoundError from twisted.internet import defer, threads @@ -157,11 +159,31 @@ class MediaRepository(object): try: length, headers = yield self.client.get_file( server_name, request_path, output_stream=f, - max_size=self.max_upload_size, + max_size=self.max_upload_size, args={ + # tell the remote server to 404 if it doesn't + # recognise the server_name, to make sure we don't + # end up with a routing loop. + "allow_remote": "false", + } ) + except twisted.internet.error.DNSLookupError as e: + logger.warn("HTTP error fetching remote media %s/%s: %r", + server_name, media_id, e) + raise NotFoundError() + + except HttpResponseException as e: + logger.warn("HTTP error fetching remote media %s/%s: %s", + server_name, media_id, e.response) + raise SynapseError.from_http_response_exception(e) + except Exception as e: - logger.warn("Failed to fetch remoted media %r", e) - raise SynapseError(502, "Failed to fetch remoted media") + logger.warn("Failed to fetch remote media %s/%s", + server_name, media_id, + exc_info=True) + if isinstance(e, SynapseError): + raise e + else: + raise SynapseError(502, "Failed to fetch remote media") media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec()