Fix routing loop when fetching remote media

When we proxy a media request to a remote server, add a query-param, which will
tell the remote server to 404 if it doesn't recognise the server_name.

This should fix a routing loop where the server keeps forwarding back to
itself.

Also improves the error handling on remote media fetches, so that we don't
always return a rather obscure 502.
This commit is contained in:
Richard van der Hoff 2017-03-13 13:50:16 +00:00
parent 2cad971ab4
commit 170ccc9de5
4 changed files with 102 additions and 14 deletions

View file

@ -15,6 +15,7 @@
"""Contains exceptions and error codes.""" """Contains exceptions and error codes."""
import json
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -50,12 +51,15 @@ class Codes(object):
class CodeMessageException(RuntimeError): class CodeMessageException(RuntimeError):
"""An exception with integer code and message string attributes.""" """An exception with integer code and message string attributes.
def __init__(self, code, msg): Attributes:
super(CodeMessageException, self).__init__("%d: %s" % (code, msg)) code (int): HTTP error code
response_code_message (str): HTTP reason phrase. None for the default.
"""
def __init__(self, code):
super(CodeMessageException, self).__init__("%d" % code)
self.code = code self.code = code
self.msg = msg
self.response_code_message = None self.response_code_message = None
def error_dict(self): def error_dict(self):
@ -70,17 +74,44 @@ class SynapseError(CodeMessageException):
Args: Args:
code (int): The integer error code (an HTTP response code) code (int): The integer error code (an HTTP response code)
msg (str): The human-readable error message. msg (str): The human-readable error message.
err (str): The error code e.g 'M_FORBIDDEN' errcode (str): The synapse error code e.g 'M_FORBIDDEN'
""" """
super(SynapseError, self).__init__(code, msg) super(SynapseError, self).__init__(code)
self.msg = msg
self.errcode = errcode self.errcode = errcode
def __str__(self):
return "%d: %s %s" % (self.code, self.errcode, self.msg)
def error_dict(self): def error_dict(self):
return cs_error( return cs_error(
self.msg, self.msg,
self.errcode, self.errcode,
) )
@classmethod
def from_http_response_exception(cls, err):
"""Make a SynapseError based on an HTTPResponseException
Args:
err (HttpResponseException):
Returns:
SynapseError:
"""
# try to parse the body as json, to get better errcode/msg, but
# default to M_UNKNOWN with the HTTP status as the error text
try:
j = json.loads(err.response)
except ValueError:
j = {}
errcode = j.get('errcode', Codes.UNKNOWN)
errmsg = j.get('error', err.response_code_message)
res = SynapseError(err.code, errmsg, errcode)
res.response_code_message = err.response_code_message
return res
class RegistrationError(SynapseError): class RegistrationError(SynapseError):
"""An error raised when a registration event fails.""" """An error raised when a registration event fails."""
@ -243,6 +274,20 @@ class FederationError(RuntimeError):
class HttpResponseException(CodeMessageException): class HttpResponseException(CodeMessageException):
"""
Represents an HTTP-level failure of an outbound request
Attributes:
response (str): body of response
"""
def __init__(self, code, msg, response): def __init__(self, code, msg, response):
"""
Args:
code (int): HTTP status code
msg (str): reason phrase from HTTP response status line
response (str): body of response
"""
super(HttpResponseException, self).__init__(code)
self.response_code_message = msg
self.response = response self.response = response
super(HttpResponseException, self).__init__(code, msg)

View file

@ -108,6 +108,12 @@ class MatrixFederationHttpClient(object):
query_bytes=b"", retry_on_dns_fail=True, query_bytes=b"", retry_on_dns_fail=True,
timeout=None, long_retries=False): timeout=None, long_retries=False):
""" Creates and sends a request to the given url """ Creates and sends a request to the given url
Returns:
Deferred: resolves with the http response object on success.
Fails with ``HTTPRequestException``: if we get an HTTP response
code >= 300.
""" """
headers_dict[b"User-Agent"] = [self.version_string] headers_dict[b"User-Agent"] = [self.version_string]
headers_dict[b"Host"] = [destination] headers_dict[b"Host"] = [destination]
@ -408,8 +414,11 @@ class MatrixFederationHttpClient(object):
output_stream (file): File to write the response body to. output_stream (file): File to write the response body to.
args (dict): Optional dictionary used to create the query string. args (dict): Optional dictionary used to create the query string.
Returns: Returns:
A (int,dict) tuple of the file length and a dict of the response Deferred: resolves with an (int,dict) tuple of the file length and
headers. a dict of the response headers.
Fails with ``HTTPRequestException`` if we get an HTTP response code
>= 300
""" """
encoded_args = {} encoded_args = {}
@ -419,7 +428,7 @@ class MatrixFederationHttpClient(object):
encoded_args[k] = [v.encode("UTF-8") for v in vs] encoded_args[k] = [v.encode("UTF-8") for v in vs]
query_bytes = urllib.urlencode(encoded_args, True) query_bytes = urllib.urlencode(encoded_args, True)
logger.debug("Query bytes: %s Retry DNS: %s", args, retry_on_dns_fail) logger.debug("Query bytes: %s Retry DNS: %s", query_bytes, retry_on_dns_fail)
def body_callback(method, url_bytes, headers_dict): def body_callback(method, url_bytes, headers_dict):
self.sign_request(destination, method, url_bytes, headers_dict) self.sign_request(destination, method, url_bytes, headers_dict)

View file

@ -12,6 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import synapse.http.servlet
from ._base import parse_media_id, respond_with_file, respond_404 from ._base import parse_media_id, respond_with_file, respond_404
from twisted.web.resource import Resource from twisted.web.resource import Resource
@ -81,6 +82,17 @@ class DownloadResource(Resource):
@defer.inlineCallbacks @defer.inlineCallbacks
def _respond_remote_file(self, request, server_name, media_id, name): def _respond_remote_file(self, request, server_name, media_id, name):
# don't forward requests for remote media if allow_remote is false
allow_remote = synapse.http.servlet.parse_boolean(
request, "allow_remote", default=True)
if not allow_remote:
logger.info(
"Rejecting request for remote media %s/%s due to allow_remote",
server_name, media_id,
)
respond_404(request)
return
media_info = yield self.media_repo.get_remote_media(server_name, media_id) media_info = yield self.media_repo.get_remote_media(server_name, media_id)
media_type = media_info["media_type"] media_type = media_info["media_type"]

View file

@ -12,6 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import twisted.internet.error
from .upload_resource import UploadResource from .upload_resource import UploadResource
from .download_resource import DownloadResource from .download_resource import DownloadResource
@ -26,7 +27,8 @@ from .thumbnailer import Thumbnailer
from synapse.http.matrixfederationclient import MatrixFederationHttpClient from synapse.http.matrixfederationclient import MatrixFederationHttpClient
from synapse.util.stringutils import random_string from synapse.util.stringutils import random_string
from synapse.api.errors import SynapseError from synapse.api.errors import SynapseError, HttpResponseException, \
NotFoundError
from twisted.internet import defer, threads from twisted.internet import defer, threads
@ -157,11 +159,31 @@ class MediaRepository(object):
try: try:
length, headers = yield self.client.get_file( length, headers = yield self.client.get_file(
server_name, request_path, output_stream=f, server_name, request_path, output_stream=f,
max_size=self.max_upload_size, max_size=self.max_upload_size, args={
# tell the remote server to 404 if it doesn't
# recognise the server_name, to make sure we don't
# end up with a routing loop.
"allow_remote": "false",
}
) )
except twisted.internet.error.DNSLookupError as e:
logger.warn("HTTP error fetching remote media %s/%s: %r",
server_name, media_id, e)
raise NotFoundError()
except HttpResponseException as e:
logger.warn("HTTP error fetching remote media %s/%s: %s",
server_name, media_id, e.response)
raise SynapseError.from_http_response_exception(e)
except Exception as e: except Exception as e:
logger.warn("Failed to fetch remoted media %r", e) logger.warn("Failed to fetch remote media %s/%s",
raise SynapseError(502, "Failed to fetch remoted media") server_name, media_id,
exc_info=True)
if isinstance(e, SynapseError):
raise e
else:
raise SynapseError(502, "Failed to fetch remote media")
media_type = headers["Content-Type"][0] media_type = headers["Content-Type"][0]
time_now_ms = self.clock.time_msec() time_now_ms = self.clock.time_msec()