diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 6851f2376d..b4bcec77ed 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -707,7 +707,7 @@ class FederationClient(FederationBase): @defer.inlineCallbacks def get_missing_events(self, destination, room_id, earliest_events_ids, - latest_events, limit, min_depth): + latest_events, limit, min_depth, timeout): """Tries to fetch events we are missing. This is called when we receive an event without having received all of its ancestors. @@ -721,6 +721,7 @@ class FederationClient(FederationBase): have all previous events for. limit (int): Maximum number of events to return. min_depth (int): Minimum depth of events tor return. + timeout (int): Max time to wait in ms """ try: content = yield self.transport_layer.get_missing_events( @@ -730,6 +731,7 @@ class FederationClient(FederationBase): latest_events=[e.event_id for e in latest_events], limit=limit, min_depth=min_depth, + timeout=timeout, ) events = [ diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index f4c60e67e3..800f04189f 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -425,6 +425,7 @@ class FederationServer(FederationBase): " limit: %d, min_depth: %d", earliest_events, latest_events, limit, min_depth ) + missing_events = yield self.handler.on_get_missing_events( origin, room_id, earliest_events, latest_events, limit, min_depth ) @@ -567,6 +568,25 @@ class FederationServer(FederationBase): len(prevs - seen), pdu.room_id, list(prevs - seen)[:5] ) + # XXX: we set timeout to 10s to help workaround + # https://github.com/matrix-org/synapse/issues/1733. + # The reason is to avoid holding the linearizer lock + # whilst processing inbound /send transactions, causing + # FDs to stack up and block other inbound transactions + # which empirically can currently take up to 30 minutes. + # + # N.B. this explicitly disables retry attempts. + # + # N.B. this also increases our chances of falling back to + # fetching fresh state for the room if the missing event + # can't be found, which slightly reduces our security. + # it may also increase our DAG extremity count for the room, + # causing additional state resolution? See #1760. + # However, fetching state doesn't hold the linearizer lock + # apparently. + # + # see https://github.com/matrix-org/synapse/pull/1744 + missing_events = yield self.get_missing_events( origin, pdu.room_id, @@ -574,6 +594,7 @@ class FederationServer(FederationBase): latest_events=[pdu], limit=10, min_depth=min_depth, + timeout=10000, ) # We want to sort these by depth so we process them and diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 491cdc29e1..915af34409 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -386,7 +386,7 @@ class TransportLayerClient(object): @defer.inlineCallbacks @log_function def get_missing_events(self, destination, room_id, earliest_events, - latest_events, limit, min_depth): + latest_events, limit, min_depth, timeout): path = PREFIX + "/get_missing_events/%s" % (room_id,) content = yield self.client.post_json( @@ -397,7 +397,8 @@ class TransportLayerClient(object): "min_depth": int(min_depth), "earliest_events": earliest_events, "latest_events": latest_events, - } + }, + timeout=timeout, ) defer.returnValue(content)