Improve failover logic for MSC3083 restricted rooms. (#10447)

If the federation client receives an M_UNABLE_TO_AUTHORISE_JOIN or
M_UNABLE_TO_GRANT_JOIN response, it will attempt another server
before giving up completely.
This commit is contained in:
Patrick Cloke 2021-07-29 07:50:14 -04:00 committed by GitHub
parent f8c87c65eb
commit 3a541a7daa
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 40 additions and 4 deletions

View file

@ -0,0 +1 @@
Update support for [MSC3083](https://github.com/matrix-org/matrix-doc/pull/3083) to consider changes in the MSC around which servers can issue join events.

View file

@ -22,6 +22,7 @@ from typing import (
Awaitable, Awaitable,
Callable, Callable,
Collection, Collection,
Container,
Dict, Dict,
Iterable, Iterable,
List, List,
@ -513,6 +514,7 @@ class FederationClient(FederationBase):
description: str, description: str,
destinations: Iterable[str], destinations: Iterable[str],
callback: Callable[[str], Awaitable[T]], callback: Callable[[str], Awaitable[T]],
failover_errcodes: Optional[Container[str]] = None,
failover_on_unknown_endpoint: bool = False, failover_on_unknown_endpoint: bool = False,
) -> T: ) -> T:
"""Try an operation on a series of servers, until it succeeds """Try an operation on a series of servers, until it succeeds
@ -533,6 +535,9 @@ class FederationClient(FederationBase):
next server tried. Normally the stacktrace is logged but this is next server tried. Normally the stacktrace is logged but this is
suppressed if the exception is an InvalidResponseError. suppressed if the exception is an InvalidResponseError.
failover_errcodes: Error codes (specific to this endpoint) which should
cause a failover when received as part of an HTTP 400 error.
failover_on_unknown_endpoint: if True, we will try other servers if it looks failover_on_unknown_endpoint: if True, we will try other servers if it looks
like a server doesn't support the endpoint. This is typically useful like a server doesn't support the endpoint. This is typically useful
if the endpoint in question is new or experimental. if the endpoint in question is new or experimental.
@ -544,6 +549,9 @@ class FederationClient(FederationBase):
SynapseError if the chosen remote server returns a 300/400 code, or SynapseError if the chosen remote server returns a 300/400 code, or
no servers were reachable. no servers were reachable.
""" """
if failover_errcodes is None:
failover_errcodes = ()
for destination in destinations: for destination in destinations:
if destination == self.server_name: if destination == self.server_name:
continue continue
@ -558,11 +566,17 @@ class FederationClient(FederationBase):
synapse_error = e.to_synapse_error() synapse_error = e.to_synapse_error()
failover = False failover = False
# Failover on an internal server error, or if the destination # Failover should occur:
# doesn't implement the endpoint for some reason. #
# * On internal server errors.
# * If the destination responds that it cannot complete the request.
# * If the destination doesn't implement the endpoint for some reason.
if 500 <= e.code < 600: if 500 <= e.code < 600:
failover = True failover = True
elif e.code == 400 and synapse_error.errcode in failover_errcodes:
failover = True
elif failover_on_unknown_endpoint and self._is_unknown_endpoint( elif failover_on_unknown_endpoint and self._is_unknown_endpoint(
e, synapse_error e, synapse_error
): ):
@ -678,8 +692,20 @@ class FederationClient(FederationBase):
return destination, ev, room_version return destination, ev, room_version
# MSC3083 defines additional error codes for room joins. Unfortunately
# we do not yet know the room version, so assume these will only be
# returned by valid room versions.
failover_errcodes = (
(Codes.UNABLE_AUTHORISE_JOIN, Codes.UNABLE_TO_GRANT_JOIN)
if membership == Membership.JOIN
else None
)
return await self._try_destination_list( return await self._try_destination_list(
"make_" + membership, destinations, send_request "make_" + membership,
destinations,
send_request,
failover_errcodes=failover_errcodes,
) )
async def send_join( async def send_join(
@ -818,7 +844,14 @@ class FederationClient(FederationBase):
origin=destination, origin=destination,
) )
# MSC3083 defines additional error codes for room joins.
failover_errcodes = None
if room_version.msc3083_join_rules: if room_version.msc3083_join_rules:
failover_errcodes = (
Codes.UNABLE_AUTHORISE_JOIN,
Codes.UNABLE_TO_GRANT_JOIN,
)
# If the join is being authorised via allow rules, we need to send # If the join is being authorised via allow rules, we need to send
# the /send_join back to the same server that was originally used # the /send_join back to the same server that was originally used
# with /make_join. # with /make_join.
@ -827,7 +860,9 @@ class FederationClient(FederationBase):
get_domain_from_id(pdu.content["join_authorised_via_users_server"]) get_domain_from_id(pdu.content["join_authorised_via_users_server"])
] ]
return await self._try_destination_list("send_join", destinations, send_request) return await self._try_destination_list(
"send_join", destinations, send_request, failover_errcodes=failover_errcodes
)
async def _do_send_join( async def _do_send_join(
self, room_version: RoomVersion, destination: str, pdu: EventBase self, room_version: RoomVersion, destination: str, pdu: EventBase