From fde4ce22135b06d05b646141f90cdf3038ed4fe2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Sep 2019 11:32:41 +0100 Subject: [PATCH 01/85] Don't create new span for get_user_by_req We don't actually care about what happens in `get_user_by_req` and having it as a separate span means that the entity tag isn't added to the servlet spans, making it harder to search. --- synapse/api/auth.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 9e445cd808..59852bdbdb 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -179,7 +179,6 @@ class Auth(object): def get_public_keys(self, invite_event): return event_auth.get_public_keys(invite_event) - @opentracing.trace @defer.inlineCallbacks def get_user_by_req( self, request, allow_guest=False, rights="access", allow_expired=False From 5d99713854ee0672ba95d72ef13ce1cbcbc781c5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Sep 2019 11:39:15 +0100 Subject: [PATCH 02/85] Add tags for event_id and txn_id in event sending This will make it easier to search for sending event requests. --- synapse/rest/client/v1/room.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index a6a7b3b57e..19f150af9d 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -44,6 +44,7 @@ from synapse.rest.client.v2_alpha._base import client_patterns from synapse.storage.state import StateFilter from synapse.streams.config import PaginationConfig from synapse.types import RoomAlias, RoomID, StreamToken, ThirdPartyInstanceID, UserID +from synapse.logging.opentracing import set_tag logger = logging.getLogger(__name__) @@ -81,6 +82,7 @@ class RoomCreateRestServlet(TransactionRestServlet): ) def on_PUT(self, request, txn_id): + set_tag("txn_id", txn_id) return self.txns.fetch_or_execute_request(request, self.on_POST, request) @defer.inlineCallbacks @@ -181,6 +183,9 @@ class RoomStateEventRestServlet(TransactionRestServlet): def on_PUT(self, request, room_id, event_type, state_key, txn_id=None): requester = yield self.auth.get_user_by_req(request) + if txn_id: + set_tag("txn_id", txn_id) + content = parse_json_object_from_request(request) event_dict = { @@ -209,6 +214,7 @@ class RoomStateEventRestServlet(TransactionRestServlet): ret = {} if event: + set_tag("event_id", event.event_id) ret = {"event_id": event.event_id} return 200, ret @@ -244,12 +250,15 @@ class RoomSendEventRestServlet(TransactionRestServlet): requester, event_dict, txn_id=txn_id ) + set_tag("event_id", event.event_id) return 200, {"event_id": event.event_id} def on_GET(self, request, room_id, event_type, txn_id): return 200, "Not implemented" def on_PUT(self, request, room_id, event_type, txn_id): + set_tag("txn_id", txn_id) + return self.txns.fetch_or_execute_request( request, self.on_POST, request, room_id, event_type, txn_id ) @@ -310,6 +319,8 @@ class JoinRoomAliasServlet(TransactionRestServlet): return 200, {"room_id": room_id} def on_PUT(self, request, room_identifier, txn_id): + set_tag("txn_id", txn_id) + return self.txns.fetch_or_execute_request( request, self.on_POST, request, room_identifier, txn_id ) @@ -655,6 +666,8 @@ class RoomForgetRestServlet(TransactionRestServlet): return 200, {} def on_PUT(self, request, room_id, txn_id): + set_tag("txn_id", txn_id) + return self.txns.fetch_or_execute_request( request, self.on_POST, request, room_id, txn_id ) @@ -738,6 +751,8 @@ class 
RoomMembershipRestServlet(TransactionRestServlet): return True def on_PUT(self, request, room_id, membership_action, txn_id): + set_tag("txn_id", txn_id) + return self.txns.fetch_or_execute_request( request, self.on_POST, request, room_id, membership_action, txn_id ) @@ -771,9 +786,12 @@ class RoomRedactEventRestServlet(TransactionRestServlet): txn_id=txn_id, ) + set_tag("event_id", event.event_id) return 200, {"event_id": event.event_id} def on_PUT(self, request, room_id, event_id, txn_id): + set_tag("txn_id", txn_id) + return self.txns.fetch_or_execute_request( request, self.on_POST, request, room_id, event_id, txn_id ) From 5c1af6d1b8ea8bad770fe8a70d9badb28dcfb9b9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Sep 2019 11:42:00 +0100 Subject: [PATCH 03/85] Newsfile --- changelog.d/6108.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/6108.misc diff --git a/changelog.d/6108.misc b/changelog.d/6108.misc new file mode 100644 index 0000000000..6c3f9460e9 --- /dev/null +++ b/changelog.d/6108.misc @@ -0,0 +1 @@ +Remove `get_user_by_req` opentracing span and add some tags. From dc01cad690e3c6cb1ccb57995554dd93ab1636f2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Sep 2019 11:59:00 +0100 Subject: [PATCH 04/85] Add device and appservice tags --- synapse/api/auth.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 59852bdbdb..cb50579fd2 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -211,6 +211,7 @@ class Auth(object): if user_id: request.authenticated_entity = user_id opentracing.set_tag("authenticated_entity", user_id) + opentracing.set_tag("appservice_id", app_service.id) if ip_addr and self.hs.config.track_appservice_user_ips: yield self.store.insert_client_ip( @@ -262,6 +263,8 @@ class Auth(object): request.authenticated_entity = user.to_string() opentracing.set_tag("authenticated_entity", user.to_string()) + if device_id: + opentracing.set_tag("device_id", device_id) return synapse.types.create_requester( user, token_id, is_guest, device_id, app_service=app_service From dc2c97e1a36cc3c2f584223a0d8a3faa810471c0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Sep 2019 11:59:05 +0100 Subject: [PATCH 05/85] isort --- synapse/rest/client/v1/room.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 19f150af9d..6bf924dedc 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -39,12 +39,12 @@ from synapse.http.servlet import ( parse_json_object_from_request, parse_string, ) +from synapse.logging.opentracing import set_tag from synapse.rest.client.transactions import HttpTransactionCache from synapse.rest.client.v2_alpha._base import client_patterns from synapse.storage.state import StateFilter from synapse.streams.config import PaginationConfig from synapse.types import RoomAlias, RoomID, StreamToken, ThirdPartyInstanceID, UserID -from synapse.logging.opentracing import set_tag logger = logging.getLogger(__name__) From 54569c787b4abbc5674d9c23c012b56d8cc156ef Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 26 Sep 2019 15:38:25 +0100 Subject: [PATCH 06/85] Kill off half-implemented password-reset via sms (#6101) Doing a password reset via SMS has never worked, and in any case is a silly idea because msisdn recycling is a thing. See also matrix-org/matrix-doc#2303. 
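For reference, the servlet removed here matched
`/account/password/msisdn/requestToken` and expected a JSON body of roughly
the following shape (illustrative values; only the keys come from the
removed `assert_params_in_dict` call):

    {
        "client_secret": "some_client_secret",
        "country": "GB",
        "phone_number": "07700900001",
        "send_attempt": 1,
        "next_link": "https://example.com/password-reset-done",  # optional
    }

It would then have proxied the request via `requestMsisdnToken` to the
`account_threepid_delegate_msisdn` configured on the homeserver, if any.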
--- changelog.d/6101.misc | 1 + synapse/rest/client/v2_alpha/account.py | 65 +------------------------ 2 files changed, 2 insertions(+), 64 deletions(-) create mode 100644 changelog.d/6101.misc diff --git a/changelog.d/6101.misc b/changelog.d/6101.misc new file mode 100644 index 0000000000..9743abb9e9 --- /dev/null +++ b/changelog.d/6101.misc @@ -0,0 +1 @@ +Kill off half-implemented password-reset via sms. diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index f99676fd30..80cf7126a0 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -129,66 +129,6 @@ class EmailPasswordRequestTokenRestServlet(RestServlet): return 200, ret -class MsisdnPasswordRequestTokenRestServlet(RestServlet): - PATTERNS = client_patterns("/account/password/msisdn/requestToken$") - - def __init__(self, hs): - super(MsisdnPasswordRequestTokenRestServlet, self).__init__() - self.hs = hs - self.datastore = self.hs.get_datastore() - self.identity_handler = hs.get_handlers().identity_handler - - @defer.inlineCallbacks - def on_POST(self, request): - body = parse_json_object_from_request(request) - - assert_params_in_dict( - body, ["client_secret", "country", "phone_number", "send_attempt"] - ) - client_secret = body["client_secret"] - country = body["country"] - phone_number = body["phone_number"] - send_attempt = body["send_attempt"] - next_link = body.get("next_link") # Optional param - - msisdn = phone_number_to_msisdn(country, phone_number) - - if not check_3pid_allowed(self.hs, "msisdn", msisdn): - raise SynapseError( - 403, - "Account phone numbers are not authorized on this server", - Codes.THREEPID_DENIED, - ) - - existing_user_id = yield self.datastore.get_user_id_by_threepid( - "msisdn", msisdn - ) - - if existing_user_id is None: - raise SynapseError(400, "MSISDN not found", Codes.THREEPID_NOT_FOUND) - - if not self.hs.config.account_threepid_delegate_msisdn: - logger.warn( - "No upstream msisdn account_threepid_delegate configured on the server to " - "handle this request" - ) - raise SynapseError( - 400, - "Password reset by phone number is not supported on this homeserver", - ) - - ret = yield self.identity_handler.requestMsisdnToken( - self.hs.config.account_threepid_delegate_msisdn, - country, - phone_number, - client_secret, - send_attempt, - next_link, - ) - - return 200, ret - - class PasswordResetSubmitTokenServlet(RestServlet): """Handles 3PID validation token submission""" @@ -301,9 +241,7 @@ class PasswordRestServlet(RestServlet): else: requester = None result, params, _ = yield self.auth_handler.check_auth( - [[LoginType.EMAIL_IDENTITY], [LoginType.MSISDN]], - body, - self.hs.get_ip_from_request(request), + [[LoginType.EMAIL_IDENTITY]], body, self.hs.get_ip_from_request(request) ) if LoginType.EMAIL_IDENTITY in result: @@ -843,7 +781,6 @@ class WhoamiRestServlet(RestServlet): def register_servlets(hs, http_server): EmailPasswordRequestTokenRestServlet(hs).register(http_server) - MsisdnPasswordRequestTokenRestServlet(hs).register(http_server) PasswordResetSubmitTokenServlet(hs).register(http_server) PasswordRestServlet(hs).register(http_server) DeactivateAccountRestServlet(hs).register(http_server) From 8c27bc8b60d4b78c059ea727a78e78dc8cd3df7a Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Fri, 27 Sep 2019 10:36:20 +0100 Subject: [PATCH 07/85] Move lookup-related functions from RoomMemberHandler to IdentityHandler (#5978) Just to have all the methods that 
make calls to identity services in one place. --- changelog.d/5978.misc | 1 + synapse/handlers/identity.py | 353 ++++++++++++++++++++++++++++++ synapse/handlers/room_member.py | 370 +------------------------------- 3 files changed, 360 insertions(+), 364 deletions(-) create mode 100644 changelog.d/5978.misc diff --git a/changelog.d/5978.misc b/changelog.d/5978.misc new file mode 100644 index 0000000000..6d2b69b11b --- /dev/null +++ b/changelog.d/5978.misc @@ -0,0 +1 @@ +Move lookup-related functions from RoomMemberHandler to IdentityHandler. \ No newline at end of file diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 6d42a1aed8..ba99ddf76d 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -21,11 +21,15 @@ import logging import urllib from canonicaljson import json +from signedjson.key import decode_verify_key_bytes +from signedjson.sign import verify_signed_json +from unpaddedbase64 import decode_base64 from twisted.internet import defer from twisted.internet.error import TimeoutError from synapse.api.errors import ( + AuthError, CodeMessageException, Codes, HttpResponseException, @@ -33,12 +37,15 @@ from synapse.api.errors import ( ) from synapse.config.emailconfig import ThreepidBehaviour from synapse.http.client import SimpleHttpClient +from synapse.util.hash import sha256_and_url_safe_base64 from synapse.util.stringutils import random_string from ._base import BaseHandler logger = logging.getLogger(__name__) +id_server_scheme = "https://" + class IdentityHandler(BaseHandler): def __init__(self, hs): @@ -557,6 +564,352 @@ class IdentityHandler(BaseHandler): logger.warning("Error contacting msisdn account_threepid_delegate: %s", e) raise SynapseError(400, "Error contacting the identity server") + @defer.inlineCallbacks + def lookup_3pid(self, id_server, medium, address, id_access_token=None): + """Looks up a 3pid in the passed identity server. + + Args: + id_server (str): The server name (including port, if required) + of the identity server to use. + medium (str): The type of the third party identifier (e.g. "email"). + address (str): The third party identifier (e.g. "foo@example.com"). + id_access_token (str|None): The access token to authenticate to the identity + server with + + Returns: + str|None: the matrix ID of the 3pid, or None if it is not recognized. + """ + if id_access_token is not None: + try: + results = yield self._lookup_3pid_v2( + id_server, id_access_token, medium, address + ) + return results + + except Exception as e: + # Catch HttpResponseExcept for a non-200 response code + # Check if this identity server does not know about v2 lookups + if isinstance(e, HttpResponseException) and e.code == 404: + # This is an old identity server that does not yet support v2 lookups + logger.warning( + "Attempted v2 lookup on v1 identity server %s. Falling " + "back to v1", + id_server, + ) + else: + logger.warning("Error when looking up hashing details: %s", e) + return None + + return (yield self._lookup_3pid_v1(id_server, medium, address)) + + @defer.inlineCallbacks + def _lookup_3pid_v1(self, id_server, medium, address): + """Looks up a 3pid in the passed identity server using v1 lookup. + + Args: + id_server (str): The server name (including port, if required) + of the identity server to use. + medium (str): The type of the third party identifier (e.g. "email"). + address (str): The third party identifier (e.g. "foo@example.com"). + + Returns: + str: the matrix ID of the 3pid, or None if it is not recognized. 
+ """ + try: + data = yield self.blacklisting_http_client.get_json( + "%s%s/_matrix/identity/api/v1/lookup" % (id_server_scheme, id_server), + {"medium": medium, "address": address}, + ) + + if "mxid" in data: + if "signatures" not in data: + raise AuthError(401, "No signatures on 3pid binding") + yield self._verify_any_signature(data, id_server) + return data["mxid"] + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") + except IOError as e: + logger.warning("Error from v1 identity server lookup: %s" % (e,)) + + return None + + @defer.inlineCallbacks + def _lookup_3pid_v2(self, id_server, id_access_token, medium, address): + """Looks up a 3pid in the passed identity server using v2 lookup. + + Args: + id_server (str): The server name (including port, if required) + of the identity server to use. + id_access_token (str): The access token to authenticate to the identity server with + medium (str): The type of the third party identifier (e.g. "email"). + address (str): The third party identifier (e.g. "foo@example.com"). + + Returns: + Deferred[str|None]: the matrix ID of the 3pid, or None if it is not recognised. + """ + # Check what hashing details are supported by this identity server + try: + hash_details = yield self.blacklisting_http_client.get_json( + "%s%s/_matrix/identity/v2/hash_details" % (id_server_scheme, id_server), + {"access_token": id_access_token}, + ) + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") + + if not isinstance(hash_details, dict): + logger.warning( + "Got non-dict object when checking hash details of %s%s: %s", + id_server_scheme, + id_server, + hash_details, + ) + raise SynapseError( + 400, + "Non-dict object from %s%s during v2 hash_details request: %s" + % (id_server_scheme, id_server, hash_details), + ) + + # Extract information from hash_details + supported_lookup_algorithms = hash_details.get("algorithms") + lookup_pepper = hash_details.get("lookup_pepper") + if ( + not supported_lookup_algorithms + or not isinstance(supported_lookup_algorithms, list) + or not lookup_pepper + or not isinstance(lookup_pepper, str) + ): + raise SynapseError( + 400, + "Invalid hash details received from identity server %s%s: %s" + % (id_server_scheme, id_server, hash_details), + ) + + # Check if any of the supported lookup algorithms are present + if LookupAlgorithm.SHA256 in supported_lookup_algorithms: + # Perform a hashed lookup + lookup_algorithm = LookupAlgorithm.SHA256 + + # Hash address, medium and the pepper with sha256 + to_hash = "%s %s %s" % (address, medium, lookup_pepper) + lookup_value = sha256_and_url_safe_base64(to_hash) + + elif LookupAlgorithm.NONE in supported_lookup_algorithms: + # Perform a non-hashed lookup + lookup_algorithm = LookupAlgorithm.NONE + + # Combine together plaintext address and medium + lookup_value = "%s %s" % (address, medium) + + else: + logger.warning( + "None of the provided lookup algorithms of %s are supported: %s", + id_server, + supported_lookup_algorithms, + ) + raise SynapseError( + 400, + "Provided identity server does not support any v2 lookup " + "algorithms that this homeserver supports.", + ) + + # Authenticate with identity server given the access token from the client + headers = {"Authorization": create_id_access_token_header(id_access_token)} + + try: + lookup_results = yield self.blacklisting_http_client.post_json_get_json( + "%s%s/_matrix/identity/v2/lookup" % (id_server_scheme, id_server), + { + "addresses": [lookup_value], + "algorithm": 
lookup_algorithm, + "pepper": lookup_pepper, + }, + headers=headers, + ) + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") + except Exception as e: + logger.warning("Error when performing a v2 3pid lookup: %s", e) + raise SynapseError( + 500, "Unknown error occurred during identity server lookup" + ) + + # Check for a mapping from what we looked up to an MXID + if "mappings" not in lookup_results or not isinstance( + lookup_results["mappings"], dict + ): + logger.warning("No results from 3pid lookup") + return None + + # Return the MXID if it's available, or None otherwise + mxid = lookup_results["mappings"].get(lookup_value) + return mxid + + @defer.inlineCallbacks + def _verify_any_signature(self, data, server_hostname): + if server_hostname not in data["signatures"]: + raise AuthError(401, "No signature from server %s" % (server_hostname,)) + for key_name, signature in data["signatures"][server_hostname].items(): + try: + key_data = yield self.blacklisting_http_client.get_json( + "%s%s/_matrix/identity/api/v1/pubkey/%s" + % (id_server_scheme, server_hostname, key_name) + ) + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") + if "public_key" not in key_data: + raise AuthError( + 401, "No public key named %s from %s" % (key_name, server_hostname) + ) + verify_signed_json( + data, + server_hostname, + decode_verify_key_bytes( + key_name, decode_base64(key_data["public_key"]) + ), + ) + return + + @defer.inlineCallbacks + def ask_id_server_for_third_party_invite( + self, + requester, + id_server, + medium, + address, + room_id, + inviter_user_id, + room_alias, + room_avatar_url, + room_join_rules, + room_name, + inviter_display_name, + inviter_avatar_url, + id_access_token=None, + ): + """ + Asks an identity server for a third party invite. + + Args: + requester (Requester) + id_server (str): hostname + optional port for the identity server. + medium (str): The literal string "email". + address (str): The third party address being invited. + room_id (str): The ID of the room to which the user is invited. + inviter_user_id (str): The user ID of the inviter. + room_alias (str): An alias for the room, for cosmetic notifications. + room_avatar_url (str): The URL of the room's avatar, for cosmetic + notifications. + room_join_rules (str): The join rules of the email (e.g. "public"). + room_name (str): The m.room.name of the room. + inviter_display_name (str): The current display name of the + inviter. + inviter_avatar_url (str): The URL of the inviter's avatar. + id_access_token (str|None): The access token to authenticate to the identity + server with + + Returns: + A deferred tuple containing: + token (str): The token which must be signed to prove authenticity. + public_keys ([{"public_key": str, "key_validity_url": str}]): + public_key is a base64-encoded ed25519 public key. + fallback_public_key: One element from public_keys. + display_name (str): A user-friendly name to represent the invited + user. 
+ """ + invite_config = { + "medium": medium, + "address": address, + "room_id": room_id, + "room_alias": room_alias, + "room_avatar_url": room_avatar_url, + "room_join_rules": room_join_rules, + "room_name": room_name, + "sender": inviter_user_id, + "sender_display_name": inviter_display_name, + "sender_avatar_url": inviter_avatar_url, + } + + # Add the identity service access token to the JSON body and use the v2 + # Identity Service endpoints if id_access_token is present + data = None + base_url = "%s%s/_matrix/identity" % (id_server_scheme, id_server) + + if id_access_token: + key_validity_url = "%s%s/_matrix/identity/v2/pubkey/isvalid" % ( + id_server_scheme, + id_server, + ) + + # Attempt a v2 lookup + url = base_url + "/v2/store-invite" + try: + data = yield self.blacklisting_http_client.post_json_get_json( + url, + invite_config, + {"Authorization": create_id_access_token_header(id_access_token)}, + ) + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") + except HttpResponseException as e: + if e.code != 404: + logger.info("Failed to POST %s with JSON: %s", url, e) + raise e + + if data is None: + key_validity_url = "%s%s/_matrix/identity/api/v1/pubkey/isvalid" % ( + id_server_scheme, + id_server, + ) + url = base_url + "/api/v1/store-invite" + + try: + data = yield self.blacklisting_http_client.post_json_get_json( + url, invite_config + ) + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") + except HttpResponseException as e: + logger.warning( + "Error trying to call /store-invite on %s%s: %s", + id_server_scheme, + id_server, + e, + ) + + if data is None: + # Some identity servers may only support application/x-www-form-urlencoded + # types. This is especially true with old instances of Sydent, see + # https://github.com/matrix-org/sydent/pull/170 + try: + data = yield self.blacklisting_http_client.post_urlencoded_get_json( + url, invite_config + ) + except HttpResponseException as e: + logger.warning( + "Error calling /store-invite on %s%s with fallback " + "encoding: %s", + id_server_scheme, + id_server, + e, + ) + raise e + + # TODO: Check for success + token = data["token"] + public_keys = data.get("public_keys", []) + if "public_key" in data: + fallback_public_key = { + "public_key": data["public_key"], + "key_validity_url": key_validity_url, + } + else: + fallback_public_key = public_keys[0] + + if not public_keys: + public_keys.append(fallback_public_key) + display_name = data["display_name"] + return token, public_keys, fallback_public_key, display_name + def create_id_access_token_header(id_access_token): """Create an Authorization header for passing to SimpleHttpClient as the header value diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 94cd0cf3ef..8abdb1b6e6 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -20,29 +20,19 @@ import logging from six.moves import http_client -from signedjson.key import decode_verify_key_bytes -from signedjson.sign import verify_signed_json -from unpaddedbase64 import decode_base64 - from twisted.internet import defer -from twisted.internet.error import TimeoutError from synapse import types from synapse.api.constants import EventTypes, Membership -from synapse.api.errors import AuthError, Codes, HttpResponseException, SynapseError -from synapse.handlers.identity import LookupAlgorithm, create_id_access_token_header -from synapse.http.client import SimpleHttpClient +from synapse.api.errors import 
AuthError, Codes, SynapseError from synapse.types import RoomID, UserID from synapse.util.async_helpers import Linearizer from synapse.util.distributor import user_joined_room, user_left_room -from synapse.util.hash import sha256_and_url_safe_base64 from ._base import BaseHandler logger = logging.getLogger(__name__) -id_server_scheme = "https://" - class RoomMemberHandler(object): # TODO(paul): This handler currently contains a messy conflation of @@ -63,14 +53,10 @@ class RoomMemberHandler(object): self.auth = hs.get_auth() self.state_handler = hs.get_state_handler() self.config = hs.config - # We create a blacklisting instance of SimpleHttpClient for contacting identity - # servers specified by clients - self.simple_http_client = SimpleHttpClient( - hs, ip_blacklist=hs.config.federation_ip_range_blacklist - ) self.federation_handler = hs.get_handlers().federation_handler self.directory_handler = hs.get_handlers().directory_handler + self.identity_handler = hs.get_handlers().identity_handler self.registration_handler = hs.get_registration_handler() self.profile_handler = hs.get_profile_handler() self.event_creation_handler = hs.get_event_creation_handler() @@ -682,7 +668,9 @@ class RoomMemberHandler(object): 403, "Looking up third-party identifiers is denied from this server" ) - invitee = yield self._lookup_3pid(id_server, medium, address, id_access_token) + invitee = yield self.identity_handler.lookup_3pid( + id_server, medium, address, id_access_token + ) if invitee: yield self.update_membership( @@ -700,211 +688,6 @@ class RoomMemberHandler(object): id_access_token=id_access_token, ) - @defer.inlineCallbacks - def _lookup_3pid(self, id_server, medium, address, id_access_token=None): - """Looks up a 3pid in the passed identity server. - - Args: - id_server (str): The server name (including port, if required) - of the identity server to use. - medium (str): The type of the third party identifier (e.g. "email"). - address (str): The third party identifier (e.g. "foo@example.com"). - id_access_token (str|None): The access token to authenticate to the identity - server with - - Returns: - str|None: the matrix ID of the 3pid, or None if it is not recognized. - """ - if id_access_token is not None: - try: - results = yield self._lookup_3pid_v2( - id_server, id_access_token, medium, address - ) - return results - - except Exception as e: - # Catch HttpResponseExcept for a non-200 response code - # Check if this identity server does not know about v2 lookups - if isinstance(e, HttpResponseException) and e.code == 404: - # This is an old identity server that does not yet support v2 lookups - logger.warning( - "Attempted v2 lookup on v1 identity server %s. Falling " - "back to v1", - id_server, - ) - else: - logger.warning("Error when looking up hashing details: %s", e) - return None - - return (yield self._lookup_3pid_v1(id_server, medium, address)) - - @defer.inlineCallbacks - def _lookup_3pid_v1(self, id_server, medium, address): - """Looks up a 3pid in the passed identity server using v1 lookup. - - Args: - id_server (str): The server name (including port, if required) - of the identity server to use. - medium (str): The type of the third party identifier (e.g. "email"). - address (str): The third party identifier (e.g. "foo@example.com"). - - Returns: - str: the matrix ID of the 3pid, or None if it is not recognized. 
- """ - try: - data = yield self.simple_http_client.get_json( - "%s%s/_matrix/identity/api/v1/lookup" % (id_server_scheme, id_server), - {"medium": medium, "address": address}, - ) - - if "mxid" in data: - if "signatures" not in data: - raise AuthError(401, "No signatures on 3pid binding") - yield self._verify_any_signature(data, id_server) - return data["mxid"] - except TimeoutError: - raise SynapseError(500, "Timed out contacting identity server") - except IOError as e: - logger.warning("Error from v1 identity server lookup: %s" % (e,)) - - return None - - @defer.inlineCallbacks - def _lookup_3pid_v2(self, id_server, id_access_token, medium, address): - """Looks up a 3pid in the passed identity server using v2 lookup. - - Args: - id_server (str): The server name (including port, if required) - of the identity server to use. - id_access_token (str): The access token to authenticate to the identity server with - medium (str): The type of the third party identifier (e.g. "email"). - address (str): The third party identifier (e.g. "foo@example.com"). - - Returns: - Deferred[str|None]: the matrix ID of the 3pid, or None if it is not recognised. - """ - # Check what hashing details are supported by this identity server - try: - hash_details = yield self.simple_http_client.get_json( - "%s%s/_matrix/identity/v2/hash_details" % (id_server_scheme, id_server), - {"access_token": id_access_token}, - ) - except TimeoutError: - raise SynapseError(500, "Timed out contacting identity server") - - if not isinstance(hash_details, dict): - logger.warning( - "Got non-dict object when checking hash details of %s%s: %s", - id_server_scheme, - id_server, - hash_details, - ) - raise SynapseError( - 400, - "Non-dict object from %s%s during v2 hash_details request: %s" - % (id_server_scheme, id_server, hash_details), - ) - - # Extract information from hash_details - supported_lookup_algorithms = hash_details.get("algorithms") - lookup_pepper = hash_details.get("lookup_pepper") - if ( - not supported_lookup_algorithms - or not isinstance(supported_lookup_algorithms, list) - or not lookup_pepper - or not isinstance(lookup_pepper, str) - ): - raise SynapseError( - 400, - "Invalid hash details received from identity server %s%s: %s" - % (id_server_scheme, id_server, hash_details), - ) - - # Check if any of the supported lookup algorithms are present - if LookupAlgorithm.SHA256 in supported_lookup_algorithms: - # Perform a hashed lookup - lookup_algorithm = LookupAlgorithm.SHA256 - - # Hash address, medium and the pepper with sha256 - to_hash = "%s %s %s" % (address, medium, lookup_pepper) - lookup_value = sha256_and_url_safe_base64(to_hash) - - elif LookupAlgorithm.NONE in supported_lookup_algorithms: - # Perform a non-hashed lookup - lookup_algorithm = LookupAlgorithm.NONE - - # Combine together plaintext address and medium - lookup_value = "%s %s" % (address, medium) - - else: - logger.warning( - "None of the provided lookup algorithms of %s are supported: %s", - id_server, - supported_lookup_algorithms, - ) - raise SynapseError( - 400, - "Provided identity server does not support any v2 lookup " - "algorithms that this homeserver supports.", - ) - - # Authenticate with identity server given the access token from the client - headers = {"Authorization": create_id_access_token_header(id_access_token)} - - try: - lookup_results = yield self.simple_http_client.post_json_get_json( - "%s%s/_matrix/identity/v2/lookup" % (id_server_scheme, id_server), - { - "addresses": [lookup_value], - "algorithm": lookup_algorithm, - 
"pepper": lookup_pepper, - }, - headers=headers, - ) - except TimeoutError: - raise SynapseError(500, "Timed out contacting identity server") - except Exception as e: - logger.warning("Error when performing a v2 3pid lookup: %s", e) - raise SynapseError( - 500, "Unknown error occurred during identity server lookup" - ) - - # Check for a mapping from what we looked up to an MXID - if "mappings" not in lookup_results or not isinstance( - lookup_results["mappings"], dict - ): - logger.warning("No results from 3pid lookup") - return None - - # Return the MXID if it's available, or None otherwise - mxid = lookup_results["mappings"].get(lookup_value) - return mxid - - @defer.inlineCallbacks - def _verify_any_signature(self, data, server_hostname): - if server_hostname not in data["signatures"]: - raise AuthError(401, "No signature from server %s" % (server_hostname,)) - for key_name, signature in data["signatures"][server_hostname].items(): - try: - key_data = yield self.simple_http_client.get_json( - "%s%s/_matrix/identity/api/v1/pubkey/%s" - % (id_server_scheme, server_hostname, key_name) - ) - except TimeoutError: - raise SynapseError(500, "Timed out contacting identity server") - if "public_key" not in key_data: - raise AuthError( - 401, "No public key named %s from %s" % (key_name, server_hostname) - ) - verify_signed_json( - data, - server_hostname, - decode_verify_key_bytes( - key_name, decode_base64(key_data["public_key"]) - ), - ) - return - @defer.inlineCallbacks def _make_and_store_3pid_invite( self, @@ -951,7 +734,7 @@ class RoomMemberHandler(object): room_avatar_url = room_avatar_event.content.get("url", "") token, public_keys, fallback_public_key, display_name = ( - yield self._ask_id_server_for_third_party_invite( + yield self.identity_handler.ask_id_server_for_third_party_invite( requester=requester, id_server=id_server, medium=medium, @@ -987,147 +770,6 @@ class RoomMemberHandler(object): txn_id=txn_id, ) - @defer.inlineCallbacks - def _ask_id_server_for_third_party_invite( - self, - requester, - id_server, - medium, - address, - room_id, - inviter_user_id, - room_alias, - room_avatar_url, - room_join_rules, - room_name, - inviter_display_name, - inviter_avatar_url, - id_access_token=None, - ): - """ - Asks an identity server for a third party invite. - - Args: - requester (Requester) - id_server (str): hostname + optional port for the identity server. - medium (str): The literal string "email". - address (str): The third party address being invited. - room_id (str): The ID of the room to which the user is invited. - inviter_user_id (str): The user ID of the inviter. - room_alias (str): An alias for the room, for cosmetic notifications. - room_avatar_url (str): The URL of the room's avatar, for cosmetic - notifications. - room_join_rules (str): The join rules of the email (e.g. "public"). - room_name (str): The m.room.name of the room. - inviter_display_name (str): The current display name of the - inviter. - inviter_avatar_url (str): The URL of the inviter's avatar. - id_access_token (str|None): The access token to authenticate to the identity - server with - - Returns: - A deferred tuple containing: - token (str): The token which must be signed to prove authenticity. - public_keys ([{"public_key": str, "key_validity_url": str}]): - public_key is a base64-encoded ed25519 public key. - fallback_public_key: One element from public_keys. - display_name (str): A user-friendly name to represent the invited - user. 
- """ - invite_config = { - "medium": medium, - "address": address, - "room_id": room_id, - "room_alias": room_alias, - "room_avatar_url": room_avatar_url, - "room_join_rules": room_join_rules, - "room_name": room_name, - "sender": inviter_user_id, - "sender_display_name": inviter_display_name, - "sender_avatar_url": inviter_avatar_url, - } - - # Add the identity service access token to the JSON body and use the v2 - # Identity Service endpoints if id_access_token is present - data = None - base_url = "%s%s/_matrix/identity" % (id_server_scheme, id_server) - - if id_access_token: - key_validity_url = "%s%s/_matrix/identity/v2/pubkey/isvalid" % ( - id_server_scheme, - id_server, - ) - - # Attempt a v2 lookup - url = base_url + "/v2/store-invite" - try: - data = yield self.simple_http_client.post_json_get_json( - url, - invite_config, - {"Authorization": create_id_access_token_header(id_access_token)}, - ) - except TimeoutError: - raise SynapseError(500, "Timed out contacting identity server") - except HttpResponseException as e: - if e.code != 404: - logger.info("Failed to POST %s with JSON: %s", url, e) - raise e - - if data is None: - key_validity_url = "%s%s/_matrix/identity/api/v1/pubkey/isvalid" % ( - id_server_scheme, - id_server, - ) - url = base_url + "/api/v1/store-invite" - - try: - data = yield self.simple_http_client.post_json_get_json( - url, invite_config - ) - except TimeoutError: - raise SynapseError(500, "Timed out contacting identity server") - except HttpResponseException as e: - logger.warning( - "Error trying to call /store-invite on %s%s: %s", - id_server_scheme, - id_server, - e, - ) - - if data is None: - # Some identity servers may only support application/x-www-form-urlencoded - # types. This is especially true with old instances of Sydent, see - # https://github.com/matrix-org/sydent/pull/170 - try: - data = yield self.simple_http_client.post_urlencoded_get_json( - url, invite_config - ) - except HttpResponseException as e: - logger.warning( - "Error calling /store-invite on %s%s with fallback " - "encoding: %s", - id_server_scheme, - id_server, - e, - ) - raise e - - # TODO: Check for success - token = data["token"] - public_keys = data.get("public_keys", []) - if "public_key" in data: - fallback_public_key = { - "public_key": data["public_key"], - "key_validity_url": key_validity_url, - } - else: - fallback_public_key = public_keys[0] - - if not public_keys: - public_keys.append(fallback_public_key) - display_name = data["display_name"] - return token, public_keys, fallback_public_key, display_name - @defer.inlineCallbacks def _is_host_in_room(self, current_state_ids): # Have we just created the room, and is this about to be the very From 5257a2fb1c983158bbdee8be4e61066f1a83d4a8 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 27 Sep 2019 14:49:53 +0100 Subject: [PATCH 08/85] Reject pending invites on deactivation --- synapse/handlers/deactivate_account.py | 31 ++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index d83912c9a4..9815365f54 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -120,6 +120,10 @@ class DeactivateAccountHandler(BaseHandler): # parts users from rooms (if it isn't already running) self._start_user_parting() + # Reject all pending invites for the user, so that it doesn't show up in the + # invitees list of rooms. 
+ self._reject_pending_invites_for_user(user_id) + # Remove all information on the user from the account_validity table. if self._account_validity_enabled: yield self.store.delete_account_validity_for_user(user_id) @@ -129,6 +133,33 @@ class DeactivateAccountHandler(BaseHandler): return identity_server_supports_unbinding + def _reject_pending_invites_for_user(self, user_id): + """Reject pending invites addressed to a given user ID. + + Args: + user_id (str): The user ID to reject pending invites for. + """ + user = UserID.from_string(user_id) + pending_invites = yield self.store.get_invited_rooms_for_user(user_id) + + for room in pending_invites: + try: + yield self._room_member_handler.update_membership( + create_requester(user), + user, + room.room_id, + "leave", + ratelimit=False, + require_consent=False, + ) + except Exception: + logger.exception( + "Failed to reject invite for user %r in room %r:" + " ignoring and continuing", + user_id, + room.room_id, + ) + def _start_user_parting(self): """ Start the process that goes through the table of users From 72a2708ac6335985eb5171f5685f73d2ea120a2e Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 27 Sep 2019 15:13:39 +0100 Subject: [PATCH 09/85] Fixup and add some logging --- synapse/handlers/deactivate_account.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index 9815365f54..763fea3a24 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -122,7 +122,7 @@ class DeactivateAccountHandler(BaseHandler): # Reject all pending invites for the user, so that it doesn't show up in the # invitees list of rooms. - self._reject_pending_invites_for_user(user_id) + yield self._reject_pending_invites_for_user(user_id) # Remove all information on the user from the account_validity table. if self._account_validity_enabled: @@ -133,6 +133,7 @@ class DeactivateAccountHandler(BaseHandler): return identity_server_supports_unbinding + @defer.inlineCallbacks def _reject_pending_invites_for_user(self, user_id): """Reject pending invites addressed to a given user ID. 
@@ -142,6 +143,8 @@ class DeactivateAccountHandler(BaseHandler): user = UserID.from_string(user_id) pending_invites = yield self.store.get_invited_rooms_for_user(user_id) + logger.info(pending_invites) + for room in pending_invites: try: yield self._room_member_handler.update_membership( @@ -152,6 +155,11 @@ class DeactivateAccountHandler(BaseHandler): ratelimit=False, require_consent=False, ) + logger.info( + "Rejected invite for user %r in room %r", + user_id, + room.room_id, + ) except Exception: logger.exception( "Failed to reject invite for user %r in room %r:" From e94ff67903c3370fc5bc8b6c336433057e38ff05 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 27 Sep 2019 15:14:02 +0100 Subject: [PATCH 10/85] Add test to validate the change --- tests/rest/client/v2_alpha/test_account.py | 70 ++++++++++++++++++---- 1 file changed, 57 insertions(+), 13 deletions(-) diff --git a/tests/rest/client/v2_alpha/test_account.py b/tests/rest/client/v2_alpha/test_account.py index 920de41de4..69c33dfd8a 100644 --- a/tests/rest/client/v2_alpha/test_account.py +++ b/tests/rest/client/v2_alpha/test_account.py @@ -23,8 +23,8 @@ from email.parser import Parser import pkg_resources import synapse.rest.admin -from synapse.api.constants import LoginType -from synapse.rest.client.v1 import login +from synapse.api.constants import LoginType, Membership +from synapse.rest.client.v1 import login, room from synapse.rest.client.v2_alpha import account, register from tests import unittest @@ -244,16 +244,69 @@ class DeactivateTestCase(unittest.HomeserverTestCase): synapse.rest.admin.register_servlets_for_client_rest_resource, login.register_servlets, account.register_servlets, + room.register_servlets, ] def make_homeserver(self, reactor, clock): - hs = self.setup_test_homeserver() - return hs + self.hs = self.setup_test_homeserver() + return self.hs def test_deactivate_account(self): user_id = self.register_user("kermit", "test") tok = self.login("kermit", "test") + self.deactivate(user_id, tok) + + store = self.hs.get_datastore() + + # Check that the user has been marked as deactivated. + self.assertTrue(self.get_success(store.get_user_deactivated_status(user_id))) + + # Check that this access token has been invalidated. + request, channel = self.make_request("GET", "account/whoami") + self.render(request) + self.assertEqual(request.code, 401) + + @unittest.INFO + def test_pending_invites(self): + """Tests that deactivating a user rejects every pending invite for them.""" + store = self.hs.get_datastore() + + inviter_id = self.register_user("inviter", "test") + inviter_tok = self.login("inviter", "test") + + invitee_id = self.register_user("invitee", "test") + invitee_tok = self.login("invitee", "test") + + # Make @inviter:test invite @invitee:test in a new room. + room_id = self.helper.create_room_as(inviter_id, tok=inviter_tok) + self.helper.invite( + room=room_id, + src=inviter_id, + targ=invitee_id, + tok=inviter_tok, + ) + + # Make sure the invite is here. + pending_invites = self.get_success(store.get_invited_rooms_for_user(invitee_id)) + self.assertEqual(len(pending_invites), 1, pending_invites) + self.assertEqual(pending_invites[0].room_id, room_id, pending_invites) + + # Deactivate @invitee:test. + self.deactivate(invitee_id, invitee_tok) + + # Check that the invite isn't there anymore. 
+ pending_invites = self.get_success(store.get_invited_rooms_for_user(invitee_id)) + self.assertEqual(len(pending_invites), 0, pending_invites) + + # Check that the membership of @invitee:test in the room is now "leave". + memberships = self.get_success( + store.get_rooms_for_user_where_membership_is(invitee_id, [Membership.LEAVE]) + ) + self.assertEqual(len(memberships), 1, memberships) + self.assertEqual(memberships[0].room_id, room_id, memberships) + + def deactivate(self, user_id, tok): request_data = json.dumps( { "auth": { @@ -270,12 +323,3 @@ class DeactivateTestCase(unittest.HomeserverTestCase): self.render(request) self.assertEqual(request.code, 200) - store = self.hs.get_datastore() - - # Check that the user has been marked as deactivated. - self.assertTrue(self.get_success(store.get_user_deactivated_status(user_id))) - - # Check that this access token has been invalidated. - request, channel = self.make_request("GET", "account/whoami") - self.render(request) - self.assertEqual(request.code, 401) From 0804a27c8c7c2cc9f0adbb0329bffcd8ce10e1bd Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 27 Sep 2019 15:14:34 +0100 Subject: [PATCH 11/85] Changelog --- changelog.d/6125.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/6125.feature diff --git a/changelog.d/6125.feature b/changelog.d/6125.feature new file mode 100644 index 0000000000..432e255ad4 --- /dev/null +++ b/changelog.d/6125.feature @@ -0,0 +1 @@ +Reject all pending invite for a user during deactivation. From 873fe7883cf0d7cf5346a9a55d40967a35848e33 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 27 Sep 2019 15:21:03 +0100 Subject: [PATCH 12/85] Lint --- synapse/handlers/deactivate_account.py | 4 +--- tests/rest/client/v2_alpha/test_account.py | 8 +------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index 763fea3a24..148d1424ca 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -156,9 +156,7 @@ class DeactivateAccountHandler(BaseHandler): require_consent=False, ) logger.info( - "Rejected invite for user %r in room %r", - user_id, - room.room_id, + "Rejected invite for user %r in room %r", user_id, room.room_id ) except Exception: logger.exception( diff --git a/tests/rest/client/v2_alpha/test_account.py b/tests/rest/client/v2_alpha/test_account.py index 69c33dfd8a..434b730faf 100644 --- a/tests/rest/client/v2_alpha/test_account.py +++ b/tests/rest/client/v2_alpha/test_account.py @@ -280,12 +280,7 @@ class DeactivateTestCase(unittest.HomeserverTestCase): # Make @inviter:test invite @invitee:test in a new room. room_id = self.helper.create_room_as(inviter_id, tok=inviter_tok) - self.helper.invite( - room=room_id, - src=inviter_id, - targ=invitee_id, - tok=inviter_tok, - ) + self.helper.invite(room=room_id, src=inviter_id, targ=invitee_id, tok=inviter_tok) # Make sure the invite is here. 
pending_invites = self.get_success(store.get_invited_rooms_for_user(invitee_id)) @@ -322,4 +317,3 @@ class DeactivateTestCase(unittest.HomeserverTestCase): ) self.render(request) self.assertEqual(request.code, 200) - From fbb8ff3088abab48bd5815a1acaeb9243ada7431 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 27 Sep 2019 15:23:07 +0100 Subject: [PATCH 13/85] ok --- tests/rest/client/v2_alpha/test_account.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/rest/client/v2_alpha/test_account.py b/tests/rest/client/v2_alpha/test_account.py index 434b730faf..0f51895b81 100644 --- a/tests/rest/client/v2_alpha/test_account.py +++ b/tests/rest/client/v2_alpha/test_account.py @@ -280,7 +280,9 @@ class DeactivateTestCase(unittest.HomeserverTestCase): # Make @inviter:test invite @invitee:test in a new room. room_id = self.helper.create_room_as(inviter_id, tok=inviter_tok) - self.helper.invite(room=room_id, src=inviter_id, targ=invitee_id, tok=inviter_tok) + self.helper.invite( + room=room_id, src=inviter_id, targ=invitee_id, tok=inviter_tok + ) # Make sure the invite is here. pending_invites = self.get_success(store.get_invited_rooms_for_user(invitee_id)) From 25a0a36ad9b63aa2becabc5c311025cb612d466f Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 27 Sep 2019 16:10:24 +0100 Subject: [PATCH 14/85] Update changelog.d/6125.feature Co-Authored-By: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> --- changelog.d/6125.feature | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/6125.feature b/changelog.d/6125.feature index 432e255ad4..cbe5f8d3c8 100644 --- a/changelog.d/6125.feature +++ b/changelog.d/6125.feature @@ -1 +1 @@ -Reject all pending invite for a user during deactivation. +Reject all pending invites for a user during deactivation. From bbe2a0f33916d7b01179c56b230307c46843625a Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 27 Sep 2019 16:10:36 +0100 Subject: [PATCH 15/85] Update synapse/handlers/deactivate_account.py Co-Authored-By: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> --- synapse/handlers/deactivate_account.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index 148d1424ca..5cf01479db 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -120,7 +120,7 @@ class DeactivateAccountHandler(BaseHandler): # parts users from rooms (if it isn't already running) self._start_user_parting() - # Reject all pending invites for the user, so that it doesn't show up in the + # Reject all pending invites for the user, so that they do not show up in the # invitees list of rooms. 
yield self._reject_pending_invites_for_user(user_id) From af92110c465ea7cf4d04e1193b58f16ae26a75d6 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 27 Sep 2019 16:12:15 +0100 Subject: [PATCH 16/85] Update synapse/handlers/deactivate_account.py Co-Authored-By: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> --- synapse/handlers/deactivate_account.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index 5cf01479db..5f142f82c2 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -156,7 +156,9 @@ class DeactivateAccountHandler(BaseHandler): require_consent=False, ) logger.info( - "Rejected invite for user %r in room %r", user_id, room.room_id + "Rejected invite for deactivated user %r in room %r", + user_id, + room.room_id, ) except Exception: logger.exception( From 3e42d47a5a06ea5d353b75a42040107bf401d8ba Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 27 Sep 2019 16:15:01 +0100 Subject: [PATCH 17/85] Incorporate review --- synapse/handlers/deactivate_account.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index 5f142f82c2..63267a0a4c 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -120,8 +120,8 @@ class DeactivateAccountHandler(BaseHandler): # parts users from rooms (if it isn't already running) self._start_user_parting() - # Reject all pending invites for the user, so that they do not show up in the - # invitees list of rooms. + # Reject all pending invites for the user, so that the user doesn't show up in the + # "invited" section of rooms' members list. yield self._reject_pending_invites_for_user(user_id) # Remove all information on the user from the account_validity table. @@ -143,8 +143,6 @@ class DeactivateAccountHandler(BaseHandler): user = UserID.from_string(user_id) pending_invites = yield self.store.get_invited_rooms_for_user(user_id) - logger.info(pending_invites) - for room in pending_invites: try: yield self._room_member_handler.update_membership( From f3451118a6dca1499daadf224c3eab801dad0c0c Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Fri, 27 Sep 2019 17:59:18 +0100 Subject: [PATCH 18/85] Edit SimpleHttpClient to reference that header keys can be passed as str or bytes (#6077) --- changelog.d/6077.misc | 1 + synapse/http/client.py | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) create mode 100644 changelog.d/6077.misc diff --git a/changelog.d/6077.misc b/changelog.d/6077.misc new file mode 100644 index 0000000000..31ac5b97a4 --- /dev/null +++ b/changelog.d/6077.misc @@ -0,0 +1 @@ +Edit header dicts docstrings in SimpleHttpClient to note that `str` or `bytes` can be passed as header keys. 
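# Illustrative only, not part of this diff: the docstring updates below
# document that SimpleHttpClient header dicts accept `str` or `bytes` for
# both keys and values, so either of these is a valid `headers` argument
# (the token value is made up):
#
#     headers = {"Authorization": ["Bearer some_token"]}
#     headers = {b"Authorization": [b"Bearer some_token"]}
#
# passed as, e.g., `client.get_json(uri, args={}, headers=headers)`.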
diff --git a/synapse/http/client.py b/synapse/http/client.py index 51765ae3c0..cdf828a4ff 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -327,7 +327,7 @@ class SimpleHttpClient(object): Args: uri (str): args (dict[str, str|List[str]]): query params - headers (dict[str, List[str]]|None): If not None, a map from + headers (dict[str|bytes, List[str|bytes]]|None): If not None, a map from header name to a list of values for that header Returns: @@ -371,7 +371,7 @@ class SimpleHttpClient(object): Args: uri (str): post_json (object): - headers (dict[str, List[str]]|None): If not None, a map from + headers (dict[str|bytes, List[str|bytes]]|None): If not None, a map from header name to a list of values for that header Returns: @@ -414,7 +414,7 @@ class SimpleHttpClient(object): None. **Note**: The value of each key is assumed to be an iterable and *not* a string. - headers (dict[str, List[str]]|None): If not None, a map from + headers (dict[str|bytes, List[str|bytes]]|None): If not None, a map from header name to a list of values for that header Returns: Deferred: Succeeds when we get *any* 2xx HTTP response, with the @@ -438,7 +438,7 @@ class SimpleHttpClient(object): None. **Note**: The value of each key is assumed to be an iterable and *not* a string. - headers (dict[str, List[str]]|None): If not None, a map from + headers (dict[str|bytes, List[str|bytes]]|None): If not None, a map from header name to a list of values for that header Returns: Deferred: Succeeds when we get *any* 2xx HTTP response, with the @@ -482,7 +482,7 @@ class SimpleHttpClient(object): None. **Note**: The value of each key is assumed to be an iterable and *not* a string. - headers (dict[str, List[str]]|None): If not None, a map from + headers (dict[str|bytes, List[str|bytes]]|None): If not None, a map from header name to a list of values for that header Returns: Deferred: Succeeds when we get *any* 2xx HTTP response, with the @@ -516,7 +516,7 @@ class SimpleHttpClient(object): Args: url (str): The URL to GET output_stream (file): File to write the response body to. - headers (dict[str, List[str]]|None): If not None, a map from + headers (dict[str|bytes, List[str|bytes]]|None): If not None, a map from header name to a list of values for that header Returns: A (int,dict,string,int) tuple of the file length, dict of the response From 16cb9a71b8b46604d49944f0b9c316687becca93 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Mon, 30 Sep 2019 09:38:41 +0100 Subject: [PATCH 19/85] Drop unused tables (#6115) These tables are unused since #5893 (as amended by #6047), so we can now drop them. Fixes #6048. --- changelog.d/6115.misc | 1 + .../delta/56/drop_unused_event_tables.sql | 20 +++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 changelog.d/6115.misc create mode 100644 synapse/storage/schema/delta/56/drop_unused_event_tables.sql diff --git a/changelog.d/6115.misc b/changelog.d/6115.misc new file mode 100644 index 0000000000..b19e395a99 --- /dev/null +++ b/changelog.d/6115.misc @@ -0,0 +1 @@ +Drop some unused database tables. diff --git a/synapse/storage/schema/delta/56/drop_unused_event_tables.sql b/synapse/storage/schema/delta/56/drop_unused_event_tables.sql new file mode 100644 index 0000000000..9f09922c67 --- /dev/null +++ b/synapse/storage/schema/delta/56/drop_unused_event_tables.sql @@ -0,0 +1,20 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- these tables are never used. +DROP TABLE IF EXISTS room_names; +DROP TABLE IF EXISTS topics; +DROP TABLE IF EXISTS history_visibility; +DROP TABLE IF EXISTS guest_access; From 5705ecaec6b7a85c152691c79a4fa3526792a3eb Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 2 Oct 2019 11:16:38 +0100 Subject: [PATCH 20/85] Don't 500 code when trying to exchange a revoked 3PID invite While this is not documented in the spec (but should be), Riot (and other clients) revoke 3PID invites by sending a m.room.third_party_invite event with an empty ({}) content to the room's state. When the invited 3PID gets associated with a MXID, the identity server (which doesn't know about revocations) sends down to the MXID's homeserver all of the undelivered invites it has for this 3PID. The homeserver then tries to talk to the inviting homeserver in order to exchange these invite for m.room.member events. When one of the invite is revoked, the inviting homeserver responds with a 500 error because it tries to extract a 'display_name' property from the content, which is empty. This might cause the invited server to consider that the server is down and not try to exchange other, valid invites (or at least delay it). This fix handles the case of revoked invites by avoiding trying to fetch a 'display_name' from the original invite's content, and letting the m.room.member event fail the auth rules (because, since the original invite's content is empty, it doesn't have public keys), which results in sending a 403 with the correct error message to the invited server. --- synapse/handlers/federation.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index f72b81d419..a3d7739ead 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2599,8 +2599,19 @@ class FederationHandler(BaseHandler): original_invite_id, allow_none=True ) if original_invite: - display_name = original_invite.content["display_name"] - event_dict["content"]["third_party_invite"]["display_name"] = display_name + # If the m.room.third_party_invite event's content is empty, it means the + # invite has been revoked. + if original_invite.content: + display_name = original_invite.content["display_name"] + event_dict["content"]["third_party_invite"]["display_name"] = display_name + else: + # Don't discard or raise an error here because that's not the right place + # to do auth checks. The auth check will fail on this invite because we + # won't be able to fetch public keys from the m.room.third_party_invite + # event's content (because it's empty). 
+                logger.info(
+                    "Found invite event for third_party_invite but it has been revoked"
+                )
         else:
             logger.info(
                 "Could not find invite event for third_party_invite: %r", event_dict

From d69fd53f74f693e0ec756eec479f3d51d93fd2aa Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Wed, 2 Oct 2019 11:21:52 +0100
Subject: [PATCH 21/85] Bound find_next_generated_user_id DB query.

We can easily bound the set of user IDs we pull out of the DB, so let's do
that.
---
 synapse/storage/registration.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py
index 241a7be51e..1a859352b6 100644
--- a/synapse/storage/registration.py
+++ b/synapse/storage/registration.py
@@ -493,7 +493,9 @@ class RegistrationWorkerStore(SQLBaseStore):
         """

         def _find_next_generated_user_id(txn):
-            txn.execute("SELECT name FROM users")
+            # We bound between '@1' and '@a' to avoid pulling the entire table
+            # out.
+            txn.execute("SELECT name FROM users WHERE '@1' <= name AND name < '@a'")

             regex = re.compile(r"^@(\d+):")

From de1823b521cd9b691d060dbdc477be16decdf2af Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Wed, 2 Oct 2019 11:23:55 +0100
Subject: [PATCH 22/85] Newsfile
---
 changelog.d/6148.misc | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6148.misc

diff --git a/changelog.d/6148.misc b/changelog.d/6148.misc
new file mode 100644
index 0000000000..1d5213345c
--- /dev/null
+++ b/changelog.d/6148.misc
@@ -0,0 +1 @@
+Improve performance of `find_next_generated_user_id` DB query.

From 2a1470cd05558d4a3bc69a0bc5e8969ba8631426 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Wed, 2 Oct 2019 12:04:22 +0100
Subject: [PATCH 23/85] Fix yields and copy instead of move push rules on room
 upgrade (#6144)

Copy push rules during a room upgrade from the old room to the new room,
instead of deleting them from the old room. Rooms can be upgraded more than
once, and push rules deleted during the first upgrade would not be
transferred on the second.

Also fix some missing yields that probably broke things quite a bit.
---
 changelog.d/6144.bugfix         |  1 +
 synapse/handlers/room_member.py |  4 ++--
 synapse/storage/push_rule.py    | 16 ++++++----------
 3 files changed, 9 insertions(+), 12 deletions(-)
 create mode 100644 changelog.d/6144.bugfix

diff --git a/changelog.d/6144.bugfix b/changelog.d/6144.bugfix
new file mode 100644
index 0000000000..eee63961e4
--- /dev/null
+++ b/changelog.d/6144.bugfix
@@ -0,0 +1 @@
+Prevent user push rules being deleted from a room when it is upgraded.
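An aside on the missing yields this patch mentions: a function decorated with
@defer.inlineCallbacks returns a Deferred as soon as it is called. If the
caller never yields that Deferred, the work may still run, but nothing waits
for it and any failure inside it is only logged as an unhandled error, never
raised to the caller. A minimal sketch with made-up names, not code from this
series:

    from twisted.internet import defer

    @defer.inlineCallbacks
    def copy_rules():
        yield defer.succeed(None)  # stand-in for the storage call
        raise RuntimeError("copy failed")

    @defer.inlineCallbacks
    def caller():
        copy_rules()        # bug: returned Deferred is discarded, error is lost
        yield copy_rules()  # fixed: the failure now propagates to the caller

This is why the un-yielded move_push_rules_from_room_to_room_for_user call in
the diff below could fail without anyone noticing.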
\ No newline at end of file diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 8abdb1b6e6..95a244d86c 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -216,8 +216,8 @@ class RoomMemberHandler(object): self.copy_room_tags_and_direct_to_room( predecessor["room_id"], room_id, user_id ) - # Move over old push rules - self.store.move_push_rules_from_room_to_room_for_user( + # Copy over push rules + yield self.store.copy_push_rules_from_room_to_room_for_user( predecessor["room_id"], room_id, user_id ) elif event.membership == Membership.LEAVE: diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index a6517c4cf3..c4e24edff2 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -183,8 +183,8 @@ class PushRulesWorkerStore( return results @defer.inlineCallbacks - def move_push_rule_from_room_to_room(self, new_room_id, user_id, rule): - """Move a single push rule from one room to another for a specific user. + def copy_push_rule_from_room_to_room(self, new_room_id, user_id, rule): + """Copy a single push rule from one room to another for a specific user. Args: new_room_id (str): ID of the new room. @@ -209,14 +209,11 @@ class PushRulesWorkerStore( actions=rule["actions"], ) - # Delete push rule for the old room - yield self.delete_push_rule(user_id, rule["rule_id"]) - @defer.inlineCallbacks - def move_push_rules_from_room_to_room_for_user( + def copy_push_rules_from_room_to_room_for_user( self, old_room_id, new_room_id, user_id ): - """Move all of the push rules from one room to another for a specific + """Copy all of the push rules from one room to another for a specific user. Args: @@ -227,15 +224,14 @@ class PushRulesWorkerStore( # Retrieve push rules for this user user_push_rules = yield self.get_push_rules_for_user(user_id) - # Get rules relating to the old room, move them to the new room, then - # delete them from the old room + # Get rules relating to the old room and copy them to the new room for rule in user_push_rules: conditions = rule.get("conditions", []) if any( (c.get("key") == "room_id" and c.get("pattern") == old_room_id) for c in conditions ): - self.move_push_rule_from_room_to_room(new_room_id, user_id, rule) + yield self.copy_push_rule_from_room_to_room(new_room_id, user_id, rule) @defer.inlineCallbacks def bulk_get_push_rules_for_room(self, event, context): From 972c9f65d7ddf94ce024b57397d651d184cb2d26 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 2 Oct 2019 12:17:46 +0100 Subject: [PATCH 24/85] Lint --- synapse/handlers/federation.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index a3d7739ead..75d79bb8e4 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2603,7 +2603,9 @@ class FederationHandler(BaseHandler): # invite has been revoked. if original_invite.content: display_name = original_invite.content["display_name"] - event_dict["content"]["third_party_invite"]["display_name"] = display_name + event_dict["content"]["third_party_invite"][ + "display_name" + ] = display_name else: # Don't discard or raise an error here because that's not the right place # to do auth checks. 
The auth check will fail on this invite because we From 24efea338d8643ee15f03b30c080a53320926ee8 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 2 Oct 2019 12:20:03 +0100 Subject: [PATCH 25/85] Changelog --- changelog.d/6147.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/6147.bugfix diff --git a/changelog.d/6147.bugfix b/changelog.d/6147.bugfix new file mode 100644 index 0000000000..b0f936d280 --- /dev/null +++ b/changelog.d/6147.bugfix @@ -0,0 +1 @@ +Don't 500 when trying to exchange a revoked 3PID invite. From 864f14454322c6cba11476667ade8fc6cbea6f44 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Wed, 2 Oct 2019 05:29:01 -0700 Subject: [PATCH 26/85] Fix up some typechecking (#6150) * type checking fixes * changelog --- .gitignore | 1 + changelog.d/6150.misc | 1 + synapse/api/errors.py | 3 ++- synapse/api/room_versions.py | 5 ++++- synapse/app/_base.py | 4 +++- synapse/config/appservice.py | 5 +++-- synapse/config/consent_config.py | 4 ++-- synapse/config/password_auth_providers.py | 4 +++- synapse/config/repository.py | 5 +++-- synapse/config/server.py | 10 +++++++--- synapse/config/server_notices_config.py | 4 ++-- synapse/logging/opentracing.py | 9 +++++---- synapse/logging/utils.py | 20 ++++++++++++++++---- synapse/metrics/__init__.py | 4 ++-- synapse/metrics/_exposition.py | 4 ++-- synapse/python_dependencies.py | 17 +++++++++++++---- synapse/types.py | 3 ++- synapse/util/async_helpers.py | 10 +++++++--- synapse/util/caches/__init__.py | 3 ++- synapse/util/caches/descriptors.py | 22 ++++++++++++++++++++-- synapse/util/caches/treecache.py | 4 +++- synapse/util/module_loader.py | 2 +- 22 files changed, 104 insertions(+), 40 deletions(-) create mode 100644 changelog.d/6150.misc diff --git a/.gitignore b/.gitignore index e53d4908d5..747b8714d7 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ *.tac _trial_temp/ _trial_temp*/ +/out # stuff that is likely to exist when you run a server locally /*.db diff --git a/changelog.d/6150.misc b/changelog.d/6150.misc new file mode 100644 index 0000000000..a373c091ab --- /dev/null +++ b/changelog.d/6150.misc @@ -0,0 +1 @@ +Expand type-checking on modules imported by synapse.config. diff --git a/synapse/api/errors.py b/synapse/api/errors.py index cf1ebf1af2..1bb2e86789 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -17,6 +17,7 @@ """Contains exceptions and error codes.""" import logging +from typing import Dict from six import iteritems from six.moves import http_client @@ -111,7 +112,7 @@ class ProxiedRequestError(SynapseError): def __init__(self, code, msg, errcode=Codes.UNKNOWN, additional_fields=None): super(ProxiedRequestError, self).__init__(code, msg, errcode) if additional_fields is None: - self._additional_fields = {} + self._additional_fields = {} # type: Dict else: self._additional_fields = dict(additional_fields) diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py index 95292b7dec..c6f50fd7b9 100644 --- a/synapse/api/room_versions.py +++ b/synapse/api/room_versions.py @@ -12,6 +12,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ +from typing import Dict + import attr @@ -102,4 +105,4 @@ KNOWN_ROOM_VERSIONS = { RoomVersions.V4, RoomVersions.V5, ) -} # type: dict[str, RoomVersion] +} # type: Dict[str, RoomVersion] diff --git a/synapse/app/_base.py b/synapse/app/_base.py index c30fdeee9a..2ac7d5c064 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -263,7 +263,9 @@ def start(hs, listeners=None): refresh_certificate(hs) # Start the tracer - synapse.logging.opentracing.init_tracer(hs.config) + synapse.logging.opentracing.init_tracer( # type: ignore[attr-defined] # noqa + hs.config + ) # It is now safe to start your Synapse. hs.start_listening(listeners) diff --git a/synapse/config/appservice.py b/synapse/config/appservice.py index 8387ff6805..28d36b1bc3 100644 --- a/synapse/config/appservice.py +++ b/synapse/config/appservice.py @@ -13,6 +13,7 @@ # limitations under the License. import logging +from typing import Dict from six import string_types from six.moves.urllib import parse as urlparse @@ -56,8 +57,8 @@ def load_appservices(hostname, config_files): return [] # Dicts of value -> filename - seen_as_tokens = {} - seen_ids = {} + seen_as_tokens = {} # type: Dict[str, str] + seen_ids = {} # type: Dict[str, str] appservices = [] diff --git a/synapse/config/consent_config.py b/synapse/config/consent_config.py index 94916f3a49..48976e17b1 100644 --- a/synapse/config/consent_config.py +++ b/synapse/config/consent_config.py @@ -73,8 +73,8 @@ DEFAULT_CONFIG = """\ class ConsentConfig(Config): - def __init__(self): - super(ConsentConfig, self).__init__() + def __init__(self, *args): + super(ConsentConfig, self).__init__(*args) self.user_consent_version = None self.user_consent_template_dir = None diff --git a/synapse/config/password_auth_providers.py b/synapse/config/password_auth_providers.py index 788c39c9fb..c50e244394 100644 --- a/synapse/config/password_auth_providers.py +++ b/synapse/config/password_auth_providers.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Any, List + from synapse.util.module_loader import load_module from ._base import Config @@ -22,7 +24,7 @@ LDAP_PROVIDER = "ldap_auth_provider.LdapAuthProvider" class PasswordAuthProviderConfig(Config): def read_config(self, config, **kwargs): - self.password_providers = [] + self.password_providers = [] # type: List[Any] providers = [] # We want to be backwards compatible with the old `ldap_config` diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 52e014608a..14740891f3 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -15,6 +15,7 @@ import os from collections import namedtuple +from typing import Dict, List from synapse.python_dependencies import DependencyException, check_requirements from synapse.util.module_loader import load_module @@ -61,7 +62,7 @@ def parse_thumbnail_requirements(thumbnail_sizes): Dictionary mapping from media type string to list of ThumbnailRequirement tuples. """ - requirements = {} + requirements = {} # type: Dict[str, List] for size in thumbnail_sizes: width = size["width"] height = size["height"] @@ -130,7 +131,7 @@ class ContentRepositoryConfig(Config): # # We don't create the storage providers here as not all workers need # them to be started. 
- self.media_storage_providers = [] + self.media_storage_providers = [] # type: List[tuple] for provider_config in storage_providers: # We special case the module "file_system" so as not to need to diff --git a/synapse/config/server.py b/synapse/config/server.py index 536ee7f29c..709bd387e5 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -19,6 +19,7 @@ import logging import os.path import re from textwrap import indent +from typing import List import attr import yaml @@ -243,7 +244,7 @@ class ServerConfig(Config): # events with profile information that differ from the target's global profile. self.allow_per_room_profiles = config.get("allow_per_room_profiles", True) - self.listeners = [] + self.listeners = [] # type: List[dict] for listener in config.get("listeners", []): if not isinstance(listener.get("port", None), int): raise ConfigError( @@ -287,7 +288,10 @@ class ServerConfig(Config): validator=attr.validators.instance_of(bool), default=False ) complexity = attr.ib( - validator=attr.validators.instance_of((int, float)), default=1.0 + validator=attr.validators.instance_of( + (float, int) # type: ignore[arg-type] # noqa + ), + default=1.0, ) complexity_error = attr.ib( validator=attr.validators.instance_of(str), @@ -366,7 +370,7 @@ class ServerConfig(Config): "cleanup_extremities_with_dummy_events", True ) - def has_tls_listener(self): + def has_tls_listener(self) -> bool: return any(l["tls"] for l in self.listeners) def generate_config_section( diff --git a/synapse/config/server_notices_config.py b/synapse/config/server_notices_config.py index eaac3d73bc..6d4285ef93 100644 --- a/synapse/config/server_notices_config.py +++ b/synapse/config/server_notices_config.py @@ -59,8 +59,8 @@ class ServerNoticesConfig(Config): None if server notices are not enabled. """ - def __init__(self): - super(ServerNoticesConfig, self).__init__() + def __init__(self, *args): + super(ServerNoticesConfig, self).__init__(*args) self.server_notices_mxid = None self.server_notices_mxid_display_name = None self.server_notices_mxid_avatar_url = None diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 308a27213b..cd1ff6a518 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -170,6 +170,7 @@ import inspect import logging import re from functools import wraps +from typing import Dict from canonicaljson import json @@ -547,7 +548,7 @@ def inject_active_span_twisted_headers(headers, destination, check_destination=T return span = opentracing.tracer.active_span - carrier = {} + carrier = {} # type: Dict[str, str] opentracing.tracer.inject(span, opentracing.Format.HTTP_HEADERS, carrier) for key, value in carrier.items(): @@ -584,7 +585,7 @@ def inject_active_span_byte_dict(headers, destination, check_destination=True): span = opentracing.tracer.active_span - carrier = {} + carrier = {} # type: Dict[str, str] opentracing.tracer.inject(span, opentracing.Format.HTTP_HEADERS, carrier) for key, value in carrier.items(): @@ -639,7 +640,7 @@ def get_active_span_text_map(destination=None): if destination and not whitelisted_homeserver(destination): return {} - carrier = {} + carrier = {} # type: Dict[str, str] opentracing.tracer.inject( opentracing.tracer.active_span, opentracing.Format.TEXT_MAP, carrier ) @@ -653,7 +654,7 @@ def active_span_context_as_string(): Returns: The active span context encoded as a string. 
""" - carrier = {} + carrier = {} # type: Dict[str, str] if opentracing: opentracing.tracer.inject( opentracing.tracer.active_span, opentracing.Format.TEXT_MAP, carrier diff --git a/synapse/logging/utils.py b/synapse/logging/utils.py index 7df0fa6087..6073fc2725 100644 --- a/synapse/logging/utils.py +++ b/synapse/logging/utils.py @@ -119,7 +119,11 @@ def trace_function(f): logger = logging.getLogger(name) level = logging.DEBUG - s = inspect.currentframe().f_back + frame = inspect.currentframe() + if frame is None: + raise Exception("Can't get current frame!") + + s = frame.f_back to_print = [ "\t%s:%s %s. Args: args=%s, kwargs=%s" @@ -144,7 +148,7 @@ def trace_function(f): pathname=pathname, lineno=lineno, msg=msg, - args=None, + args=tuple(), exc_info=None, ) @@ -157,7 +161,12 @@ def trace_function(f): def get_previous_frames(): - s = inspect.currentframe().f_back.f_back + + frame = inspect.currentframe() + if frame is None: + raise Exception("Can't get current frame!") + + s = frame.f_back.f_back to_return = [] while s: if s.f_globals["__name__"].startswith("synapse"): @@ -174,7 +183,10 @@ def get_previous_frames(): def get_previous_frame(ignore=[]): - s = inspect.currentframe().f_back.f_back + frame = inspect.currentframe() + if frame is None: + raise Exception("Can't get current frame!") + s = frame.f_back.f_back while s: if s.f_globals["__name__"].startswith("synapse"): diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index bec3b13397..0b45e1f52a 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -125,7 +125,7 @@ class InFlightGauge(object): ) # Counts number of in flight blocks for a given set of label values - self._registrations = {} + self._registrations = {} # type: Dict # Protects access to _registrations self._lock = threading.Lock() @@ -226,7 +226,7 @@ class BucketCollector(object): # Fetch the data -- this must be synchronous! data = self.data_collector() - buckets = {} + buckets = {} # type: Dict[float, int] res = [] for x in data.keys(): diff --git a/synapse/metrics/_exposition.py b/synapse/metrics/_exposition.py index 74d9c3ecd3..a248103191 100644 --- a/synapse/metrics/_exposition.py +++ b/synapse/metrics/_exposition.py @@ -36,9 +36,9 @@ from twisted.web.resource import Resource try: from prometheus_client.samples import Sample except ImportError: - Sample = namedtuple( + Sample = namedtuple( # type: ignore[no-redef] # noqa "Sample", ["name", "labels", "value", "timestamp", "exemplar"] - ) # type: ignore + ) CONTENT_TYPE_LATEST = str("text/plain; version=0.0.4; charset=utf-8") diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 0bd563edc7..aa7da1c543 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -15,7 +15,7 @@ # limitations under the License. 
import logging -from typing import Set +from typing import List, Set from pkg_resources import ( DistributionNotFound, @@ -73,6 +73,7 @@ REQUIREMENTS = [ "netaddr>=0.7.18", "Jinja2>=2.9", "bleach>=1.4.3", + "typing-extensions>=3.7.4", ] CONDITIONAL_REQUIREMENTS = { @@ -144,7 +145,11 @@ def check_requirements(for_feature=None): deps_needed.append(dependency) errors.append( "Needed %s, got %s==%s" - % (dependency, e.dist.project_name, e.dist.version) + % ( + dependency, + e.dist.project_name, # type: ignore[attr-defined] # noqa + e.dist.version, # type: ignore[attr-defined] # noqa + ) ) except DistributionNotFound: deps_needed.append(dependency) @@ -159,7 +164,7 @@ def check_requirements(for_feature=None): if not for_feature: # Check the optional dependencies are up to date. We allow them to not be # installed. - OPTS = sum(CONDITIONAL_REQUIREMENTS.values(), []) + OPTS = sum(CONDITIONAL_REQUIREMENTS.values(), []) # type: List[str] for dependency in OPTS: try: @@ -168,7 +173,11 @@ def check_requirements(for_feature=None): deps_needed.append(dependency) errors.append( "Needed optional %s, got %s==%s" - % (dependency, e.dist.project_name, e.dist.version) + % ( + dependency, + e.dist.project_name, # type: ignore[attr-defined] # noqa + e.dist.version, # type: ignore[attr-defined] # noqa + ) ) except DistributionNotFound: # If it's not found, we don't care diff --git a/synapse/types.py b/synapse/types.py index 51eadb6ad4..8f79797f17 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -318,6 +318,7 @@ class StreamToken( ) ): _SEPARATOR = "_" + START = None # type: StreamToken @classmethod def from_string(cls, string): @@ -402,7 +403,7 @@ class RoomStreamToken(namedtuple("_StreamToken", "topological stream")): followed by the "stream_ordering" id of the event it comes after. """ - __slots__ = [] + __slots__ = [] # type: list @classmethod def parse(cls, string): diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py index f1c46836b1..0d3bdd88ce 100644 --- a/synapse/util/async_helpers.py +++ b/synapse/util/async_helpers.py @@ -13,9 +13,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import collections import logging from contextlib import contextmanager +from typing import Dict, Sequence, Set, Union from six.moves import range @@ -213,7 +215,9 @@ class Linearizer(object): # the first element is the number of things executing, and # the second element is an OrderedDict, where the keys are deferreds for the # things blocked from executing. - self.key_to_defer = {} + self.key_to_defer = ( + {} + ) # type: Dict[str, Sequence[Union[int, Dict[defer.Deferred, int]]]] def queue(self, key): # we avoid doing defer.inlineCallbacks here, so that cancellation works correctly. 
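Most of the annotations this patch adds follow one pattern: attributes
initialised to empty literals get a comment-style annotation, because mypy
cannot infer element types from a bare {} or [], and the comment form keeps
the file valid on the older Python syntax this codebase still supports. A
small self-contained sketch of the pattern, using made-up names:

    from typing import Dict, List

    class Tracker(object):
        def __init__(self):
            # Without the comments, mypy reports "Need type annotation" for
            # the empty literals; with them, later uses are type checked.
            self.counts = {}  # type: Dict[str, int]
            self.names = []  # type: List[str]

    tracker = Tracker()
    tracker.counts["room"] = 1
    tracker.names.append("alice")  # ok; appending an int would be flagged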
@@ -340,10 +344,10 @@ class ReadWriteLock(object): def __init__(self): # Latest readers queued - self.key_to_current_readers = {} + self.key_to_current_readers = {} # type: Dict[str, Set[defer.Deferred]] # Latest writer queued - self.key_to_current_writer = {} + self.key_to_current_writer = {} # type: Dict[str, defer.Deferred] @defer.inlineCallbacks def read(self, key): diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index b50e3503f0..43fd65d693 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -16,6 +16,7 @@ import logging import os +from typing import Dict import six from six.moves import intern @@ -37,7 +38,7 @@ def get_cache_factor_for(cache_name): caches_by_name = {} -collectors_by_name = {} +collectors_by_name = {} # type: Dict cache_size = Gauge("synapse_util_caches_cache:size", "", ["name"]) cache_hits = Gauge("synapse_util_caches_cache:hits", "", ["name"]) diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index 43f66ec4be..5ac2530a6a 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -18,10 +18,12 @@ import inspect import logging import threading from collections import namedtuple +from typing import Any, cast from six import itervalues from prometheus_client import Gauge +from typing_extensions import Protocol from twisted.internet import defer @@ -37,6 +39,18 @@ from . import register_cache logger = logging.getLogger(__name__) +class _CachedFunction(Protocol): + invalidate = None # type: Any + invalidate_all = None # type: Any + invalidate_many = None # type: Any + prefill = None # type: Any + cache = None # type: Any + num_args = None # type: Any + + def __name__(self): + ... + + cache_pending_metric = Gauge( "synapse_util_caches_cache_pending", "Number of lookups currently pending for this cache", @@ -245,7 +259,9 @@ class Cache(object): class _CacheDescriptorBase(object): - def __init__(self, orig, num_args, inlineCallbacks, cache_context=False): + def __init__( + self, orig: _CachedFunction, num_args, inlineCallbacks, cache_context=False + ): self.orig = orig if inlineCallbacks: @@ -404,7 +420,7 @@ class CacheDescriptor(_CacheDescriptorBase): return tuple(get_cache_key_gen(args, kwargs)) @functools.wraps(self.orig) - def wrapped(*args, **kwargs): + def _wrapped(*args, **kwargs): # If we're passed a cache_context then we'll want to call its invalidate() # whenever we are invalidated invalidate_callback = kwargs.pop("on_invalidate", None) @@ -440,6 +456,8 @@ class CacheDescriptor(_CacheDescriptorBase): return make_deferred_yieldable(observer) + wrapped = cast(_CachedFunction, _wrapped) + if self.num_args == 1: wrapped.invalidate = lambda key: cache.invalidate(key[0]) wrapped.prefill = lambda key, val: cache.prefill(key[0], val) diff --git a/synapse/util/caches/treecache.py b/synapse/util/caches/treecache.py index 9a72218d85..2ea4e4e911 100644 --- a/synapse/util/caches/treecache.py +++ b/synapse/util/caches/treecache.py @@ -1,3 +1,5 @@ +from typing import Dict + from six import itervalues SENTINEL = object() @@ -12,7 +14,7 @@ class TreeCache(object): def __init__(self): self.size = 0 - self.root = {} + self.root = {} # type: Dict def __setitem__(self, key, value): return self.set(key, value) diff --git a/synapse/util/module_loader.py b/synapse/util/module_loader.py index 7ff7eb1e4d..2705cbe5f8 100644 --- a/synapse/util/module_loader.py +++ b/synapse/util/module_loader.py @@ -54,5 +54,5 @@ def load_python_module(location: str): if spec is 
None: raise Exception("Unable to load module at %s" % (location,)) mod = importlib.util.module_from_spec(spec) - spec.loader.exec_module(mod) + spec.loader.exec_module(mod) # type: ignore return mod From a5166e4d5febc0e03ba9da9db99127a797a0bc4d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 2 Oct 2019 14:08:35 +0100 Subject: [PATCH 27/85] Land improved room list based on room stats (#6019) Use room_stats and room_state for room directory search --- changelog.d/6019.misc | 1 + synapse/federation/transport/server.py | 8 + synapse/handlers/room_list.py | 331 +++++------------- synapse/rest/client/v1/room.py | 8 + synapse/storage/room.py | 238 ++++++++----- .../schema/delta/56/public_room_list_idx.sql | 16 + tests/handlers/test_roomlist.py | 39 --- 7 files changed, 282 insertions(+), 359 deletions(-) create mode 100644 changelog.d/6019.misc create mode 100644 synapse/storage/schema/delta/56/public_room_list_idx.sql delete mode 100644 tests/handlers/test_roomlist.py diff --git a/changelog.d/6019.misc b/changelog.d/6019.misc new file mode 100644 index 0000000000..dfee73c28f --- /dev/null +++ b/changelog.d/6019.misc @@ -0,0 +1 @@ +Improve performance of the public room list directory. diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 7f8a16e355..0f16f21c2d 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -765,6 +765,10 @@ class PublicRoomList(BaseFederationServlet): else: network_tuple = ThirdPartyInstanceID(None, None) + if limit == 0: + # zero is a special value which corresponds to no limit. + limit = None + data = await maybeDeferred( self.handler.get_local_public_room_list, limit, @@ -800,6 +804,10 @@ class PublicRoomList(BaseFederationServlet): if search_filter is None: logger.warning("Nonefilter") + if limit == 0: + # zero is a special value which corresponds to no limit. + limit = None + data = await self.handler.get_local_public_room_list( limit=limit, since_token=since_token, diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index a7e55f00e5..4e1cc5460f 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -16,8 +16,7 @@ import logging from collections import namedtuple -from six import PY3, iteritems -from six.moves import range +from six import iteritems import msgpack from unpaddedbase64 import decode_base64, encode_base64 @@ -27,7 +26,6 @@ from twisted.internet import defer from synapse.api.constants import EventTypes, JoinRules from synapse.api.errors import Codes, HttpResponseException from synapse.types import ThirdPartyInstanceID -from synapse.util.async_helpers import concurrently_execute from synapse.util.caches.descriptors import cachedInlineCallbacks from synapse.util.caches.response_cache import ResponseCache @@ -37,7 +35,6 @@ logger = logging.getLogger(__name__) REMOTE_ROOM_LIST_POLL_INTERVAL = 60 * 1000 - # This is used to indicate we should only return rooms published to the main list. EMPTY_THIRD_PARTY_ID = ThirdPartyInstanceID(None, None) @@ -72,6 +69,8 @@ class RoomListHandler(BaseHandler): This can be (None, None) to indicate the main list, or a particular appservice and network id to use an appservice specific one. Setting to None returns all public rooms across all lists. 
+ from_federation (bool): true iff the request comes from the federation + API """ if not self.enable_room_list_search: return defer.succeed({"chunk": [], "total_room_count_estimate": 0}) @@ -133,239 +132,109 @@ class RoomListHandler(BaseHandler): from_federation (bool): Whether this request originated from a federating server or a client. Used for room filtering. timeout (int|None): Amount of seconds to wait for a response before - timing out. + timing out. TODO """ - if since_token and since_token != "END": - since_token = RoomListNextBatch.from_token(since_token) - else: - since_token = None - rooms_to_order_value = {} - rooms_to_num_joined = {} + # Pagination tokens work by storing the room ID sent in the last batch, + # plus the direction (forwards or backwards). Next batch tokens always + # go forwards, prev batch tokens always go backwards. - newly_visible = [] - newly_unpublished = [] if since_token: - stream_token = since_token.stream_ordering - current_public_id = yield self.store.get_current_public_room_stream_id() - public_room_stream_id = since_token.public_room_stream_id - newly_visible, newly_unpublished = yield self.store.get_public_room_changes( - public_room_stream_id, current_public_id, network_tuple=network_tuple - ) - else: - stream_token = yield self.store.get_room_max_stream_ordering() - public_room_stream_id = yield self.store.get_current_public_room_stream_id() + batch_token = RoomListNextBatch.from_token(since_token) - room_ids = yield self.store.get_public_room_ids_at_stream_id( - public_room_stream_id, network_tuple=network_tuple + last_room_id = batch_token.last_room_id + forwards = batch_token.direction_is_forward + else: + batch_token = None + + last_room_id = None + forwards = True + + # we request one more than wanted to see if there are more pages to come + probing_limit = limit + 1 if limit is not None else None + + results = yield self.store.get_largest_public_rooms( + network_tuple, + search_filter, + probing_limit, + last_room_id=last_room_id, + forwards=forwards, + ignore_non_federatable=from_federation, ) - # We want to return rooms in a particular order: the number of joined - # users. We then arbitrarily use the room_id as a tie breaker. + def build_room_entry(room): + entry = { + "room_id": room["room_id"], + "name": room["name"], + "topic": room["topic"], + "canonical_alias": room["canonical_alias"], + "num_joined_members": room["joined_members"], + "avatar_url": room["avatar"], + "world_readable": room["history_visibility"] == "world_readable", + "guest_can_join": room["guest_access"] == "can_join", + } - @defer.inlineCallbacks - def get_order_for_room(room_id): - # Most of the rooms won't have changed between the since token and - # now (especially if the since token is "now"). So, we can ask what - # the current users are in a room (that will hit a cache) and then - # check if the room has changed since the since token. (We have to - # do it in that order to avoid races). - # If things have changed then fall back to getting the current state - # at the since token. 
- joined_users = yield self.store.get_users_in_room(room_id) - if self.store.has_room_changed_since(room_id, stream_token): - latest_event_ids = yield self.store.get_forward_extremeties_for_room( - room_id, stream_token - ) + # Filter out Nones – rather omit the field altogether + return {k: v for k, v in entry.items() if v is not None} - if not latest_event_ids: - return + results = [build_room_entry(r) for r in results] - joined_users = yield self.state_handler.get_current_users_in_room( - room_id, latest_event_ids - ) + response = {} + num_results = len(results) + if limit is not None: + more_to_come = num_results == probing_limit - num_joined_users = len(joined_users) - rooms_to_num_joined[room_id] = num_joined_users + # Depending on direction we trim either the front or back. + if forwards: + results = results[:limit] + else: + results = results[-limit:] + else: + more_to_come = False - if num_joined_users == 0: - return + if num_results > 0: + final_room_id = results[-1]["room_id"] + initial_room_id = results[0]["room_id"] - # We want larger rooms to be first, hence negating num_joined_users - rooms_to_order_value[room_id] = (-num_joined_users, room_id) + if forwards: + if batch_token: + # If there was a token given then we assume that there + # must be previous results. + response["prev_batch"] = RoomListNextBatch( + last_room_id=initial_room_id, direction_is_forward=False + ).to_token() - logger.info( - "Getting ordering for %i rooms since %s", len(room_ids), stream_token + if more_to_come: + response["next_batch"] = RoomListNextBatch( + last_room_id=final_room_id, direction_is_forward=True + ).to_token() + else: + if batch_token: + response["next_batch"] = RoomListNextBatch( + last_room_id=final_room_id, direction_is_forward=True + ).to_token() + + if more_to_come: + response["prev_batch"] = RoomListNextBatch( + last_room_id=initial_room_id, direction_is_forward=False + ).to_token() + + for room in results: + # populate search result entries with additional fields, namely + # 'aliases' + room_id = room["room_id"] + + aliases = yield self.store.get_aliases_for_room(room_id) + if aliases: + room["aliases"] = aliases + + response["chunk"] = results + + response["total_room_count_estimate"] = yield self.store.count_public_rooms( + network_tuple, ignore_non_federatable=from_federation ) - yield concurrently_execute(get_order_for_room, room_ids, 10) - sorted_entries = sorted(rooms_to_order_value.items(), key=lambda e: e[1]) - sorted_rooms = [room_id for room_id, _ in sorted_entries] - - # `sorted_rooms` should now be a list of all public room ids that is - # stable across pagination. Therefore, we can use indices into this - # list as our pagination tokens. - - # Filter out rooms that we don't want to return - rooms_to_scan = [ - r - for r in sorted_rooms - if r not in newly_unpublished and rooms_to_num_joined[r] > 0 - ] - - total_room_count = len(rooms_to_scan) - - if since_token: - # Filter out rooms we've already returned previously - # `since_token.current_limit` is the index of the last room we - # sent down, so we exclude it and everything before/after it. 
- if since_token.direction_is_forward: - rooms_to_scan = rooms_to_scan[since_token.current_limit + 1 :] - else: - rooms_to_scan = rooms_to_scan[: since_token.current_limit] - rooms_to_scan.reverse() - - logger.info("After sorting and filtering, %i rooms remain", len(rooms_to_scan)) - - # _append_room_entry_to_chunk will append to chunk but will stop if - # len(chunk) > limit - # - # Normally we will generate enough results on the first iteration here, - # but if there is a search filter, _append_room_entry_to_chunk may - # filter some results out, in which case we loop again. - # - # We don't want to scan over the entire range either as that - # would potentially waste a lot of work. - # - # XXX if there is no limit, we may end up DoSing the server with - # calls to get_current_state_ids for every single room on the - # server. Surely we should cap this somehow? - # - if limit: - step = limit + 1 - else: - # step cannot be zero - step = len(rooms_to_scan) if len(rooms_to_scan) != 0 else 1 - - chunk = [] - for i in range(0, len(rooms_to_scan), step): - if timeout and self.clock.time() > timeout: - raise Exception("Timed out searching room directory") - - batch = rooms_to_scan[i : i + step] - logger.info("Processing %i rooms for result", len(batch)) - yield concurrently_execute( - lambda r: self._append_room_entry_to_chunk( - r, - rooms_to_num_joined[r], - chunk, - limit, - search_filter, - from_federation=from_federation, - ), - batch, - 5, - ) - logger.info("Now %i rooms in result", len(chunk)) - if len(chunk) >= limit + 1: - break - - chunk.sort(key=lambda e: (-e["num_joined_members"], e["room_id"])) - - # Work out the new limit of the batch for pagination, or None if we - # know there are no more results that would be returned. - # i.e., [since_token.current_limit..new_limit] is the batch of rooms - # we've returned (or the reverse if we paginated backwards) - # We tried to pull out limit + 1 rooms above, so if we have <= limit - # then we know there are no more results to return - new_limit = None - if chunk and (not limit or len(chunk) > limit): - - if not since_token or since_token.direction_is_forward: - if limit: - chunk = chunk[:limit] - last_room_id = chunk[-1]["room_id"] - else: - if limit: - chunk = chunk[-limit:] - last_room_id = chunk[0]["room_id"] - - new_limit = sorted_rooms.index(last_room_id) - - results = {"chunk": chunk, "total_room_count_estimate": total_room_count} - - if since_token: - results["new_rooms"] = bool(newly_visible) - - if not since_token or since_token.direction_is_forward: - if new_limit is not None: - results["next_batch"] = RoomListNextBatch( - stream_ordering=stream_token, - public_room_stream_id=public_room_stream_id, - current_limit=new_limit, - direction_is_forward=True, - ).to_token() - - if since_token: - results["prev_batch"] = since_token.copy_and_replace( - direction_is_forward=False, - current_limit=since_token.current_limit + 1, - ).to_token() - else: - if new_limit is not None: - results["prev_batch"] = RoomListNextBatch( - stream_ordering=stream_token, - public_room_stream_id=public_room_stream_id, - current_limit=new_limit, - direction_is_forward=False, - ).to_token() - - if since_token: - results["next_batch"] = since_token.copy_and_replace( - direction_is_forward=True, - current_limit=since_token.current_limit - 1, - ).to_token() - - return results - - @defer.inlineCallbacks - def _append_room_entry_to_chunk( - self, - room_id, - num_joined_users, - chunk, - limit, - search_filter, - from_federation=False, - ): - """Generate the entry 
for a room in the public room list and append it - to the `chunk` if it matches the search filter - - Args: - room_id (str): The ID of the room. - num_joined_users (int): The number of joined users in the room. - chunk (list) - limit (int|None): Maximum amount of rooms to display. Function will - return if length of chunk is greater than limit + 1. - search_filter (dict|None) - from_federation (bool): Whether this request originated from a - federating server or a client. Used for room filtering. - """ - if limit and len(chunk) > limit + 1: - # We've already got enough, so lets just drop it. - return - - result = yield self.generate_room_entry(room_id, num_joined_users) - if not result: - return - - if from_federation and not result.get("m.federate", True): - # This is a room that other servers cannot join. Do not show them - # this room. - return - - if _matches_room_entry(result, search_filter): - chunk.append(result) + return response @cachedInlineCallbacks(num_args=1, cache_context=True) def generate_room_entry( @@ -580,32 +449,18 @@ class RoomListNextBatch( namedtuple( "RoomListNextBatch", ( - "stream_ordering", # stream_ordering of the first public room list - "public_room_stream_id", # public room stream id for first public room list - "current_limit", # The number of previous rooms returned + "last_room_id", # The room_id to get rooms after/before "direction_is_forward", # Bool if this is a next_batch, false if prev_batch ), ) ): - - KEY_DICT = { - "stream_ordering": "s", - "public_room_stream_id": "p", - "current_limit": "n", - "direction_is_forward": "d", - } + KEY_DICT = {"last_room_id": "r", "direction_is_forward": "d"} REVERSE_KEY_DICT = {v: k for k, v in KEY_DICT.items()} @classmethod def from_token(cls, token): - if PY3: - # The argument raw=False is only available on new versions of - # msgpack, and only really needed on Python 3. Gate it behind - # a PY3 check to avoid causing issues on Debian-packaged versions. - decoded = msgpack.loads(decode_base64(token), raw=False) - else: - decoded = msgpack.loads(decode_base64(token)) + decoded = msgpack.loads(decode_base64(token), raw=False) return RoomListNextBatch( **{cls.REVERSE_KEY_DICT[key]: val for key, val in decoded.items()} ) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 6bf924dedc..9c1d41421c 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -361,6 +361,10 @@ class PublicRoomListRestServlet(TransactionRestServlet): limit = parse_integer(request, "limit", 0) since_token = parse_string(request, "since", None) + if limit == 0: + # zero is a special value which corresponds to no limit. + limit = None + handler = self.hs.get_room_list_handler() if server: data = yield handler.get_remote_public_room_list( @@ -398,6 +402,10 @@ class PublicRoomListRestServlet(TransactionRestServlet): else: network_tuple = ThirdPartyInstanceID.from_string(third_party_instance_id) + if limit == 0: + # zero is a special value which corresponds to no limit. + limit = None + handler = self.hs.get_room_list_handler() if server: data = yield handler.get_remote_public_room_list( diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 08e13f3a3b..c02787a73d 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. 
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -63,103 +64,176 @@ class RoomWorkerStore(SQLBaseStore):
             desc="get_public_room_ids",
         )

-    @cached(num_args=2, max_entries=100)
-    def get_public_room_ids_at_stream_id(self, stream_id, network_tuple):
-        """Get pulbic rooms for a particular list, or across all lists.
+    def count_public_rooms(self, network_tuple, ignore_non_federatable):
+        """Counts the number of public rooms as tracked in the room_stats_current
+        and room_stats_state tables.

         Args:
-            stream_id (int)
-            network_tuple (ThirdPartyInstanceID): The list to use (None, None)
-                means the main list, None means all lsits.
+            network_tuple (ThirdPartyInstanceID|None)
+            ignore_non_federatable (bool): If true filters out non-federatable rooms
         """
-        return self.runInteraction(
-            "get_public_room_ids_at_stream_id",
-            self.get_public_room_ids_at_stream_id_txn,
-            stream_id,
-            network_tuple=network_tuple,
-        )

-    def get_public_room_ids_at_stream_id_txn(self, txn, stream_id, network_tuple):
-        return {
-            rm
-            for rm, vis in self.get_published_at_stream_id_txn(
-                txn, stream_id, network_tuple=network_tuple
-            ).items()
-            if vis
+        def _count_public_rooms_txn(txn):
+            query_args = []
+
+            if network_tuple:
+                if network_tuple.appservice_id:
+                    published_sql = """
+                        SELECT room_id from appservice_room_list
+                        WHERE appservice_id = ? AND network_id = ?
+                    """
+                    query_args.append(network_tuple.appservice_id)
+                    query_args.append(network_tuple.network_id)
+                else:
+                    published_sql = """
+                        SELECT room_id FROM rooms WHERE is_public
+                    """
+            else:
+                published_sql = """
+                    SELECT room_id FROM rooms WHERE is_public
+                    UNION SELECT room_id from appservice_room_list
+                """
+
+            sql = """
+                SELECT
+                    COALESCE(COUNT(*), 0)
+                FROM (
+                    %(published_sql)s
+                ) published
+                INNER JOIN room_stats_state USING (room_id)
+                INNER JOIN room_stats_current USING (room_id)
+                WHERE
+                    (
+                        join_rules = 'public' OR history_visibility = 'world_readable'
+                    )
+                    AND joined_members > 0
+            """ % {
+                "published_sql": published_sql
+            }
+
+            txn.execute(sql, query_args)
+            return txn.fetchone()[0]
+
+        return self.runInteraction("count_public_rooms", _count_public_rooms_txn)
+
+    @defer.inlineCallbacks
+    def get_largest_public_rooms(
+        self,
+        network_tuple,
+        search_filter,
+        limit,
+        last_room_id,
+        forwards,
+        ignore_non_federatable=False,
+    ):
+        """Gets the largest public rooms (where largest is in terms of joined
+        members, as tracked in the statistics table).
+
+        Args:
+            network_tuple (ThirdPartyInstanceID|None):
+            search_filter (dict|None):
+            limit (int|None): Maximum number of rows to return, unlimited otherwise.
+            last_room_id (str|None): if present, a room ID which bounds the
+                result set, and is always *excluded* from the result set.
+            forwards (bool): true iff going forwards, going backwards otherwise
+            ignore_non_federatable (bool): If true filters out non-federatable rooms.
+
+        Returns:
+            Rooms in order: biggest number of joined users first.
+            We then arbitrarily use the room_id as a tie breaker.
+
+        """
+
+        where_clauses = []
+        query_args = []
+
+        if last_room_id:
+            if forwards:
+                where_clauses.append("room_id < ?")
+            else:
+                where_clauses.append("? < room_id")
+
+            query_args += [last_room_id]
+
+        if search_filter and search_filter.get("generic_search_term", None):
+            search_term = "%" + search_filter["generic_search_term"] + "%"
+
+            where_clauses.append(
+                """
+                    (
+                        name LIKE ?
+                        OR topic LIKE ?
+                        OR canonical_alias LIKE ?
+ ) + """ + ) + query_args += [search_term, search_term, search_term] + + if network_tuple: + if network_tuple.appservice_id: + published_sql = """ + SELECT room_id from appservice_room_list + WHERE appservice_id = ? AND network_id = ? + """ + query_args.append(network_tuple.appservice_id) + query_args.append(network_tuple.network_id) + else: + published_sql = """ + SELECT room_id FROM rooms WHERE is_public + """ + else: + published_sql = """ + SELECT room_id FROM rooms WHERE is_public + UNION SELECT room_id from appservice_room_list + """ + + where_clause = "" + if where_clauses: + where_clause = " AND " + " AND ".join(where_clauses) + + sql = """ + SELECT + room_id, name, topic, canonical_alias, joined_members, + avatar, history_visibility, joined_members, guest_access + FROM ( + %(published_sql)s + ) published + INNER JOIN room_stats_state USING (room_id) + INNER JOIN room_stats_current USING (room_id) + WHERE + ( + join_rules = 'public' OR history_visibility = 'world_readable' + ) + AND joined_members > 0 + %(where_clause)s + ORDER BY joined_members %(dir)s, room_id %(dir)s + """ % { + "published_sql": published_sql, + "where_clause": where_clause, + "dir": "DESC" if forwards else "ASC", } - def get_published_at_stream_id_txn(self, txn, stream_id, network_tuple): - if network_tuple: - # We want to get from a particular list. No aggregation required. + if limit is not None: + query_args.append(limit) - sql = """ - SELECT room_id, visibility FROM public_room_list_stream - INNER JOIN ( - SELECT room_id, max(stream_id) AS stream_id - FROM public_room_list_stream - WHERE stream_id <= ? %s - GROUP BY room_id - ) grouped USING (room_id, stream_id) + sql += """ + LIMIT ? """ - if network_tuple.appservice_id is not None: - txn.execute( - sql % ("AND appservice_id = ? AND network_id = ?",), - (stream_id, network_tuple.appservice_id, network_tuple.network_id), - ) - else: - txn.execute(sql % ("AND appservice_id IS NULL",), (stream_id,)) - return dict(txn) - else: - # We want to get from all lists, so we need to aggregate the results + def _get_largest_public_rooms_txn(txn): + txn.execute(sql, query_args) - logger.info("Executing full list") + results = self.cursor_to_dict(txn) - sql = """ - SELECT room_id, visibility - FROM public_room_list_stream - INNER JOIN ( - SELECT - room_id, max(stream_id) AS stream_id, appservice_id, - network_id - FROM public_room_list_stream - WHERE stream_id <= ? - GROUP BY room_id, appservice_id, network_id - ) grouped USING (room_id, stream_id) - """ - - txn.execute(sql, (stream_id,)) - - results = {} - # A room is visible if its visible on any list. 
- for room_id, visibility in txn: - results[room_id] = bool(visibility) or results.get(room_id, False) + if not forwards: + results.reverse() return results - def get_public_room_changes(self, prev_stream_id, new_stream_id, network_tuple): - def get_public_room_changes_txn(txn): - then_rooms = self.get_public_room_ids_at_stream_id_txn( - txn, prev_stream_id, network_tuple - ) - - now_rooms_dict = self.get_published_at_stream_id_txn( - txn, new_stream_id, network_tuple - ) - - now_rooms_visible = set(rm for rm, vis in now_rooms_dict.items() if vis) - now_rooms_not_visible = set( - rm for rm, vis in now_rooms_dict.items() if not vis - ) - - newly_visible = now_rooms_visible - then_rooms - newly_unpublished = now_rooms_not_visible & then_rooms - - return newly_visible, newly_unpublished - - return self.runInteraction( - "get_public_room_changes", get_public_room_changes_txn + ret_val = yield self.runInteraction( + "get_largest_public_rooms", _get_largest_public_rooms_txn ) + defer.returnValue(ret_val) @cached(max_entries=10000) def is_room_blocked(self, room_id): diff --git a/synapse/storage/schema/delta/56/public_room_list_idx.sql b/synapse/storage/schema/delta/56/public_room_list_idx.sql new file mode 100644 index 0000000000..7be31ffebb --- /dev/null +++ b/synapse/storage/schema/delta/56/public_room_list_idx.sql @@ -0,0 +1,16 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CREATE INDEX public_room_list_stream_network ON public_room_list_stream (appservice_id, network_id, room_id); diff --git a/tests/handlers/test_roomlist.py b/tests/handlers/test_roomlist.py deleted file mode 100644 index 61eebb6985..0000000000 --- a/tests/handlers/test_roomlist.py +++ /dev/null @@ -1,39 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2018 New Vector Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from synapse.handlers.room_list import RoomListNextBatch - -import tests.unittest -import tests.utils - - -class RoomListTestCase(tests.unittest.TestCase): - """ Tests RoomList's RoomListNextBatch. 
""" - - def setUp(self): - pass - - def test_check_read_batch_tokens(self): - batch_token = RoomListNextBatch( - stream_ordering="abcdef", - public_room_stream_id="123", - current_limit=20, - direction_is_forward=True, - ).to_token() - next_batch = RoomListNextBatch.from_token(batch_token) - self.assertEquals(next_batch.stream_ordering, "abcdef") - self.assertEquals(next_batch.public_room_stream_id, "123") - self.assertEquals(next_batch.current_limit, 20) - self.assertEquals(next_batch.direction_is_forward, True) From 03cf4385e098ae73730b9c5ef695fa3f16c1806f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 2 Oct 2019 15:09:10 +0100 Subject: [PATCH 28/85] Fix public room list pagination. We incorrectly used `room_id` as to bound the result set, even though we order by `joined_members, room_id`, leading to incorrect results after pagination. --- synapse/handlers/room_list.py | 33 +++++++++++++++------- synapse/storage/room.py | 53 ++++++++++++++++++++++++----------- 2 files changed, 59 insertions(+), 27 deletions(-) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index 4e1cc5460f..cfed344d4d 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -142,12 +142,12 @@ class RoomListHandler(BaseHandler): if since_token: batch_token = RoomListNextBatch.from_token(since_token) - last_room_id = batch_token.last_room_id + bounds = (batch_token.last_joined_members, batch_token.last_room_id) forwards = batch_token.direction_is_forward else: batch_token = None + bounds = None - last_room_id = None forwards = True # we request one more than wanted to see if there are more pages to come @@ -157,7 +157,7 @@ class RoomListHandler(BaseHandler): network_tuple, search_filter, probing_limit, - last_room_id=last_room_id, + bounds=bounds, forwards=forwards, ignore_non_federatable=from_federation, ) @@ -193,30 +193,38 @@ class RoomListHandler(BaseHandler): more_to_come = False if num_results > 0: - final_room_id = results[-1]["room_id"] - initial_room_id = results[0]["room_id"] + final_entry = results[-1] + initial_entry = results[0] if forwards: if batch_token: # If there was a token given then we assume that there # must be previous results. 
response["prev_batch"] = RoomListNextBatch( - last_room_id=initial_room_id, direction_is_forward=False + last_joined_members=initial_entry["num_joined_members"], + last_room_id=initial_entry["room_id"], + direction_is_forward=False, ).to_token() if more_to_come: response["next_batch"] = RoomListNextBatch( - last_room_id=final_room_id, direction_is_forward=True + last_joined_members=final_entry["num_joined_members"], + last_room_id=final_entry["room_id"], + direction_is_forward=True, ).to_token() else: if batch_token: response["next_batch"] = RoomListNextBatch( - last_room_id=final_room_id, direction_is_forward=True + last_joined_members=final_entry["num_joined_members"], + last_room_id=final_entry["room_id"], + direction_is_forward=True, ).to_token() if more_to_come: response["prev_batch"] = RoomListNextBatch( - last_room_id=initial_room_id, direction_is_forward=False + last_joined_members=initial_entry["num_joined_members"], + last_room_id=initial_entry["room_id"], + direction_is_forward=False, ).to_token() for room in results: @@ -449,12 +457,17 @@ class RoomListNextBatch( namedtuple( "RoomListNextBatch", ( + "last_joined_members", # The count to get rooms after/before "last_room_id", # The room_id to get rooms after/before "direction_is_forward", # Bool if this is a next_batch, false if prev_batch ), ) ): - KEY_DICT = {"last_room_id": "r", "direction_is_forward": "d"} + KEY_DICT = { + "last_joined_members": "m", + "last_room_id": "r", + "direction_is_forward": "d", + } REVERSE_KEY_DICT = {v: k for k, v in KEY_DICT.items()} diff --git a/synapse/storage/room.py b/synapse/storage/room.py index c02787a73d..9b7e31583c 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -17,6 +17,7 @@ import collections import logging import re +from typing import Optional, Tuple from canonicaljson import json @@ -25,6 +26,7 @@ from twisted.internet import defer from synapse.api.errors import StoreError from synapse.storage._base import SQLBaseStore from synapse.storage.search import SearchStore +from synapse.types import ThirdPartyInstanceID from synapse.util.caches.descriptors import cached, cachedInlineCallbacks logger = logging.getLogger(__name__) @@ -119,24 +121,25 @@ class RoomWorkerStore(SQLBaseStore): @defer.inlineCallbacks def get_largest_public_rooms( self, - network_tuple, - search_filter, - limit, - last_room_id, - forwards, - ignore_non_federatable=False, + network_tuple: Optional[ThirdPartyInstanceID], + search_filter: Optional[dict], + limit: Optional[int], + bounds: Optional[Tuple[int, str]], + forwards: bool, + ignore_non_federatable: bool = False, ): """Gets the largest public rooms (where largest is in terms of joined members, as tracked in the statistics table). Args: - network_tuple (ThirdPartyInstanceID|None): - search_filter (dict|None): - limit (int|None): Maxmimum number of rows to return, unlimited otherwise. - last_room_id (str|None): if present, a room ID which bounds the - result set, and is always *excluded* from the result set. - forwards (bool): true iff going forwards, going backwards otherwise - ignore_non_federatable (bool): If true filters out non-federatable rooms. + network_tuple + search_filter + limit: Maxmimum number of rows to return, unlimited otherwise. + bounds: An uppoer or lower bound to apply to result set if given, + consists of a joined member count and room_id (these are + excluded from result set). + forwards: true iff going forwards, going backwards otherwise + ignore_non_federatable: If true filters out non-federatable rooms. 
Returns: Rooms in order: biggest number of joined users first. @@ -147,13 +150,29 @@ class RoomWorkerStore(SQLBaseStore): where_clauses = [] query_args = [] - if last_room_id: + # Work out the bounds if we're given them, these bounds look slightly + # odd, but are designed to help query planner use indices by pulling + # out a common bound. + if bounds: + last_joined_members, last_room_id = bounds if forwards: - where_clauses.append("room_id < ?") + where_clauses.append( + """ + joined_members <= ? AND ( + joined_members < ? OR room_id < ? + ) + """ + ) else: - where_clauses.append("? < room_id") + where_clauses.append( + """ + joined_members >= ? AND ( + joined_members > ? OR room_id > ? + ) + """ + ) - query_args += [last_room_id] + query_args += [last_joined_members, last_joined_members, last_room_id] if search_filter and search_filter.get("generic_search_term", None): search_term = "%" + search_filter["generic_search_term"] + "%" From 8e32240e6b650746d73315178af9aeb6dfa9be94 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 2 Oct 2019 15:12:17 +0100 Subject: [PATCH 29/85] Newsfile --- changelog.d/6152.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/6152.misc diff --git a/changelog.d/6152.misc b/changelog.d/6152.misc new file mode 100644 index 0000000000..dfee73c28f --- /dev/null +++ b/changelog.d/6152.misc @@ -0,0 +1 @@ +Improve performance of the public room list directory. From 4c4f44930d2153056dc1b992c571f43f2d360d07 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 2 Oct 2019 15:20:36 +0100 Subject: [PATCH 30/85] Fix not showing non-federatable rooms to remote room list queries --- synapse/storage/room.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 9b7e31583c..615c0d3f65 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -174,6 +174,9 @@ class RoomWorkerStore(SQLBaseStore): query_args += [last_joined_members, last_joined_members, last_room_id] + if ignore_non_federatable: + where_clauses.append("is_federatable") + if search_filter and search_filter.get("generic_search_term", None): search_term = "%" + search_filter["generic_search_term"] + "%" From ed73f04bef517eddebb3b0f0319d6e3322d1b7ec Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 2 Oct 2019 15:24:33 +0100 Subject: [PATCH 31/85] Newsfile --- changelog.d/6153.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/6153.misc diff --git a/changelog.d/6153.misc b/changelog.d/6153.misc new file mode 100644 index 0000000000..dfee73c28f --- /dev/null +++ b/changelog.d/6153.misc @@ -0,0 +1 @@ +Improve performance of the public room list directory. From 7a5f080f91f42fe011a1ab497acdce481187da5f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 2 Oct 2019 15:47:22 +0100 Subject: [PATCH 32/85] Fix appservice room list pagination --- synapse/storage/room.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 9b7e31583c..70bd719521 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -150,6 +150,24 @@ class RoomWorkerStore(SQLBaseStore): where_clauses = [] query_args = [] + if network_tuple: + if network_tuple.appservice_id: + published_sql = """ + SELECT room_id from appservice_room_list + WHERE appservice_id = ? AND network_id = ? 
+ """ + query_args.append(network_tuple.appservice_id) + query_args.append(network_tuple.network_id) + else: + published_sql = """ + SELECT room_id FROM rooms WHERE is_public + """ + else: + published_sql = """ + SELECT room_id FROM rooms WHERE is_public + UNION SELECT room_id from appservice_room_list + """ + # Work out the bounds if we're given them, these bounds look slightly # odd, but are designed to help query planner use indices by pulling # out a common bound. @@ -188,24 +206,6 @@ class RoomWorkerStore(SQLBaseStore): ) query_args += [search_term, search_term, search_term] - if network_tuple: - if network_tuple.appservice_id: - published_sql = """ - SELECT room_id from appservice_room_list - WHERE appservice_id = ? AND network_id = ? - """ - query_args.append(network_tuple.appservice_id) - query_args.append(network_tuple.network_id) - else: - published_sql = """ - SELECT room_id FROM rooms WHERE is_public - """ - else: - published_sql = """ - SELECT room_id FROM rooms WHERE is_public - UNION SELECT room_id from appservice_room_list - """ - where_clause = "" if where_clauses: where_clause = " AND " + " AND ".join(where_clauses) From 5be4083306c294ab5905683d32c5fa8c90219c95 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 2 Oct 2019 15:48:36 +0100 Subject: [PATCH 33/85] Newsfile --- changelog.d/6154.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/6154.misc diff --git a/changelog.d/6154.misc b/changelog.d/6154.misc new file mode 100644 index 0000000000..dfee73c28f --- /dev/null +++ b/changelog.d/6154.misc @@ -0,0 +1 @@ +Improve performance of the public room list directory. From 6527fa18c1e6f9bcb22318916b5e9534c91c84c1 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 2 Oct 2019 14:44:58 +0100 Subject: [PATCH 34/85] Add test case --- synapse/handlers/federation.py | 2 +- tests/handlers/test_federation.py | 83 +++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 tests/handlers/test_federation.py diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 75d79bb8e4..91f3a69298 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2570,7 +2570,7 @@ class FederationHandler(BaseHandler): ) try: - self.auth.check_from_context(room_version, event, context) + yield self.auth.check_from_context(room_version, event, context) except AuthError as e: logger.warn("Denying third party invite %r because %s", event, e) raise e diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py new file mode 100644 index 0000000000..20416a0142 --- /dev/null +++ b/tests/handlers/test_federation.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from synapse.api.constants import EventTypes +from synapse.api.errors import AuthError, Codes +from synapse.rest import admin +from synapse.rest.client.v1 import login, room + +from tests import unittest + + +class FederationTestCase(unittest.HomeserverTestCase): + servlets = [ + admin.register_servlets, + login.register_servlets, + room.register_servlets, + ] + + def make_homeserver(self, reactor, clock): + hs = self.setup_test_homeserver(http_client=None) + self.handler = hs.get_handlers().federation_handler + self.store = hs.get_datastore() + return hs + + def test_exchange_revoked_invite(self): + user_id = self.register_user("kermit", "test") + tok = self.login("kermit", "test") + + room_id = self.helper.create_room_as( + room_creator=user_id, tok=tok + ) + + # Send a 3PID invite event with an empty body so it's considered as a revoked one. + invite_token = "sometoken" + self.helper.send_state( + room_id=room_id, + event_type=EventTypes.ThirdPartyInvite, + state_key=invite_token, + body={}, + tok=tok, + ) + + d = self.handler.on_exchange_third_party_invite_request( + room_id=room_id, + event_dict={ + "type": EventTypes.Member, + "room_id": room_id, + "sender": user_id, + "state_key": "@someone:example.org", + "content": { + "membership": "invite", + "third_party_invite": { + "display_name": "alice", + "signed": { + "mxid": "@alice:localhost", + "token": invite_token, + "signatures": { + "magic.forest": { + "ed25519:3": "fQpGIW1Snz+pwLZu6sTy2aHy/DYWWTspTJRPyNp0PKkymfIsNffysMl6ObMMFdIJhk6g6pwlIqZ54rxo8SLmAg" + } + } + } + } + } + } + ) + + failure = self.get_failure(d, AuthError).value + + self.assertEqual(failure.code, 403, failure) + self.assertEqual(failure.errcode, Codes.FORBIDDEN, failure) + self.assertEqual(failure.msg, "You are not invited to this room.") From ebcb6a30d7b1bdb859a1fd22d567b163a1488763 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 3 Oct 2019 11:29:07 +0100 Subject: [PATCH 35/85] Lint --- tests/handlers/test_federation.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py index 20416a0142..a18dfc0e96 100644 --- a/tests/handlers/test_federation.py +++ b/tests/handlers/test_federation.py @@ -69,11 +69,11 @@ class FederationTestCase(unittest.HomeserverTestCase): "magic.forest": { "ed25519:3": "fQpGIW1Snz+pwLZu6sTy2aHy/DYWWTspTJRPyNp0PKkymfIsNffysMl6ObMMFdIJhk6g6pwlIqZ54rxo8SLmAg" } - } - } - } - } - } + }, + }, + }, + }, + }, ) failure = self.get_failure(d, AuthError).value From 8a5e8e829b98687ea274fae47db3aa801b6f97d3 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 3 Oct 2019 11:30:43 +0100 Subject: [PATCH 36/85] Lint (again) --- tests/handlers/test_federation.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py index a18dfc0e96..d56220f403 100644 --- a/tests/handlers/test_federation.py +++ b/tests/handlers/test_federation.py @@ -37,9 +37,7 @@ class FederationTestCase(unittest.HomeserverTestCase): user_id = self.register_user("kermit", "test") tok = self.login("kermit", "test") - room_id = self.helper.create_room_as( - room_creator=user_id, tok=tok - ) + room_id = self.helper.create_room_as(room_creator=user_id, tok=tok) # Send a 3PID invite event with an empty body so it's considered as a revoked one. 
invite_token = "sometoken" From c8145af8a9446911bbb52fc0114eebf4eebede4b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 3 Oct 2019 17:11:04 +0100 Subject: [PATCH 37/85] Cache room membership lookups in _get_joined_users_from_context --- synapse/storage/roommember.py | 64 ++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 19 deletions(-) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 4df8ebdacd..f11bbd05f8 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -16,6 +16,7 @@ import logging from collections import namedtuple +from typing import Iterable from six import iteritems, itervalues @@ -32,7 +33,7 @@ from synapse.storage.events_worker import EventsWorkerStore from synapse.types import get_domain_from_id from synapse.util.async_helpers import Linearizer from synapse.util.caches import intern_string -from synapse.util.caches.descriptors import cached, cachedInlineCallbacks +from synapse.util.caches.descriptors import cached, cachedInlineCallbacks, cachedList from synapse.util.stringutils import to_ascii logger = logging.getLogger(__name__) @@ -567,25 +568,10 @@ class RoomMemberWorkerStore(EventsWorkerStore): missing_member_event_ids.append(event_id) if missing_member_event_ids: - rows = yield self._simple_select_many_batch( - table="room_memberships", - column="event_id", - iterable=missing_member_event_ids, - retcols=("user_id", "display_name", "avatar_url"), - keyvalues={"membership": Membership.JOIN}, - batch_size=500, - desc="_get_joined_users_from_context", - ) - - users_in_room.update( - { - to_ascii(row["user_id"]): ProfileInfo( - avatar_url=to_ascii(row["avatar_url"]), - display_name=to_ascii(row["display_name"]), - ) - for row in rows - } + event_to_memberships = yield self._get_membership_from_event_ids( + missing_member_event_ids ) + users_in_room.update((row for row in event_to_memberships.values() if row)) if event is not None and event.type == EventTypes.Member: if event.membership == Membership.JOIN: @@ -597,6 +583,46 @@ class RoomMemberWorkerStore(EventsWorkerStore): return users_in_room + @cached(max_entries=10000) + def _get_membership_from_event_id(self, event_id): + raise NotADirectoryError() + + @cachedList( + cached_method_name="_get_membership_from_event_id", + list_name="event_ids", + inlineCallbacks=True, + ) + def _get_membership_from_event_ids(self, event_ids: Iterable[str]): + """Lookup profile info for set of member event IDs. + + Args: + event_ids: The member event IDs to lookup + + Returns: + Deferred[dict[str, Tuple[str, ProfileInfo]|None]]: Map from event ID + to `user_id` and ProfileInfo (or None if couldn't find event). 
+ """ + + rows = yield self._simple_select_many_batch( + table="room_memberships", + column="event_id", + iterable=event_ids, + retcols=("user_id", "display_name", "avatar_url"), + keyvalues={"membership": Membership.JOIN}, + batch_size=500, + desc="_get_membership_from_event_ids", + ) + + return { + row["event_id"]: ( + row["user_id"], + ProfileInfo( + avatar_url=row["avatar_url"], display_name=row["display_name"] + ), + ) + for row in rows + } + @cachedInlineCallbacks(max_entries=10000) def is_host_joined(self, room_id, host): if "%" in host or "_" in host: From 0ccf0ffc855f8d12f16598af77f356f236096994 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 3 Oct 2019 17:12:24 +0100 Subject: [PATCH 38/85] Newsfile --- changelog.d/6159.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/6159.misc diff --git a/changelog.d/6159.misc b/changelog.d/6159.misc new file mode 100644 index 0000000000..06cc163f8b --- /dev/null +++ b/changelog.d/6159.misc @@ -0,0 +1 @@ +Add more caching to `_get_joined_users_from_context` DB query. From d89ebf7c25b4f55d513da41a3ea20b9f8adc62d1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 3 Oct 2019 17:23:11 +0100 Subject: [PATCH 39/85] cachedList descriptor doesn't like typing --- synapse/storage/roommember.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index f11bbd05f8..ef6179cbe5 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -592,11 +592,11 @@ class RoomMemberWorkerStore(EventsWorkerStore): list_name="event_ids", inlineCallbacks=True, ) - def _get_membership_from_event_ids(self, event_ids: Iterable[str]): + def _get_membership_from_event_ids(self, event_ids): """Lookup profile info for set of member event IDs. Args: - event_ids: The member event IDs to lookup + event_ids (Iterable[str]): The member event IDs to lookup Returns: Deferred[dict[str, Tuple[str, ProfileInfo]|None]]: Map from event ID From a9610cdf02e04ecd8df52ebe74b1cb1338a5be97 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 3 Oct 2019 17:26:56 +0100 Subject: [PATCH 40/85] Fixup names and comments --- synapse/storage/roommember.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index ef6179cbe5..37c0723eb6 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -568,7 +568,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): missing_member_event_ids.append(event_id) if missing_member_event_ids: - event_to_memberships = yield self._get_membership_from_event_ids( + event_to_memberships = yield self._get_joined_profiles_from_event_ids( missing_member_event_ids ) users_in_room.update((row for row in event_to_memberships.values() if row)) @@ -584,23 +584,24 @@ class RoomMemberWorkerStore(EventsWorkerStore): return users_in_room @cached(max_entries=10000) - def _get_membership_from_event_id(self, event_id): + def _get_joined_profile_from_event_id(self, event_id): raise NotADirectoryError() @cachedList( - cached_method_name="_get_membership_from_event_id", + cached_method_name="_get_joined_profile_from_event_id", list_name="event_ids", inlineCallbacks=True, ) - def _get_membership_from_event_ids(self, event_ids): - """Lookup profile info for set of member event IDs. 
+    def _get_joined_profiles_from_event_ids(self, event_ids):
+        """For a given set of member event_ids, check whether they point to a
+        join event and, if so, return the associated user and profile info.
 
         Args:
             event_ids (Iterable[str]): The member event IDs to lookup
 
         Returns:
             Deferred[dict[str, Tuple[str, ProfileInfo]|None]]: Map from event ID
-            to `user_id` and ProfileInfo (or None if couldn't find event).
+            to `user_id` and ProfileInfo (or None if not a join event).
         """
 
         rows = yield self._simple_select_many_batch(
From 84691da6c3058f265f8b86d9a6592ba8ce90e2ed Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Thu, 3 Oct 2019 17:27:18 +0100
Subject: [PATCH 41/85] pep8

---
 synapse/storage/roommember.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py
index 37c0723eb6..ed7d936b3d 100644
--- a/synapse/storage/roommember.py
+++ b/synapse/storage/roommember.py
@@ -16,7 +16,6 @@
 
 import logging
 from collections import namedtuple
-from typing import Iterable
 
 from six import iteritems, itervalues
 
From 91f61fc6d74e923822eb92933e0d028848285a40 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Thu, 3 Oct 2019 17:28:31 +0100
Subject: [PATCH 42/85] Use the right error....

---
 synapse/storage/roommember.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py
index ed7d936b3d..1160b98ccc 100644
--- a/synapse/storage/roommember.py
+++ b/synapse/storage/roommember.py
@@ -584,7 +584,7 @@ class RoomMemberWorkerStore(EventsWorkerStore):
 
     @cached(max_entries=10000)
     def _get_joined_profile_from_event_id(self, event_id):
-        raise NotADirectoryError()
+        raise NotImplementedError()
 
     @cachedList(
         cached_method_name="_get_joined_profile_from_event_id",
From 693156aaf4579f48ad265f42f90b1bd73feda129 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Thu, 3 Oct 2019 17:33:54 +0100
Subject: [PATCH 43/85] Don't regenerate numeric user ID if registration
 fails.

This causes huge amounts of DB IO if registrations start to fail, e.g.
because the DB is struggling with IO.
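
The shape of the fix, as a hedged standalone sketch: the expensive
"find the next free numeric ID" scan now runs at most once per process,
and a clashing registration simply advances an in-memory counter.
Threading stands in for Synapse's Twisted-based linearizer here, and
`UserIdAllocator` is an invented name, not real Synapse code:

    import threading

    class UserIdAllocator:
        def __init__(self, find_next_from_db):
            self._find_next_from_db = find_next_from_db  # expensive DB scan
            self._next = None
            self._lock = threading.Lock()

        def allocate(self):
            with self._lock:
                if self._next is None:
                    # Hit the database only on first use; a failed
                    # registration no longer forces a re-scan ("reseed").
                    self._next = self._find_next_from_db()
                user_id, self._next = self._next, self._next + 1
                return user_id

    allocator = UserIdAllocator(lambda: 1)
    assert allocator.allocate() == 1
    assert allocator.allocate() == 2  # a retry just takes the next ID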
--- synapse/handlers/register.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 06bd03b77c..6dd7ef3745 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -220,7 +220,7 @@ class RegistrationHandler(BaseHandler): attempts = 0 user = None while not user: - localpart = yield self._generate_user_id(attempts > 0) + localpart = yield self._generate_user_id() user = UserID(localpart, self.hs.hostname) user_id = user.to_string() yield self.check_user_id_not_appservice_exclusive(user_id) @@ -379,10 +379,10 @@ class RegistrationHandler(BaseHandler): ) @defer.inlineCallbacks - def _generate_user_id(self, reseed=False): - if reseed or self._next_generated_user_id is None: + def _generate_user_id(self): + if self._next_generated_user_id is None: with (yield self._generate_user_id_linearizer.queue(())): - if reseed or self._next_generated_user_id is None: + if self._next_generated_user_id is None: self._next_generated_user_id = ( yield self.store.find_next_generated_user_id_localpart() ) From 4fc60f12deef19407e9b761f3d9c24c48384118c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 3 Oct 2019 17:35:50 +0100 Subject: [PATCH 44/85] Newsfile --- changelog.d/6161.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/6161.misc diff --git a/changelog.d/6161.misc b/changelog.d/6161.misc new file mode 100644 index 0000000000..7c5d61cb86 --- /dev/null +++ b/changelog.d/6161.misc @@ -0,0 +1 @@ +Don't regenerate numeric user ID if registration fails. From ab8a64772b9e663e74fbdafef9a729bd49369e65 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 3 Oct 2019 17:42:32 +0100 Subject: [PATCH 45/85] Remove unused variable --- synapse/handlers/register.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 6dd7ef3745..53410f120b 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -217,7 +217,6 @@ class RegistrationHandler(BaseHandler): else: # autogen a sequential user ID - attempts = 0 user = None while not user: localpart = yield self._generate_user_id() @@ -238,7 +237,6 @@ class RegistrationHandler(BaseHandler): # if user id is taken, just generate another user = None user_id = None - attempts += 1 if not self.hs.config.user_consent_at_registration: yield self._auto_join_rooms(user_id) From 0186ec9df7e55e35fa9b6579869cd308dc178a3c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 3 Oct 2019 17:46:43 +0100 Subject: [PATCH 46/85] Fixup newsfile --- changelog.d/6161.bugfix | 1 + changelog.d/6161.misc | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 changelog.d/6161.bugfix delete mode 100644 changelog.d/6161.misc diff --git a/changelog.d/6161.bugfix b/changelog.d/6161.bugfix new file mode 100644 index 0000000000..a0e2adb979 --- /dev/null +++ b/changelog.d/6161.bugfix @@ -0,0 +1 @@ +Fix bug where guest account registration can wedge after restart. diff --git a/changelog.d/6161.misc b/changelog.d/6161.misc deleted file mode 100644 index 7c5d61cb86..0000000000 --- a/changelog.d/6161.misc +++ /dev/null @@ -1 +0,0 @@ -Don't regenerate numeric user ID if registration fails. 
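The `@cached`/`@cachedList` pairing introduced in the membership patches
above reduces to the following toy, synchronous sketch; Synapse's real
descriptors are Deferred-based and invalidation-aware, and `BatchCache`
is an invented name rather than Synapse code. The point is that one
batched query backfills many per-key cache entries:

    class BatchCache:
        def __init__(self, fetch_many):
            self._fetch_many = fetch_many  # batched lookup: keys -> {key: value}
            self._cache = {}

        def get_many(self, keys):
            missing = [k for k in keys if k not in self._cache]
            if missing:
                # One batched query fills the per-key cache, so later
                # single-key lookups become cache hits.
                self._cache.update(self._fetch_many(missing))
            return {k: self._cache.get(k) for k in keys}

    cache = BatchCache(lambda keys: {k: k.upper() for k in keys})
    print(cache.get_many(["a", "b"]))  # {'a': 'A', 'b': 'B'}
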
From 66537e10ce77e47fac52e3f27569ac1ef0f1aaa3 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 3 Oct 2019 17:47:20 +0100 Subject: [PATCH 47/85] add some metrics on the federation sender (#6160) --- changelog.d/6160.misc | 1 + synapse/federation/sender/__init__.py | 11 ++++++----- synapse/state/__init__.py | 24 ++++++++++++++++++------ synapse/storage/roommember.py | 21 +++++++++++++++------ synapse/util/metrics.py | 6 ++++-- 5 files changed, 44 insertions(+), 19 deletions(-) create mode 100644 changelog.d/6160.misc diff --git a/changelog.d/6160.misc b/changelog.d/6160.misc new file mode 100644 index 0000000000..3d7cce00e1 --- /dev/null +++ b/changelog.d/6160.misc @@ -0,0 +1 @@ +Add some metrics on the federation sender. diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index d46f4aaeb1..2b2ee8612a 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -38,7 +38,7 @@ from synapse.metrics import ( events_processed_counter, ) from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.util.metrics import measure_func +from synapse.util.metrics import Measure, measure_func logger = logging.getLogger(__name__) @@ -183,8 +183,8 @@ class FederationSender(object): # Otherwise if the last member on a server in a room is # banned then it won't receive the event because it won't # be in the room after the ban. - destinations = yield self.state.get_current_hosts_in_room( - event.room_id, latest_event_ids=event.prev_event_ids() + destinations = yield self.state.get_hosts_in_room_at_events( + event.room_id, event_ids=event.prev_event_ids() ) except Exception: logger.exception( @@ -207,8 +207,9 @@ class FederationSender(object): @defer.inlineCallbacks def handle_room_events(events): - for event in events: - yield handle_event(event) + with Measure(self.clock, "handle_room_events"): + for event in events: + yield handle_event(event) events_by_room = {} for event in events: diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 2b0f4c79ee..dc9f5a9008 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -33,7 +33,7 @@ from synapse.state import v1, v2 from synapse.util.async_helpers import Linearizer from synapse.util.caches import get_cache_factor_for from synapse.util.caches.expiringcache import ExpiringCache -from synapse.util.metrics import Measure +from synapse.util.metrics import Measure, measure_func logger = logging.getLogger(__name__) @@ -191,11 +191,22 @@ class StateHandler(object): return joined_users @defer.inlineCallbacks - def get_current_hosts_in_room(self, room_id, latest_event_ids=None): - if not latest_event_ids: - latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) - logger.debug("calling resolve_state_groups from get_current_hosts_in_room") - entry = yield self.resolve_state_groups_for_events(room_id, latest_event_ids) + def get_current_hosts_in_room(self, room_id): + event_ids = yield self.store.get_latest_event_ids_in_room(room_id) + return (yield self.get_hosts_in_room_at_events(room_id, event_ids)) + + @defer.inlineCallbacks + def get_hosts_in_room_at_events(self, room_id, event_ids): + """Get the hosts that were in a room at the given event ids + + Args: + room_id (str): + event_ids (list[str]): + + Returns: + Deferred[list[str]]: the hosts in the room at the given events + """ + entry = yield self.resolve_state_groups_for_events(room_id, event_ids) 
joined_hosts = yield self.store.get_joined_hosts(room_id, entry) return joined_hosts @@ -344,6 +355,7 @@ class StateHandler(object): return context + @measure_func() @defer.inlineCallbacks def resolve_state_groups_for_events(self, room_id, event_ids): """ Given a list of event_ids this method fetches the state at each diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 4df8ebdacd..1550d827ba 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -33,6 +33,7 @@ from synapse.types import get_domain_from_id from synapse.util.async_helpers import Linearizer from synapse.util.caches import intern_string from synapse.util.caches.descriptors import cached, cachedInlineCallbacks +from synapse.util.metrics import Measure from synapse.util.stringutils import to_ascii logger = logging.getLogger(__name__) @@ -483,6 +484,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): ) return result + @defer.inlineCallbacks def get_joined_users_from_state(self, room_id, state_entry): state_group = state_entry.state_group if not state_group: @@ -492,9 +494,12 @@ class RoomMemberWorkerStore(EventsWorkerStore): # To do this we set the state_group to a new object as object() != object() state_group = object() - return self._get_joined_users_from_context( - room_id, state_group, state_entry.state, context=state_entry - ) + with Measure(self._clock, "get_joined_users_from_state"): + return ( + yield self._get_joined_users_from_context( + room_id, state_group, state_entry.state, context=state_entry + ) + ) @cachedInlineCallbacks( num_args=2, cache_context=True, iterable=True, max_entries=100000 @@ -669,6 +674,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): return True + @defer.inlineCallbacks def get_joined_hosts(self, room_id, state_entry): state_group = state_entry.state_group if not state_group: @@ -678,9 +684,12 @@ class RoomMemberWorkerStore(EventsWorkerStore): # To do this we set the state_group to a new object as object() != object() state_group = object() - return self._get_joined_hosts( - room_id, state_group, state_entry.state, state_entry=state_entry - ) + with Measure(self._clock, "get_joined_hosts"): + return ( + yield self._get_joined_hosts( + room_id, state_group, state_entry.state, state_entry=state_entry + ) + ) @cachedInlineCallbacks(num_args=2, max_entries=10000, iterable=True) # @defer.inlineCallbacks diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index 0910930c21..4b1bcdf23c 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -60,12 +60,14 @@ in_flight = InFlightGauge( ) -def measure_func(name): +def measure_func(name=None): def wrapper(func): + block_name = func.__name__ if name is None else name + @wraps(func) @defer.inlineCallbacks def measured_func(self, *args, **kwargs): - with Measure(self.clock, name): + with Measure(self.clock, block_name): r = yield func(self, *args, **kwargs) return r From 39b40d6d9989b09de39da5b6d3f85ee535e41138 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Fri, 4 Oct 2019 10:34:52 +0200 Subject: [PATCH 48/85] media/thumbnailer: Better quality for 1-bit / 8-bit color palette images (#2142) Pillow will use nearest neighbour as the resampling algorithm if the source image is either 1-bit or a color palette using 8 bits. If we convert to RGB before scaling, we'll probably get a better result. 
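
The effect is easy to reproduce with a hedged snippet (requires a
2019-era Pillow; this mirrors the `Thumbnailer._resize` change in the
diff below rather than being the in-tree code):

    from PIL import Image

    img = Image.new("P", (64, 64))  # 8-bit colour palette image
    if img.mode in ("1", "P"):
        # Palette and 1-bit images are otherwise resampled with nearest
        # neighbour; converting to RGB first lets ANTIALIAS smooth them.
        img = img.convert("RGB")
    thumb = img.resize((32, 32), Image.ANTIALIAS)
    print(thumb.mode, thumb.size)  # RGB (32, 32)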
--- changelog.d/2142.feature | 1 + synapse/rest/media/v1/thumbnailer.py | 14 +++++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) create mode 100644 changelog.d/2142.feature diff --git a/changelog.d/2142.feature b/changelog.d/2142.feature new file mode 100644 index 0000000000..e21e8325e1 --- /dev/null +++ b/changelog.d/2142.feature @@ -0,0 +1 @@ +Improve quality of thumbnails for 1-bit/8-bit color palette images. diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py index c995d7e043..8cf415e29d 100644 --- a/synapse/rest/media/v1/thumbnailer.py +++ b/synapse/rest/media/v1/thumbnailer.py @@ -82,13 +82,21 @@ class Thumbnailer(object): else: return (max_height * self.width) // self.height, max_height + def _resize(self, width, height): + # 1-bit or 8-bit color palette images need converting to RGB + # otherwise they will be scaled using nearest neighbour which + # looks awful + if self.image.mode in ["1", "P"]: + self.image = self.image.convert("RGB") + return self.image.resize((width, height), Image.ANTIALIAS) + def scale(self, width, height, output_type): """Rescales the image to the given dimensions. Returns: BytesIO: the bytes of the encoded image ready to be written to disk """ - scaled = self.image.resize((width, height), Image.ANTIALIAS) + scaled = self._resize(width, height) return self._encode_image(scaled, output_type) def crop(self, width, height, output_type): @@ -107,13 +115,13 @@ class Thumbnailer(object): """ if width * self.height > height * self.width: scaled_height = (width * self.height) // self.width - scaled_image = self.image.resize((width, scaled_height), Image.ANTIALIAS) + scaled_image = self._resize(width, scaled_height) crop_top = (scaled_height - height) // 2 crop_bottom = height + crop_top cropped = scaled_image.crop((0, crop_top, width, crop_bottom)) else: scaled_width = (height * self.width) // self.height - scaled_image = self.image.resize((scaled_width, height), Image.ANTIALIAS) + scaled_image = self._resize(scaled_width, height) crop_left = (scaled_width - width) // 2 crop_right = width + crop_left cropped = scaled_image.crop((crop_left, 0, crop_right, height)) From 13c4345c844e75b0d1a4ce66e4fb2eb9820cb7f6 Mon Sep 17 00:00:00 2001 From: Alexander Maznev Date: Fri, 4 Oct 2019 04:34:16 -0500 Subject: [PATCH 49/85] Update `user_filters` table to have a unique index, and non-null columns (#1172) --- changelog.d/1172.misc | 1 + .../delta/56/unique_user_filter_index.py | 46 +++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 changelog.d/1172.misc create mode 100644 synapse/storage/schema/delta/56/unique_user_filter_index.py diff --git a/changelog.d/1172.misc b/changelog.d/1172.misc new file mode 100644 index 0000000000..30b3e56082 --- /dev/null +++ b/changelog.d/1172.misc @@ -0,0 +1 @@ +Update `user_filters` table to have a unique index, and non-null columns. Thanks to @pik for contributing this. 
\ No newline at end of file diff --git a/synapse/storage/schema/delta/56/unique_user_filter_index.py b/synapse/storage/schema/delta/56/unique_user_filter_index.py new file mode 100644 index 0000000000..4efc1a586f --- /dev/null +++ b/synapse/storage/schema/delta/56/unique_user_filter_index.py @@ -0,0 +1,46 @@ +import logging + +from synapse.storage.engines import PostgresEngine + +logger = logging.getLogger(__name__) + + +def run_upgrade(cur, database_engine, *args, **kwargs): + if isinstance(database_engine, PostgresEngine): + select_clause = """ + CREATE TEMPORARY TABLE user_filters_migration AS + SELECT DISTINCT ON (user_id, filter_id) user_id, filter_id, filter_json + FROM user_filters; + """ + else: + select_clause = """ + CREATE TEMPORARY TABLE user_filters_migration AS + SELECT * FROM user_filters GROUP BY user_id, filter_id; + """ + sql = ( + """ + BEGIN; + %s + DROP INDEX user_filters_by_user_id_filter_id; + DELETE FROM user_filters; + ALTER TABLE user_filters + ALTER COLUMN user_id SET NOT NULL + ALTER COLUMN filter_id SET NOT NULL + ALTER COLUMN filter_json SET NOT NULL; + INSERT INTO user_filters(user_id, filter_id, filter_json) + SELECT * FROM user_filters_migration; + DROP TABLE user_filters_migration; + CREATE UNIQUE INDEX user_filters_by_user_id_filter_id_unique + ON user_filters(user_id, filter_id); + END; + """ + % select_clause + ) + if isinstance(database_engine, PostgresEngine): + cur.execute(sql) + else: + cur.executescript(sql) + + +def run_create(cur, database_engine, *args, **kwargs): + pass From 81d51ce48b449ee55bdcc17b76050283d848d405 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 4 Oct 2019 11:16:19 +0100 Subject: [PATCH 50/85] Incorporate review --- synapse/handlers/federation.py | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 91f3a69298..58a1654885 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2600,20 +2600,14 @@ class FederationHandler(BaseHandler): ) if original_invite: # If the m.room.third_party_invite event's content is empty, it means the - # invite has been revoked. - if original_invite.content: - display_name = original_invite.content["display_name"] - event_dict["content"]["third_party_invite"][ - "display_name" - ] = display_name - else: - # Don't discard or raise an error here because that's not the right place - # to do auth checks. The auth check will fail on this invite because we - # won't be able to fetch public keys from the m.room.third_party_invite - # event's content (because it's empty). - logger.info( - "Found invite event for third_party_invite but it has been revoked" - ) + # invite has been revoked. In this case, we don't have to raise an error here + # because the auth check will fail on the invite (because it's not able to + # fetch public keys from the m.room.third_party_invite event's content, which + # is empty. 
+ display_name = original_invite.content.get("display_name") + event_dict["content"]["third_party_invite"][ + "display_name" + ] = display_name else: logger.info( "Could not find invite event for third_party_invite: %r", event_dict From 4676732ca061ede3089b9c9b97a6d6b523b8c8e0 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 4 Oct 2019 11:18:28 +0100 Subject: [PATCH 51/85] Lint --- synapse/handlers/federation.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 58a1654885..b2b3a7e221 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2605,9 +2605,7 @@ class FederationHandler(BaseHandler): # fetch public keys from the m.room.third_party_invite event's content, which # is empty. display_name = original_invite.content.get("display_name") - event_dict["content"]["third_party_invite"][ - "display_name" - ] = display_name + event_dict["content"]["third_party_invite"]["display_name"] = display_name else: logger.info( "Could not find invite event for third_party_invite: %r", event_dict From 21d51ab59852d6a4d504a6ccd79ad82070c03a12 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 4 Oct 2019 11:21:24 +0100 Subject: [PATCH 52/85] Typo --- synapse/handlers/federation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index b2b3a7e221..50fc0fde2a 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2603,7 +2603,7 @@ class FederationHandler(BaseHandler): # invite has been revoked. In this case, we don't have to raise an error here # because the auth check will fail on the invite (because it's not able to # fetch public keys from the m.room.third_party_invite event's content, which - # is empty. + # is empty). 
display_name = original_invite.content.get("display_name") event_dict["content"]["third_party_invite"]["display_name"] = display_name else: From 5119a4cac7f42691beb455eedbefd99a39d64897 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 7 Oct 2019 12:21:17 +0100 Subject: [PATCH 53/85] Fix bug where we didn't pull out event ID --- synapse/storage/roommember.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 81a9ab6dc8..59a89fad60 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -612,7 +612,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): table="room_memberships", column="event_id", iterable=event_ids, - retcols=("user_id", "display_name", "avatar_url"), + retcols=("user_id", "display_name", "avatar_url", "event_id"), keyvalues={"membership": Membership.JOIN}, batch_size=500, desc="_get_membership_from_event_ids", From c8e6c308c6358f20b1d0ef1292f8e9e2f36a549e Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 7 Oct 2019 13:15:35 +0100 Subject: [PATCH 54/85] Fix unique_user_filter_index schema update --- synapse/storage/schema/delta/56/unique_user_filter_index.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/schema/delta/56/unique_user_filter_index.py b/synapse/storage/schema/delta/56/unique_user_filter_index.py index 4efc1a586f..60031f23ca 100644 --- a/synapse/storage/schema/delta/56/unique_user_filter_index.py +++ b/synapse/storage/schema/delta/56/unique_user_filter_index.py @@ -24,8 +24,8 @@ def run_upgrade(cur, database_engine, *args, **kwargs): DROP INDEX user_filters_by_user_id_filter_id; DELETE FROM user_filters; ALTER TABLE user_filters - ALTER COLUMN user_id SET NOT NULL - ALTER COLUMN filter_id SET NOT NULL + ALTER COLUMN user_id SET NOT NULL, + ALTER COLUMN filter_id SET NOT NULL, ALTER COLUMN filter_json SET NOT NULL; INSERT INTO user_filters(user_id, filter_id, filter_json) SELECT * FROM user_filters_migration; From aa7a003074e4e42c4ac8a571d2cd18ecfea3990f Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 7 Oct 2019 13:16:54 +0100 Subject: [PATCH 55/85] Changelog --- changelog.d/6175.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/6175.bugfix diff --git a/changelog.d/6175.bugfix b/changelog.d/6175.bugfix new file mode 100644 index 0000000000..3cd9a99edf --- /dev/null +++ b/changelog.d/6175.bugfix @@ -0,0 +1 @@ +Fix syntax error in unique_user_filter_index schema update. From 276ae5c63eaef656d486e190298f7a5ec99a7a5b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Mon, 7 Oct 2019 14:41:39 +0100 Subject: [PATCH 56/85] add some logging to the rooms stats updates, to try to track down a flaky test (#6167) --- changelog.d/6167.misc | 1 + synapse/handlers/stats.py | 1 + synapse/storage/stats.py | 3 +++ 3 files changed, 5 insertions(+) create mode 100644 changelog.d/6167.misc diff --git a/changelog.d/6167.misc b/changelog.d/6167.misc new file mode 100644 index 0000000000..32c96b3681 --- /dev/null +++ b/changelog.d/6167.misc @@ -0,0 +1 @@ +Add some logging to the rooms stats updates, to try to track down a flaky test. 
diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py index cbac7c347a..c62b113115 100644 --- a/synapse/handlers/stats.py +++ b/synapse/handlers/stats.py @@ -293,6 +293,7 @@ class StatsHandler(StateDeltasHandler): room_state["guest_access"] = event_content.get("guest_access") for room_id, state in room_to_state_updates.items(): + logger.info("Updating room_stats_state for %s: %s", room_id, state) yield self.store.update_room_state(room_id, state) return room_to_stats_deltas, user_to_stats_deltas diff --git a/synapse/storage/stats.py b/synapse/storage/stats.py index 09190d684e..7c224cd3d9 100644 --- a/synapse/storage/stats.py +++ b/synapse/storage/stats.py @@ -332,6 +332,9 @@ class StatsStore(StateDeltasStore): def _bulk_update_stats_delta_txn(txn): for stats_type, stats_updates in updates.items(): for stats_id, fields in stats_updates.items(): + logger.info( + "Updating %s stats for %s: %s", stats_type, stats_id, fields + ) self._update_stats_delta_txn( txn, ts=ts, From 1992f21a9fa00a37963bb6ac11d0e678cc08557e Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 7 Oct 2019 14:54:36 +0100 Subject: [PATCH 57/85] Fix changelog for PR #6175 --- changelog.d/6175.bugfix | 1 - changelog.d/6175.misc | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 changelog.d/6175.bugfix create mode 100644 changelog.d/6175.misc diff --git a/changelog.d/6175.bugfix b/changelog.d/6175.bugfix deleted file mode 100644 index 3cd9a99edf..0000000000 --- a/changelog.d/6175.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix syntax error in unique_user_filter_index schema update. diff --git a/changelog.d/6175.misc b/changelog.d/6175.misc new file mode 100644 index 0000000000..5bb24f02fc --- /dev/null +++ b/changelog.d/6175.misc @@ -0,0 +1 @@ +Update `user_filters` table to have a unique index, and non-null columns. Thanks to @pik for contributing this From dc795ba709f2ffe41671d25d94f21d4b31a5301d Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Mon, 7 Oct 2019 15:41:25 +0100 Subject: [PATCH 58/85] Log responder we are using. (#6139) This prevents us logging "Responding to media request with responder %s". --- changelog.d/6139.misc | 1 + synapse/rest/media/v1/_base.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/6139.misc diff --git a/changelog.d/6139.misc b/changelog.d/6139.misc new file mode 100644 index 0000000000..d4b65e7af8 --- /dev/null +++ b/changelog.d/6139.misc @@ -0,0 +1 @@ +Log responder when responding to media request. 
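The slip being fixed is the standard %-style logging pitfall; a minimal
standalone repro (not Synapse code, the actual one-line fix is in the
diff below):

    import logging

    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger(__name__)

    responder = "FileResponder"  # stand-in for the real responder object
    logger.debug("Responding to media request with responder %s")  # logs a literal "%s"
    logger.debug("Responding to media request with responder %s", responder)  # interpolates
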
diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 5fefee4dde..65bbf00073 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -195,7 +195,7 @@ def respond_with_responder(request, responder, media_type, file_size, upload_nam respond_404(request) return - logger.debug("Responding to media request with responder %s") + logger.debug("Responding to media request with responder %s", responder) add_file_headers(request, media_type, file_size, upload_name) try: with responder: From 88957199e7edbd33a66d419224df9ba0eb9e604d Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 3 Oct 2019 17:16:39 +0100 Subject: [PATCH 59/85] Move client_ips's bg updates to a dedicated store --- synapse/storage/client_ips.py | 200 ++++++++++++++++++---------------- 1 file changed, 106 insertions(+), 94 deletions(-) diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index bb135166ce..1d89b50f57 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -33,14 +33,14 @@ logger = logging.getLogger(__name__) LAST_SEEN_GRANULARITY = 120 * 1000 -class ClientIpStore(background_updates.BackgroundUpdateStore): +class ClientIpBackgroundUpdateStore(background_updates.BackgroundUpdateStore): def __init__(self, db_conn, hs): self.client_ip_last_seen = Cache( name="client_ip_last_seen", keylen=4, max_entries=50000 * CACHE_SIZE_FACTOR ) - super(ClientIpStore, self).__init__(db_conn, hs) + super(ClientIpBackgroundUpdateStore, self).__init__(db_conn, hs) self.user_ips_max_age = hs.config.user_ips_max_age @@ -92,19 +92,6 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): "devices_last_seen", self._devices_last_seen_update ) - # (user_id, access_token, ip,) -> (user_agent, device_id, last_seen) - self._batch_row_update = {} - - self._client_ip_looper = self._clock.looping_call( - self._update_client_ips_batch, 5 * 1000 - ) - self.hs.get_reactor().addSystemEventTrigger( - "before", "shutdown", self._update_client_ips_batch - ) - - if self.user_ips_max_age: - self._clock.looping_call(self._prune_old_user_ips, 5 * 1000) - @defer.inlineCallbacks def _remove_user_ip_nonunique(self, progress, batch_size): def f(conn): @@ -303,6 +290,110 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): return batch_size + @defer.inlineCallbacks + def _devices_last_seen_update(self, progress, batch_size): + """Background update to insert last seen info into devices table + """ + + last_user_id = progress.get("last_user_id", "") + last_device_id = progress.get("last_device_id", "") + + def _devices_last_seen_update_txn(txn): + # This consists of two queries: + # + # 1. The sub-query searches for the next N devices and joins + # against user_ips to find the max last_seen associated with + # that device. + # 2. The outer query then joins again against user_ips on + # user/device/last_seen. This *should* hopefully only + # return one row, but if it does return more than one then + # we'll just end up updating the same device row multiple + # times, which is fine. + + if self.database_engine.supports_tuple_comparison: + where_clause = "(user_id, device_id) > (?, ?)" + where_args = [last_user_id, last_device_id] + else: + # We explicitly do a `user_id >= ? AND (...)` here to ensure + # that an index is used, as doing `user_id > ? OR (user_id = ? AND ...)` + # makes it hard for query optimiser to tell that it can use the + # index on user_id + where_clause = "user_id >= ? AND (user_id > ? 
OR device_id > ?)" + where_args = [last_user_id, last_user_id, last_device_id] + + sql = """ + SELECT + last_seen, ip, user_agent, user_id, device_id + FROM ( + SELECT + user_id, device_id, MAX(u.last_seen) AS last_seen + FROM devices + INNER JOIN user_ips AS u USING (user_id, device_id) + WHERE %(where_clause)s + GROUP BY user_id, device_id + ORDER BY user_id ASC, device_id ASC + LIMIT ? + ) c + INNER JOIN user_ips AS u USING (user_id, device_id, last_seen) + """ % { + "where_clause": where_clause + } + txn.execute(sql, where_args + [batch_size]) + + rows = txn.fetchall() + if not rows: + return 0 + + sql = """ + UPDATE devices + SET last_seen = ?, ip = ?, user_agent = ? + WHERE user_id = ? AND device_id = ? + """ + txn.execute_batch(sql, rows) + + _, _, _, user_id, device_id = rows[-1] + self._background_update_progress_txn( + txn, + "devices_last_seen", + {"last_user_id": user_id, "last_device_id": device_id}, + ) + + return len(rows) + + updated = yield self.runInteraction( + "_devices_last_seen_update", _devices_last_seen_update_txn + ) + + if not updated: + yield self._end_background_update("devices_last_seen") + + return updated + + +class ClientIpStore(ClientIpBackgroundUpdateStore): + def __init__(self, db_conn, hs): + + self.client_ip_last_seen = Cache( + name="client_ip_last_seen", keylen=4, max_entries=50000 * CACHE_SIZE_FACTOR + ) + + super(ClientIpStore, self).__init__(db_conn, hs) + + self.user_ips_max_age = hs.config.user_ips_max_age + + # (user_id, access_token, ip,) -> (user_agent, device_id, last_seen) + self._batch_row_update = {} + + self._client_ip_looper = self._clock.looping_call( + self._update_client_ips_batch, 5 * 1000 + ) + self.hs.get_reactor().addSystemEventTrigger( + "before", "shutdown", self._update_client_ips_batch + ) + + if self.user_ips_max_age: + self._clock.looping_call(self._prune_old_user_ips, 5 * 1000) + @defer.inlineCallbacks def insert_client_ip( self, user_id, access_token, ip, user_agent, device_id, now=None @@ -454,85 +545,6 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): for (access_token, ip), (user_agent, last_seen) in iteritems(results) ) - @defer.inlineCallbacks - def _devices_last_seen_update(self, progress, batch_size): - """Background update to insert last seen info into devices table - """ - - last_user_id = progress.get("last_user_id", "") - last_device_id = progress.get("last_device_id", "") - - def _devices_last_seen_update_txn(txn): - # This consists of two queries: - # - # 1. The sub-query searches for the next N devices and joins - # against user_ips to find the max last_seen associated with - # that device. - # 2. The outer query then joins again against user_ips on - # user/device/last_seen. This *should* hopefully only - # return one row, but if it does return more than one then - # we'll just end up updating the same device row multiple - # times, which is fine. - - if self.database_engine.supports_tuple_comparison: - where_clause = "(user_id, device_id) > (?, ?)" - where_args = [last_user_id, last_device_id] - else: - # We explicitly do a `user_id >= ? AND (...)` here to ensure - # that an index is used, as doing `user_id > ? OR (user_id = ? AND ...)` - # makes it hard for query optimiser to tell that it can use the - # index on user_id - where_clause = "user_id >= ? AND (user_id > ? 
OR device_id > ?)" - where_args = [last_user_id, last_user_id, last_device_id] - - sql = """ - SELECT - last_seen, ip, user_agent, user_id, device_id - FROM ( - SELECT - user_id, device_id, MAX(u.last_seen) AS last_seen - FROM devices - INNER JOIN user_ips AS u USING (user_id, device_id) - WHERE %(where_clause)s - GROUP BY user_id, device_id - ORDER BY user_id ASC, device_id ASC - LIMIT ? - ) c - INNER JOIN user_ips AS u USING (user_id, device_id, last_seen) - """ % { - "where_clause": where_clause - } - txn.execute(sql, where_args + [batch_size]) - - rows = txn.fetchall() - if not rows: - return 0 - - sql = """ - UPDATE devices - SET last_seen = ?, ip = ?, user_agent = ? - WHERE user_id = ? AND device_id = ? - """ - txn.execute_batch(sql, rows) - - _, _, _, user_id, device_id = rows[-1] - self._background_update_progress_txn( - txn, - "devices_last_seen", - {"last_user_id": user_id, "last_device_id": device_id}, - ) - - return len(rows) - - updated = yield self.runInteraction( - "_devices_last_seen_update", _devices_last_seen_update_txn - ) - - if not updated: - yield self._end_background_update("devices_last_seen") - - return updated - @wrap_as_background_process("prune_old_user_ips") async def _prune_old_user_ips(self): """Removes entries in user IPs older than the configured period. From 2d3b4f42f029da54132b26119e9ec0d902505eef Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 3 Oct 2019 17:19:25 +0100 Subject: [PATCH 60/85] Move deviceinbox's bg updates to a dedicated store --- synapse/storage/deviceinbox.py | 37 ++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/synapse/storage/deviceinbox.py b/synapse/storage/deviceinbox.py index 6b7458304e..70bc2bb2cc 100644 --- a/synapse/storage/deviceinbox.py +++ b/synapse/storage/deviceinbox.py @@ -208,11 +208,11 @@ class DeviceInboxWorkerStore(SQLBaseStore): ) -class DeviceInboxStore(DeviceInboxWorkerStore, BackgroundUpdateStore): +class DeviceInboxBackgroundUpdateStore(BackgroundUpdateStore): DEVICE_INBOX_STREAM_ID = "device_inbox_stream_drop" def __init__(self, db_conn, hs): - super(DeviceInboxStore, self).__init__(db_conn, hs) + super(DeviceInboxBackgroundUpdateStore, self).__init__(db_conn, hs) self.register_background_index_update( "device_inbox_stream_index", @@ -225,6 +225,26 @@ class DeviceInboxStore(DeviceInboxWorkerStore, BackgroundUpdateStore): self.DEVICE_INBOX_STREAM_ID, self._background_drop_index_device_inbox ) + @defer.inlineCallbacks + def _background_drop_index_device_inbox(self, progress, batch_size): + def reindex_txn(conn): + txn = conn.cursor() + txn.execute("DROP INDEX IF EXISTS device_inbox_stream_id") + txn.close() + + yield self.runWithConnection(reindex_txn) + + yield self._end_background_update(self.DEVICE_INBOX_STREAM_ID) + + return 1 + + +class DeviceInboxStore(DeviceInboxWorkerStore, DeviceInboxBackgroundUpdateStore): + DEVICE_INBOX_STREAM_ID = "device_inbox_stream_drop" + + def __init__(self, db_conn, hs): + super(DeviceInboxStore, self).__init__(db_conn, hs) + # Map of (user_id, device_id) to the last stream_id that has been # deleted up to. This is so that we can no op deletions. 
self._last_device_delete_cache = ExpiringCache( @@ -435,16 +455,3 @@ class DeviceInboxStore(DeviceInboxWorkerStore, BackgroundUpdateStore): return self.runInteraction( "get_all_new_device_messages", get_all_new_device_messages_txn ) - - @defer.inlineCallbacks - def _background_drop_index_device_inbox(self, progress, batch_size): - def reindex_txn(conn): - txn = conn.cursor() - txn.execute("DROP INDEX IF EXISTS device_inbox_stream_id") - txn.close() - - yield self.runWithConnection(reindex_txn) - - yield self._end_background_update(self.DEVICE_INBOX_STREAM_ID) - - return 1 From cef9f6753e51c1fb7b24f2122b0d0ada767d6e08 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 3 Oct 2019 17:24:03 +0100 Subject: [PATCH 61/85] Move devices's bg updates to a dedicated store --- synapse/storage/devices.py | 49 +++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index 79a58df591..111bfb3d64 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -512,17 +512,9 @@ class DeviceWorkerStore(SQLBaseStore): return results -class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): +class DeviceBackgroundUpdateStore(BackgroundUpdateStore): def __init__(self, db_conn, hs): - super(DeviceStore, self).__init__(db_conn, hs) - - # Map of (user_id, device_id) -> bool. If there is an entry that implies - # the device exists. - self.device_id_exists_cache = Cache( - name="device_id_exists", keylen=2, max_entries=10000 - ) - - self._clock.looping_call(self._prune_old_outbound_device_pokes, 60 * 60 * 1000) + super(DeviceBackgroundUpdateStore, self).__init__(db_conn, hs) self.register_background_index_update( "device_lists_stream_idx", @@ -555,6 +547,31 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): self._drop_device_list_streams_non_unique_indexes, ) + @defer.inlineCallbacks + def _drop_device_list_streams_non_unique_indexes(self, progress, batch_size): + def f(conn): + txn = conn.cursor() + txn.execute("DROP INDEX IF EXISTS device_lists_remote_cache_id") + txn.execute("DROP INDEX IF EXISTS device_lists_remote_extremeties_id") + txn.close() + + yield self.runWithConnection(f) + yield self._end_background_update(DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES) + return 1 + + +class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): + def __init__(self, db_conn, hs): + super(DeviceStore, self).__init__(db_conn, hs) + + # Map of (user_id, device_id) -> bool. If there is an entry that implies + # the device exists. 
+ self.device_id_exists_cache = Cache( + name="device_id_exists", keylen=2, max_entries=10000 + ) + + self._clock.looping_call(self._prune_old_outbound_device_pokes, 60 * 60 * 1000) + @defer.inlineCallbacks def store_device(self, user_id, device_id, initial_device_display_name): """Ensure the given device is known; add it to the store if not @@ -910,15 +927,3 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): "_prune_old_outbound_device_pokes", _prune_txn, ) - - @defer.inlineCallbacks - def _drop_device_list_streams_non_unique_indexes(self, progress, batch_size): - def f(conn): - txn = conn.cursor() - txn.execute("DROP INDEX IF EXISTS device_lists_remote_cache_id") - txn.execute("DROP INDEX IF EXISTS device_lists_remote_extremeties_id") - txn.close() - - yield self.runWithConnection(f) - yield self._end_background_update(DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES) - return 1 From 54f87e07342ddbf82e9b8ee7c7ce227624c2f97b Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 3 Oct 2019 17:30:22 +0100 Subject: [PATCH 62/85] Move media_repository's bg updates to a dedicated store --- synapse/storage/media_repository.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index 6b1238ce4a..2eb2740d07 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -15,11 +15,10 @@ from synapse.storage.background_updates import BackgroundUpdateStore -class MediaRepositoryStore(BackgroundUpdateStore): - """Persistence for attachments and avatars""" +class MediaRepositoryBackgroundUpdateStore(BackgroundUpdateStore): def __init__(self, db_conn, hs): - super(MediaRepositoryStore, self).__init__(db_conn, hs) + super(MediaRepositoryBackgroundUpdateStore, self).__init__(db_conn, hs) self.register_background_index_update( update_name="local_media_repository_url_idx", @@ -29,6 +28,13 @@ class MediaRepositoryStore(BackgroundUpdateStore): where_clause="url_cache IS NOT NULL", ) + +class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): + """Persistence for attachments and avatars""" + + def __init__(self, db_conn, hs): + super(MediaRepositoryStore, self).__init__(db_conn, hs) + def get_local_media(self, media_id): """Get the metadata for a local piece of media Returns: From 81e6ffb536b19c662a81736f88ef2f243d425532 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 3 Oct 2019 17:44:26 +0100 Subject: [PATCH 63/85] Move registration's bg updates to a dedicated store --- synapse/storage/registration.py | 198 +++++++++++++++++--------------- 1 file changed, 103 insertions(+), 95 deletions(-) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 1a859352b6..1f6c93b73b 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -37,7 +37,57 @@ THIRTY_MINUTES_IN_MS = 30 * 60 * 1000 logger = logging.getLogger(__name__) -class RegistrationWorkerStore(SQLBaseStore): +class RegistrationDeactivationStore(SQLBaseStore): + @cachedInlineCallbacks() + def get_user_deactivated_status(self, user_id): + """Retrieve the value for the `deactivated` property for the provided user. + + Args: + user_id (str): The ID of the user to retrieve the status for. + + Returns: + defer.Deferred(bool): The requested value. 
+ """ + + res = yield self._simple_select_one_onecol( + table="users", + keyvalues={"name": user_id}, + retcol="deactivated", + desc="get_user_deactivated_status", + ) + + # Convert the integer into a boolean. + return res == 1 + + @defer.inlineCallbacks + def set_user_deactivated_status(self, user_id, deactivated): + """Set the `deactivated` property for the provided user to the provided value. + + Args: + user_id (str): The ID of the user to set the status for. + deactivated (bool): The value to set for `deactivated`. + """ + + yield self.runInteraction( + "set_user_deactivated_status", + self.set_user_deactivated_status_txn, + user_id, + deactivated, + ) + + def set_user_deactivated_status_txn(self, txn, user_id, deactivated): + self._simple_update_one_txn( + txn=txn, + table="users", + keyvalues={"name": user_id}, + updatevalues={"deactivated": 1 if deactivated else 0}, + ) + self._invalidate_cache_and_stream( + txn, self.get_user_deactivated_status, (user_id,) + ) + + +class RegistrationWorkerStore(RegistrationDeactivationStore): def __init__(self, db_conn, hs): super(RegistrationWorkerStore, self).__init__(db_conn, hs) @@ -673,27 +723,6 @@ class RegistrationWorkerStore(SQLBaseStore): desc="get_id_servers_user_bound", ) - @cachedInlineCallbacks() - def get_user_deactivated_status(self, user_id): - """Retrieve the value for the `deactivated` property for the provided user. - - Args: - user_id (str): The ID of the user to retrieve the status for. - - Returns: - defer.Deferred(bool): The requested value. - """ - - res = yield self._simple_select_one_onecol( - table="users", - keyvalues={"name": user_id}, - retcol="deactivated", - desc="get_user_deactivated_status", - ) - - # Convert the integer into a boolean. - return res == 1 - def get_threepid_validation_session( self, medium, client_secret, address=None, sid=None, validated=True ): @@ -787,13 +816,14 @@ class RegistrationWorkerStore(SQLBaseStore): ) -class RegistrationStore( - RegistrationWorkerStore, background_updates.BackgroundUpdateStore +class RegistrationBackgroundUpdateStore( + RegistrationDeactivationStore, background_updates.BackgroundUpdateStore ): def __init__(self, db_conn, hs): - super(RegistrationStore, self).__init__(db_conn, hs) + super(RegistrationBackgroundUpdateStore, self).__init__(db_conn, hs) self.clock = hs.get_clock() + self.config = hs.config self.register_background_index_update( "access_tokens_device_index", @@ -809,8 +839,6 @@ class RegistrationStore( columns=["creation_ts"], ) - self._account_validity = hs.config.account_validity - # we no longer use refresh tokens, but it's possible that some people # might have a background update queued to build this index. Just # clear the background update. 
@@ -824,17 +852,6 @@ class RegistrationStore( "users_set_deactivated_flag", self._background_update_set_deactivated_flag ) - # Create a background job for culling expired 3PID validity tokens - def start_cull(): - # run as a background process to make sure that the database transactions - # have a logcontext to report to - return run_as_background_process( - "cull_expired_threepid_validation_tokens", - self.cull_expired_threepid_validation_tokens, - ) - - hs.get_clock().looping_call(start_cull, THIRTY_MINUTES_IN_MS) - @defer.inlineCallbacks def _background_update_set_deactivated_flag(self, progress, batch_size): """Retrieves a list of all deactivated users and sets the 'deactivated' flag to 1 @@ -896,6 +913,54 @@ class RegistrationStore( return nb_processed + @defer.inlineCallbacks + def _bg_user_threepids_grandfather(self, progress, batch_size): + """We now track which identity servers a user binds their 3PID to, so + we need to handle the case of existing bindings where we didn't track + this. + + We do this by grandfathering in existing user threepids assuming that + they used one of the server configured trusted identity servers. + """ + id_servers = set(self.config.trusted_third_party_id_servers) + + def _bg_user_threepids_grandfather_txn(txn): + sql = """ + INSERT INTO user_threepid_id_server + (user_id, medium, address, id_server) + SELECT user_id, medium, address, ? + FROM user_threepids + """ + + txn.executemany(sql, [(id_server,) for id_server in id_servers]) + + if id_servers: + yield self.runInteraction( + "_bg_user_threepids_grandfather", _bg_user_threepids_grandfather_txn + ) + + yield self._end_background_update("user_threepids_grandfather") + + return 1 + + +class RegistrationStore(RegistrationWorkerStore, RegistrationBackgroundUpdateStore): + def __init__(self, db_conn, hs): + super(RegistrationStore, self).__init__(db_conn, hs) + + self._account_validity = hs.config.account_validity + + # Create a background job for culling expired 3PID validity tokens + def start_cull(): + # run as a background process to make sure that the database transactions + # have a logcontext to report to + return run_as_background_process( + "cull_expired_threepid_validation_tokens", + self.cull_expired_threepid_validation_tokens, + ) + + hs.get_clock().looping_call(start_cull, THIRTY_MINUTES_IN_MS) + @defer.inlineCallbacks def add_access_token_to_user(self, user_id, token, device_id, valid_until_ms): """Adds an access token for the given user. @@ -1244,36 +1309,6 @@ class RegistrationStore( desc="get_users_pending_deactivation", ) - @defer.inlineCallbacks - def _bg_user_threepids_grandfather(self, progress, batch_size): - """We now track which identity servers a user binds their 3PID to, so - we need to handle the case of existing bindings where we didn't track - this. - - We do this by grandfathering in existing user threepids assuming that - they used one of the server configured trusted identity servers. - """ - id_servers = set(self.config.trusted_third_party_id_servers) - - def _bg_user_threepids_grandfather_txn(txn): - sql = """ - INSERT INTO user_threepid_id_server - (user_id, medium, address, id_server) - SELECT user_id, medium, address, ? 
- FROM user_threepids - """ - - txn.executemany(sql, [(id_server,) for id_server in id_servers]) - - if id_servers: - yield self.runInteraction( - "_bg_user_threepids_grandfather", _bg_user_threepids_grandfather_txn - ) - - yield self._end_background_update("user_threepids_grandfather") - - return 1 - def validate_threepid_session(self, session_id, client_secret, token, current_ts): """Attempt to validate a threepid session using a token @@ -1464,30 +1499,3 @@ class RegistrationStore( cull_expired_threepid_validation_tokens_txn, self.clock.time_msec(), ) - - def set_user_deactivated_status_txn(self, txn, user_id, deactivated): - self._simple_update_one_txn( - txn=txn, - table="users", - keyvalues={"name": user_id}, - updatevalues={"deactivated": 1 if deactivated else 0}, - ) - self._invalidate_cache_and_stream( - txn, self.get_user_deactivated_status, (user_id,) - ) - - @defer.inlineCallbacks - def set_user_deactivated_status(self, user_id, deactivated): - """Set the `deactivated` property for the provided user to the provided value. - - Args: - user_id (str): The ID of the user to set the status for. - deactivated (bool): The value to set for `deactivated`. - """ - - yield self.runInteraction( - "set_user_deactivated_status", - self.set_user_deactivated_status_txn, - user_id, - deactivated, - ) From 841054ad96b44161d7a990bc1349ecc70fcd736c Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 3 Oct 2019 17:47:42 +0100 Subject: [PATCH 64/85] Move search's bg updates to a dedicated store --- synapse/storage/search.py | 56 ++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index df87ab6a6d..9a41e78002 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -36,7 +36,7 @@ SearchEntry = namedtuple( ) -class SearchStore(BackgroundUpdateStore): +class SearchBackgroundUpdateStore(BackgroundUpdateStore): EVENT_SEARCH_UPDATE_NAME = "event_search" EVENT_SEARCH_ORDER_UPDATE_NAME = "event_search_order" @@ -44,7 +44,7 @@ class SearchStore(BackgroundUpdateStore): EVENT_SEARCH_USE_GIN_POSTGRES_NAME = "event_search_postgres_gin" def __init__(self, db_conn, hs): - super(SearchStore, self).__init__(db_conn, hs) + super(SearchBackgroundUpdateStore, self).__init__(db_conn, hs) if not hs.config.enable_search: return @@ -289,29 +289,6 @@ class SearchStore(BackgroundUpdateStore): return num_rows - def store_event_search_txn(self, txn, event, key, value): - """Add event to the search table - - Args: - txn (cursor): - event (EventBase): - key (str): - value (str): - """ - self.store_search_entries_txn( - txn, - ( - SearchEntry( - key=key, - value=value, - event_id=event.event_id, - room_id=event.room_id, - stream_ordering=event.internal_metadata.stream_ordering, - origin_server_ts=event.origin_server_ts, - ), - ), - ) - def store_search_entries_txn(self, txn, entries): """Add entries to the search table @@ -358,6 +335,35 @@ class SearchStore(BackgroundUpdateStore): # This should be unreachable. 
raise Exception("Unrecognized database engine") + +class SearchStore(SearchBackgroundUpdateStore): + + def __init__(self, db_conn, hs): + super(SearchStore, self).__init__(db_conn, hs) + + def store_event_search_txn(self, txn, event, key, value): + """Add event to the search table + + Args: + txn (cursor): + event (EventBase): + key (str): + value (str): + """ + self.store_search_entries_txn( + txn, + ( + SearchEntry( + key=key, + value=value, + event_id=event.event_id, + room_id=event.room_id, + stream_ordering=event.internal_metadata.stream_ordering, + origin_server_ts=event.origin_server_ts, + ), + ), + ) + @defer.inlineCallbacks def search_msgs(self, room_ids, search_term, keys): """Performs a full text search over events with given keys. From cfccd2d78a0b8338f33a5631d8f0637d4ed07e7e Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 3 Oct 2019 17:56:16 +0100 Subject: [PATCH 65/85] Move state's bg updates to a dedicated store --- synapse/storage/state.py | 394 ++++++++++++++++++++------------------- 1 file changed, 204 insertions(+), 190 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 1980a87108..71b533c006 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -353,8 +353,158 @@ class StateFilter(object): return member_filter, non_member_filter +class StateGroupBackgroundUpdateStore(SQLBaseStore): + """Defines functions related to state groups needed to run the state backgroud + updates. + """ + + def _count_state_group_hops_txn(self, txn, state_group): + """Given a state group, count how many hops there are in the tree. + + This is used to ensure the delta chains don't get too long. + """ + if isinstance(self.database_engine, PostgresEngine): + sql = """ + WITH RECURSIVE state(state_group) AS ( + VALUES(?::bigint) + UNION ALL + SELECT prev_state_group FROM state_group_edges e, state s + WHERE s.state_group = e.state_group + ) + SELECT count(*) FROM state; + """ + + txn.execute(sql, (state_group,)) + row = txn.fetchone() + if row and row[0]: + return row[0] + else: + return 0 + else: + # We don't use WITH RECURSIVE on sqlite3 as there are distributions + # that ship with an sqlite3 version that doesn't support it (e.g. wheezy) + next_group = state_group + count = 0 + + while next_group: + next_group = self._simple_select_one_onecol_txn( + txn, + table="state_group_edges", + keyvalues={"state_group": next_group}, + retcol="prev_state_group", + allow_none=True, + ) + if next_group: + count += 1 + + return count + + def _get_state_groups_from_groups_txn( + self, txn, groups, state_filter=StateFilter.all() + ): + results = {group: {} for group in groups} + + where_clause, where_args = state_filter.make_sql_filter_clause() + + # Unless the filter clause is empty, we're going to append it after an + # existing where clause + if where_clause: + where_clause = " AND (%s)" % (where_clause,) + + if isinstance(self.database_engine, PostgresEngine): + # Temporarily disable sequential scans in this transaction. This is + # a temporary hack until we can add the right indices in + txn.execute("SET LOCAL enable_seqscan=off") + + # The below query walks the state_group tree so that the "state" + # table includes all state_groups in the tree. It then joins + # against `state_groups_state` to fetch the latest state. + # It assumes that previous state groups are always numerically + # lesser. + # The PARTITION is used to get the event_id in the greatest state + # group for the given type, state_key. 
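+            # ("Greatest" is safe because previous state groups always
+            # have numerically smaller ids, per the assumption above, so
+            # the highest group in the window is the most recent one.)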
+ # This may return multiple rows per (type, state_key), but last_value + # should be the same. + sql = """ + WITH RECURSIVE state(state_group) AS ( + VALUES(?::bigint) + UNION ALL + SELECT prev_state_group FROM state_group_edges e, state s + WHERE s.state_group = e.state_group + ) + SELECT DISTINCT type, state_key, last_value(event_id) OVER ( + PARTITION BY type, state_key ORDER BY state_group ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) AS event_id FROM state_groups_state + WHERE state_group IN ( + SELECT state_group FROM state + ) + """ + + for group in groups: + args = [group] + args.extend(where_args) + + txn.execute(sql + where_clause, args) + for row in txn: + typ, state_key, event_id = row + key = (typ, state_key) + results[group][key] = event_id + else: + max_entries_returned = state_filter.max_entries_returned() + + # We don't use WITH RECURSIVE on sqlite3 as there are distributions + # that ship with an sqlite3 version that doesn't support it (e.g. wheezy) + for group in groups: + next_group = group + + while next_group: + # We did this before by getting the list of group ids, and + # then passing that list to sqlite to get latest event for + # each (type, state_key). However, that was terribly slow + # without the right indices (which we can't add until + # after we finish deduping state, which requires this func) + args = [next_group] + args.extend(where_args) + + txn.execute( + "SELECT type, state_key, event_id FROM state_groups_state" + " WHERE state_group = ? " + where_clause, + args, + ) + results[group].update( + ((typ, state_key), event_id) + for typ, state_key, event_id in txn + if (typ, state_key) not in results[group] + ) + + # If the number of entries in the (type,state_key)->event_id dict + # matches the number of (type,state_keys) types we were searching + # for, then we must have found them all, so no need to go walk + # further down the tree... UNLESS our types filter contained + # wildcards (i.e. Nones) in which case we have to do an exhaustive + # search + if ( + max_entries_returned is not None + and len(results[group]) == max_entries_returned + ): + break + + next_group = self._simple_select_one_onecol_txn( + txn, + table="state_group_edges", + keyvalues={"state_group": next_group}, + retcol="prev_state_group", + allow_none=True, + ) + + return results + + # this inherits from EventsWorkerStore because it calls self.get_events -class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): +class StateGroupWorkerStore( + EventsWorkerStore, StateGroupBackgroundUpdateStore, SQLBaseStore +): """The parts of StateGroupStore that can be called from workers. """ @@ -694,107 +844,6 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): return results - def _get_state_groups_from_groups_txn( - self, txn, groups, state_filter=StateFilter.all() - ): - results = {group: {} for group in groups} - - where_clause, where_args = state_filter.make_sql_filter_clause() - - # Unless the filter clause is empty, we're going to append it after an - # existing where clause - if where_clause: - where_clause = " AND (%s)" % (where_clause,) - - if isinstance(self.database_engine, PostgresEngine): - # Temporarily disable sequential scans in this transaction. This is - # a temporary hack until we can add the right indices in - txn.execute("SET LOCAL enable_seqscan=off") - - # The below query walks the state_group tree so that the "state" - # table includes all state_groups in the tree. 
It then joins - # against `state_groups_state` to fetch the latest state. - # It assumes that previous state groups are always numerically - # lesser. - # The PARTITION is used to get the event_id in the greatest state - # group for the given type, state_key. - # This may return multiple rows per (type, state_key), but last_value - # should be the same. - sql = """ - WITH RECURSIVE state(state_group) AS ( - VALUES(?::bigint) - UNION ALL - SELECT prev_state_group FROM state_group_edges e, state s - WHERE s.state_group = e.state_group - ) - SELECT DISTINCT type, state_key, last_value(event_id) OVER ( - PARTITION BY type, state_key ORDER BY state_group ASC - ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING - ) AS event_id FROM state_groups_state - WHERE state_group IN ( - SELECT state_group FROM state - ) - """ - - for group in groups: - args = [group] - args.extend(where_args) - - txn.execute(sql + where_clause, args) - for row in txn: - typ, state_key, event_id = row - key = (typ, state_key) - results[group][key] = event_id - else: - max_entries_returned = state_filter.max_entries_returned() - - # We don't use WITH RECURSIVE on sqlite3 as there are distributions - # that ship with an sqlite3 version that doesn't support it (e.g. wheezy) - for group in groups: - next_group = group - - while next_group: - # We did this before by getting the list of group ids, and - # then passing that list to sqlite to get latest event for - # each (type, state_key). However, that was terribly slow - # without the right indices (which we can't add until - # after we finish deduping state, which requires this func) - args = [next_group] - args.extend(where_args) - - txn.execute( - "SELECT type, state_key, event_id FROM state_groups_state" - " WHERE state_group = ? " + where_clause, - args, - ) - results[group].update( - ((typ, state_key), event_id) - for typ, state_key, event_id in txn - if (typ, state_key) not in results[group] - ) - - # If the number of entries in the (type,state_key)->event_id dict - # matches the number of (type,state_keys) types we were searching - # for, then we must have found them all, so no need to go walk - # further down the tree... UNLESS our types filter contained - # wildcards (i.e. Nones) in which case we have to do an exhaustive - # search - if ( - max_entries_returned is not None - and len(results[group]) == max_entries_returned - ): - break - - next_group = self._simple_select_one_onecol_txn( - txn, - table="state_group_edges", - keyvalues={"state_group": next_group}, - retcol="prev_state_group", - allow_none=True, - ) - - return results - @defer.inlineCallbacks def get_state_for_events(self, event_ids, state_filter=StateFilter.all()): """Given a list of event_ids and type tuples, return a list of state @@ -1238,66 +1287,8 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): return self.runInteraction("store_state_group", _store_state_group_txn) - def _count_state_group_hops_txn(self, txn, state_group): - """Given a state group, count how many hops there are in the tree. - This is used to ensure the delta chains don't get too long. 
- """ - if isinstance(self.database_engine, PostgresEngine): - sql = """ - WITH RECURSIVE state(state_group) AS ( - VALUES(?::bigint) - UNION ALL - SELECT prev_state_group FROM state_group_edges e, state s - WHERE s.state_group = e.state_group - ) - SELECT count(*) FROM state; - """ - - txn.execute(sql, (state_group,)) - row = txn.fetchone() - if row and row[0]: - return row[0] - else: - return 0 - else: - # We don't use WITH RECURSIVE on sqlite3 as there are distributions - # that ship with an sqlite3 version that doesn't support it (e.g. wheezy) - next_group = state_group - count = 0 - - while next_group: - next_group = self._simple_select_one_onecol_txn( - txn, - table="state_group_edges", - keyvalues={"state_group": next_group}, - retcol="prev_state_group", - allow_none=True, - ) - if next_group: - count += 1 - - return count - - -class StateStore(StateGroupWorkerStore, BackgroundUpdateStore): - """ Keeps track of the state at a given event. - - This is done by the concept of `state groups`. Every event is a assigned - a state group (identified by an arbitrary string), which references a - collection of state events. The current state of an event is then the - collection of state events referenced by the event's state group. - - Hence, every change in the current state causes a new state group to be - generated. However, if no change happens (e.g., if we get a message event - with only one parent it inherits the state group from its parent.) - - There are three tables: - * `state_groups`: Stores group name, first event with in the group and - room id. - * `event_to_state_groups`: Maps events to state groups. - * `state_groups_state`: Maps state group to state events. - """ +class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore, BackgroundUpdateStore): STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication" STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index" @@ -1305,7 +1296,7 @@ class StateStore(StateGroupWorkerStore, BackgroundUpdateStore): EVENT_STATE_GROUP_INDEX_UPDATE_NAME = "event_to_state_groups_sg_index" def __init__(self, db_conn, hs): - super(StateStore, self).__init__(db_conn, hs) + super(StateBackgroundUpdateStore, self).__init__(db_conn, hs) self.register_background_update_handler( self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME, self._background_deduplicate_state, @@ -1327,34 +1318,6 @@ class StateStore(StateGroupWorkerStore, BackgroundUpdateStore): columns=["state_group"], ) - def _store_event_state_mappings_txn(self, txn, events_and_contexts): - state_groups = {} - for event, context in events_and_contexts: - if event.internal_metadata.is_outlier(): - continue - - # if the event was rejected, just give it the same state as its - # predecessor. 
- if context.rejected: - state_groups[event.event_id] = context.prev_group - continue - - state_groups[event.event_id] = context.state_group - - self._simple_insert_many_txn( - txn, - table="event_to_state_groups", - values=[ - {"state_group": state_group_id, "event_id": event_id} - for event_id, state_group_id in iteritems(state_groups) - ], - ) - - for event_id, state_group_id in iteritems(state_groups): - txn.call_after( - self._get_state_group_for_event.prefill, (event_id,), state_group_id - ) - @defer.inlineCallbacks def _background_deduplicate_state(self, progress, batch_size): """This background update will slowly deduplicate state by reencoding @@ -1527,3 +1490,54 @@ class StateStore(StateGroupWorkerStore, BackgroundUpdateStore): yield self._end_background_update(self.STATE_GROUP_INDEX_UPDATE_NAME) return 1 + + +class StateStore(StateGroupWorkerStore, StateBackgroundUpdateStore): + """ Keeps track of the state at a given event. + + This is done by the concept of `state groups`. Every event is a assigned + a state group (identified by an arbitrary string), which references a + collection of state events. The current state of an event is then the + collection of state events referenced by the event's state group. + + Hence, every change in the current state causes a new state group to be + generated. However, if no change happens (e.g., if we get a message event + with only one parent it inherits the state group from its parent.) + + There are three tables: + * `state_groups`: Stores group name, first event with in the group and + room id. + * `event_to_state_groups`: Maps events to state groups. + * `state_groups_state`: Maps state group to state events. + """ + + def __init__(self, db_conn, hs): + super(StateStore, self).__init__(db_conn, hs) + + def _store_event_state_mappings_txn(self, txn, events_and_contexts): + state_groups = {} + for event, context in events_and_contexts: + if event.internal_metadata.is_outlier(): + continue + + # if the event was rejected, just give it the same state as its + # predecessor. + if context.rejected: + state_groups[event.event_id] = context.prev_group + continue + + state_groups[event.event_id] = context.state_group + + self._simple_insert_many_txn( + txn, + table="event_to_state_groups", + values=[ + {"state_group": state_group_id, "event_id": event_id} + for event_id, state_group_id in iteritems(state_groups) + ], + ) + + for event_id, state_group_id in iteritems(state_groups): + txn.call_after( + self._get_state_group_for_event.prefill, (event_id,), state_group_id + ) From e106a0e4db23fa1fedcce1c169985a8c91181c86 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 3 Oct 2019 18:19:08 +0100 Subject: [PATCH 66/85] Move user_directory's bg updates to a dedicated store --- synapse/storage/user_directory.py | 178 ++++++++++++++++-------------- 1 file changed, 94 insertions(+), 84 deletions(-) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index b5188d9bee..1b1e4751b9 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -32,14 +32,14 @@ logger = logging.getLogger(__name__) TEMP_TABLE = "_temp_populate_user_directory" -class UserDirectoryStore(StateDeltasStore, BackgroundUpdateStore): +class UserDirectoryBackgroundUpdateStore(StateDeltasStore, BackgroundUpdateStore): # How many records do we calculate before sending it to # add_users_who_share_private_rooms? 
SHARE_PRIVATE_WORKING_SET = 500 def __init__(self, db_conn, hs): - super(UserDirectoryStore, self).__init__(db_conn, hs) + super(UserDirectoryBackgroundUpdateStore, self).__init__(db_conn, hs) self.server_name = hs.hostname @@ -452,55 +452,6 @@ class UserDirectoryStore(StateDeltasStore, BackgroundUpdateStore): "update_profile_in_user_dir", _update_profile_in_user_dir_txn ) - def remove_from_user_dir(self, user_id): - def _remove_from_user_dir_txn(txn): - self._simple_delete_txn( - txn, table="user_directory", keyvalues={"user_id": user_id} - ) - self._simple_delete_txn( - txn, table="user_directory_search", keyvalues={"user_id": user_id} - ) - self._simple_delete_txn( - txn, table="users_in_public_rooms", keyvalues={"user_id": user_id} - ) - self._simple_delete_txn( - txn, - table="users_who_share_private_rooms", - keyvalues={"user_id": user_id}, - ) - self._simple_delete_txn( - txn, - table="users_who_share_private_rooms", - keyvalues={"other_user_id": user_id}, - ) - txn.call_after(self.get_user_in_directory.invalidate, (user_id,)) - - return self.runInteraction("remove_from_user_dir", _remove_from_user_dir_txn) - - @defer.inlineCallbacks - def get_users_in_dir_due_to_room(self, room_id): - """Get all user_ids that are in the room directory because they're - in the given room_id - """ - user_ids_share_pub = yield self._simple_select_onecol( - table="users_in_public_rooms", - keyvalues={"room_id": room_id}, - retcol="user_id", - desc="get_users_in_dir_due_to_room", - ) - - user_ids_share_priv = yield self._simple_select_onecol( - table="users_who_share_private_rooms", - keyvalues={"room_id": room_id}, - retcol="other_user_id", - desc="get_users_in_dir_due_to_room", - ) - - user_ids = set(user_ids_share_pub) - user_ids.update(user_ids_share_priv) - - return user_ids - def add_users_who_share_private_room(self, room_id, user_id_tuples): """Insert entries into the users_who_share_private_rooms table. The first user should be a local user. @@ -551,6 +502,98 @@ class UserDirectoryStore(StateDeltasStore, BackgroundUpdateStore): "add_users_in_public_rooms", _add_users_in_public_rooms_txn ) + def delete_all_from_user_dir(self): + """Delete the entire user directory + """ + + def _delete_all_from_user_dir_txn(txn): + txn.execute("DELETE FROM user_directory") + txn.execute("DELETE FROM user_directory_search") + txn.execute("DELETE FROM users_in_public_rooms") + txn.execute("DELETE FROM users_who_share_private_rooms") + txn.call_after(self.get_user_in_directory.invalidate_all) + + return self.runInteraction( + "delete_all_from_user_dir", _delete_all_from_user_dir_txn + ) + + @cached() + def get_user_in_directory(self, user_id): + return self._simple_select_one( + table="user_directory", + keyvalues={"user_id": user_id}, + retcols=("display_name", "avatar_url"), + allow_none=True, + desc="get_user_in_directory", + ) + + def update_user_directory_stream_pos(self, stream_id): + return self._simple_update_one( + table="user_directory_stream_pos", + keyvalues={}, + updatevalues={"stream_id": stream_id}, + desc="update_user_directory_stream_pos", + ) + + +class UserDirectoryStore(UserDirectoryBackgroundUpdateStore): + + # How many records do we calculate before sending it to + # add_users_who_share_private_rooms? 
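+    # (Duplicated from UserDirectoryBackgroundUpdateStore above so that
+    # each class is self-contained: it is the number of records buffered
+    # before being handed to add_users_who_share_private_rooms.)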
+ SHARE_PRIVATE_WORKING_SET = 500 + + def __init__(self, db_conn, hs): + super(UserDirectoryStore, self).__init__(db_conn, hs) + + def remove_from_user_dir(self, user_id): + def _remove_from_user_dir_txn(txn): + self._simple_delete_txn( + txn, table="user_directory", keyvalues={"user_id": user_id} + ) + self._simple_delete_txn( + txn, table="user_directory_search", keyvalues={"user_id": user_id} + ) + self._simple_delete_txn( + txn, table="users_in_public_rooms", keyvalues={"user_id": user_id} + ) + self._simple_delete_txn( + txn, + table="users_who_share_private_rooms", + keyvalues={"user_id": user_id}, + ) + self._simple_delete_txn( + txn, + table="users_who_share_private_rooms", + keyvalues={"other_user_id": user_id}, + ) + txn.call_after(self.get_user_in_directory.invalidate, (user_id,)) + + return self.runInteraction("remove_from_user_dir", _remove_from_user_dir_txn) + + @defer.inlineCallbacks + def get_users_in_dir_due_to_room(self, room_id): + """Get all user_ids that are in the room directory because they're + in the given room_id + """ + user_ids_share_pub = yield self._simple_select_onecol( + table="users_in_public_rooms", + keyvalues={"room_id": room_id}, + retcol="user_id", + desc="get_users_in_dir_due_to_room", + ) + + user_ids_share_priv = yield self._simple_select_onecol( + table="users_who_share_private_rooms", + keyvalues={"room_id": room_id}, + retcol="other_user_id", + desc="get_users_in_dir_due_to_room", + ) + + user_ids = set(user_ids_share_pub) + user_ids.update(user_ids_share_priv) + + return user_ids + def remove_user_who_share_room(self, user_id, room_id): """ Deletes entries in the users_who_share_*_rooms table. The first @@ -637,31 +680,6 @@ class UserDirectoryStore(StateDeltasStore, BackgroundUpdateStore): return [room_id for room_id, in rows] - def delete_all_from_user_dir(self): - """Delete the entire user directory - """ - - def _delete_all_from_user_dir_txn(txn): - txn.execute("DELETE FROM user_directory") - txn.execute("DELETE FROM user_directory_search") - txn.execute("DELETE FROM users_in_public_rooms") - txn.execute("DELETE FROM users_who_share_private_rooms") - txn.call_after(self.get_user_in_directory.invalidate_all) - - return self.runInteraction( - "delete_all_from_user_dir", _delete_all_from_user_dir_txn - ) - - @cached() - def get_user_in_directory(self, user_id): - return self._simple_select_one( - table="user_directory", - keyvalues={"user_id": user_id}, - retcols=("display_name", "avatar_url"), - allow_none=True, - desc="get_user_in_directory", - ) - def get_user_directory_stream_pos(self): return self._simple_select_one_onecol( table="user_directory_stream_pos", @@ -670,14 +688,6 @@ class UserDirectoryStore(StateDeltasStore, BackgroundUpdateStore): desc="get_user_directory_stream_pos", ) - def update_user_directory_stream_pos(self, stream_id): - return self._simple_update_one( - table="user_directory_stream_pos", - keyvalues={}, - updatevalues={"stream_id": stream_id}, - desc="update_user_directory_stream_pos", - ) - @defer.inlineCallbacks def search_user_dir(self, user_id, search_term, limit): """Searches for users in directory From 0496eafbf4277523430114cf965a254241b290e7 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 3 Oct 2019 19:00:55 +0100 Subject: [PATCH 67/85] Move roommember's bg updates to a dedicated store --- synapse/storage/roommember.py | 222 +++++++++++++++++----------------- 1 file changed, 114 insertions(+), 108 deletions(-) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 
59a89fad60..4e606a8380 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -27,6 +27,7 @@ from synapse.api.constants import EventTypes, Membership from synapse.metrics import LaterGauge from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage._base import LoggingTransaction +from synapse.storage.background_updates import BackgroundUpdateStore from synapse.storage.engines import Sqlite3Engine from synapse.storage.events_worker import EventsWorkerStore from synapse.types import get_domain_from_id @@ -820,9 +821,9 @@ class RoomMemberWorkerStore(EventsWorkerStore): return set(room_ids) -class RoomMemberStore(RoomMemberWorkerStore): +class RoomMemberBackgroundUpdateStore(BackgroundUpdateStore): def __init__(self, db_conn, hs): - super(RoomMemberStore, self).__init__(db_conn, hs) + super(RoomMemberBackgroundUpdateStore, self).__init__(db_conn, hs) self.register_background_update_handler( _MEMBERSHIP_PROFILE_UPDATE_NAME, self._background_add_membership_profile ) @@ -838,112 +839,6 @@ class RoomMemberStore(RoomMemberWorkerStore): where_clause="forgotten = 1", ) - def _store_room_members_txn(self, txn, events, backfilled): - """Store a room member in the database. - """ - self._simple_insert_many_txn( - txn, - table="room_memberships", - values=[ - { - "event_id": event.event_id, - "user_id": event.state_key, - "sender": event.user_id, - "room_id": event.room_id, - "membership": event.membership, - "display_name": event.content.get("displayname", None), - "avatar_url": event.content.get("avatar_url", None), - } - for event in events - ], - ) - - for event in events: - txn.call_after( - self._membership_stream_cache.entity_has_changed, - event.state_key, - event.internal_metadata.stream_ordering, - ) - txn.call_after( - self.get_invited_rooms_for_user.invalidate, (event.state_key,) - ) - - # We update the local_invites table only if the event is "current", - # i.e., its something that has just happened. If the event is an - # outlier it is only current if its an "out of band membership", - # like a remote invite or a rejection of a remote invite. - is_new_state = not backfilled and ( - not event.internal_metadata.is_outlier() - or event.internal_metadata.is_out_of_band_membership() - ) - is_mine = self.hs.is_mine_id(event.state_key) - if is_new_state and is_mine: - if event.membership == Membership.INVITE: - self._simple_insert_txn( - txn, - table="local_invites", - values={ - "event_id": event.event_id, - "invitee": event.state_key, - "inviter": event.sender, - "room_id": event.room_id, - "stream_id": event.internal_metadata.stream_ordering, - }, - ) - else: - sql = ( - "UPDATE local_invites SET stream_id = ?, replaced_by = ? WHERE" - " room_id = ? AND invitee = ? AND locally_rejected is NULL" - " AND replaced_by is NULL" - ) - - txn.execute( - sql, - ( - event.internal_metadata.stream_ordering, - event.event_id, - event.room_id, - event.state_key, - ), - ) - - @defer.inlineCallbacks - def locally_reject_invite(self, user_id, room_id): - sql = ( - "UPDATE local_invites SET stream_id = ?, locally_rejected = ? WHERE" - " room_id = ? AND invitee = ? 
AND locally_rejected is NULL" - " AND replaced_by is NULL" - ) - - def f(txn, stream_ordering): - txn.execute(sql, (stream_ordering, True, room_id, user_id)) - - with self._stream_id_gen.get_next() as stream_ordering: - yield self.runInteraction("locally_reject_invite", f, stream_ordering) - - def forget(self, user_id, room_id): - """Indicate that user_id wishes to discard history for room_id.""" - - def f(txn): - sql = ( - "UPDATE" - " room_memberships" - " SET" - " forgotten = 1" - " WHERE" - " user_id = ?" - " AND" - " room_id = ?" - ) - txn.execute(sql, (user_id, room_id)) - - self._invalidate_cache_and_stream(txn, self.did_forget, (user_id, room_id)) - self._invalidate_cache_and_stream( - txn, self.get_forgotten_rooms_for_user, (user_id,) - ) - - return self.runInteraction("forget_membership", f) - @defer.inlineCallbacks def _background_add_membership_profile(self, progress, batch_size): target_min_stream_id = progress.get( @@ -1078,6 +973,117 @@ class RoomMemberStore(RoomMemberWorkerStore): return row_count +class RoomMemberStore(RoomMemberWorkerStore, RoomMemberBackgroundUpdateStore): + def __init__(self, db_conn, hs): + super(RoomMemberStore, self).__init__(db_conn, hs) + + def _store_room_members_txn(self, txn, events, backfilled): + """Store a room member in the database. + """ + self._simple_insert_many_txn( + txn, + table="room_memberships", + values=[ + { + "event_id": event.event_id, + "user_id": event.state_key, + "sender": event.user_id, + "room_id": event.room_id, + "membership": event.membership, + "display_name": event.content.get("displayname", None), + "avatar_url": event.content.get("avatar_url", None), + } + for event in events + ], + ) + + for event in events: + txn.call_after( + self._membership_stream_cache.entity_has_changed, + event.state_key, + event.internal_metadata.stream_ordering, + ) + txn.call_after( + self.get_invited_rooms_for_user.invalidate, (event.state_key,) + ) + + # We update the local_invites table only if the event is "current", + # i.e., its something that has just happened. If the event is an + # outlier it is only current if its an "out of band membership", + # like a remote invite or a rejection of a remote invite. + is_new_state = not backfilled and ( + not event.internal_metadata.is_outlier() + or event.internal_metadata.is_out_of_band_membership() + ) + is_mine = self.hs.is_mine_id(event.state_key) + if is_new_state and is_mine: + if event.membership == Membership.INVITE: + self._simple_insert_txn( + txn, + table="local_invites", + values={ + "event_id": event.event_id, + "invitee": event.state_key, + "inviter": event.sender, + "room_id": event.room_id, + "stream_id": event.internal_metadata.stream_ordering, + }, + ) + else: + sql = ( + "UPDATE local_invites SET stream_id = ?, replaced_by = ? WHERE" + " room_id = ? AND invitee = ? AND locally_rejected is NULL" + " AND replaced_by is NULL" + ) + + txn.execute( + sql, + ( + event.internal_metadata.stream_ordering, + event.event_id, + event.room_id, + event.state_key, + ), + ) + + @defer.inlineCallbacks + def locally_reject_invite(self, user_id, room_id): + sql = ( + "UPDATE local_invites SET stream_id = ?, locally_rejected = ? WHERE" + " room_id = ? AND invitee = ? 
AND locally_rejected is NULL" + " AND replaced_by is NULL" + ) + + def f(txn, stream_ordering): + txn.execute(sql, (stream_ordering, True, room_id, user_id)) + + with self._stream_id_gen.get_next() as stream_ordering: + yield self.runInteraction("locally_reject_invite", f, stream_ordering) + + def forget(self, user_id, room_id): + """Indicate that user_id wishes to discard history for room_id.""" + + def f(txn): + sql = ( + "UPDATE" + " room_memberships" + " SET" + " forgotten = 1" + " WHERE" + " user_id = ?" + " AND" + " room_id = ?" + ) + txn.execute(sql, (user_id, room_id)) + + self._invalidate_cache_and_stream(txn, self.did_forget, (user_id, room_id)) + self._invalidate_cache_and_stream( + txn, self.get_forgotten_rooms_for_user, (user_id,) + ) + + return self.runInteraction("forget_membership", f) + + class _JoinedHostsCache(object): """Cache for joined hosts in a room that is optimised to handle updates via state deltas. From cc2e19ad4b4fb55306f060354f74d1750e4b6001 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 7 Oct 2019 17:37:55 +0100 Subject: [PATCH 68/85] fix changelog --- changelog.d/6175.misc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/6175.misc b/changelog.d/6175.misc index 5bb24f02fc..30b3e56082 100644 --- a/changelog.d/6175.misc +++ b/changelog.d/6175.misc @@ -1 +1 @@ -Update `user_filters` table to have a unique index, and non-null columns. Thanks to @pik for contributing this +Update `user_filters` table to have a unique index, and non-null columns. Thanks to @pik for contributing this. \ No newline at end of file From 66ebea17235d9d3988d56cd1355656bbb508b3be Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 3 Oct 2019 18:23:05 +0100 Subject: [PATCH 69/85] Lint --- synapse/storage/media_repository.py | 1 - synapse/storage/search.py | 1 - synapse/storage/state.py | 4 +++- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index 2eb2740d07..84b5f3ad5e 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -16,7 +16,6 @@ from synapse.storage.background_updates import BackgroundUpdateStore class MediaRepositoryBackgroundUpdateStore(BackgroundUpdateStore): - def __init__(self, db_conn, hs): super(MediaRepositoryBackgroundUpdateStore, self).__init__(db_conn, hs) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 9a41e78002..6ba4190f1a 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -337,7 +337,6 @@ class SearchBackgroundUpdateStore(BackgroundUpdateStore): class SearchStore(SearchBackgroundUpdateStore): - def __init__(self, db_conn, hs): super(SearchStore, self).__init__(db_conn, hs) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 71b533c006..a941a5ae3f 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -1288,7 +1288,9 @@ class StateGroupWorkerStore( return self.runInteraction("store_state_group", _store_state_group_txn) -class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore, BackgroundUpdateStore): +class StateBackgroundUpdateStore( + StateGroupBackgroundUpdateStore, BackgroundUpdateStore +): STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication" STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index" From 21b5d8b1076354c7c6ee8849491f3fc886cc8189 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 7 Oct 2019 18:00:31 +0100 Subject: [PATCH 70/85] Changelog --- 
changelog.d/6178.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/6178.bugfix diff --git a/changelog.d/6178.bugfix b/changelog.d/6178.bugfix new file mode 100644 index 0000000000..cd288c2a44 --- /dev/null +++ b/changelog.d/6178.bugfix @@ -0,0 +1 @@ +Make the `synapse_port_db` script create the right indexes on a new PostgreSQL database. From b94a401852a5b6d87455285ea050c4e0731dd6ab Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 8 Oct 2019 09:35:37 +0100 Subject: [PATCH 71/85] Fix /federation/v1/state for recent room versions (#6170) * Fix /federation/v1/state for recent room versions Turns out this endpoint was completely broken for v3 rooms. Hopefully this re-signing code is irrelevant nowadays anyway. --- changelog.d/6170.bugfix | 1 + synapse/federation/federation_server.py | 13 ------------- 2 files changed, 1 insertion(+), 13 deletions(-) create mode 100644 changelog.d/6170.bugfix diff --git a/changelog.d/6170.bugfix b/changelog.d/6170.bugfix new file mode 100644 index 0000000000..52f7ea233c --- /dev/null +++ b/changelog.d/6170.bugfix @@ -0,0 +1 @@ +Fix /federation/v1/state endpoint for recent room versions. diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index da06ab379d..21e52c9695 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -36,7 +36,6 @@ from synapse.api.errors import ( UnsupportedRoomVersionError, ) from synapse.api.room_versions import KNOWN_ROOM_VERSIONS -from synapse.crypto.event_signing import compute_event_signature from synapse.events import room_version_to_event_format from synapse.federation.federation_base import FederationBase, event_from_pdu_json from synapse.federation.persistence import TransactionActions @@ -322,18 +321,6 @@ class FederationServer(FederationBase): pdus = yield self.handler.get_state_for_pdu(room_id, event_id) auth_chain = yield self.store.get_auth_chain([pdu.event_id for pdu in pdus]) - for event in auth_chain: - # We sign these again because there was a bug where we - # incorrectly signed things the first time round - if self.hs.is_mine_id(event.event_id): - event.signatures.update( - compute_event_signature( - event.get_pdu_json(), - self.hs.hostname, - self.hs.config.signing_key[0], - ) - ) - return { "pdus": [pdu.get_pdu_json() for pdu in pdus], "auth_chain": [pdu.get_pdu_json() for pdu in auth_chain], From ea7d938bca2d5fa0d6a54412ecdf036c5a3fc3a7 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Tue, 8 Oct 2019 13:51:25 +0100 Subject: [PATCH 72/85] Remove unused public room list timeout param (#6179) * Remove unused public room list timeout param * Add changelog --- changelog.d/6179.misc | 1 + synapse/handlers/room_list.py | 13 +------------ 2 files changed, 2 insertions(+), 12 deletions(-) create mode 100644 changelog.d/6179.misc diff --git a/changelog.d/6179.misc b/changelog.d/6179.misc new file mode 100644 index 0000000000..01c4e71ea3 --- /dev/null +++ b/changelog.d/6179.misc @@ -0,0 +1 @@ +Remove unused `timeout` parameter from `_get_public_room_list`. \ No newline at end of file diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index cfed344d4d..c615206df1 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -88,16 +88,8 @@ class RoomListHandler(BaseHandler): # appservice specific lists. 
logger.info("Bypassing cache as search request.") - # XXX: Quick hack to stop room directory queries taking too long. - # Timeout request after 60s. Probably want a more fundamental - # solution at some point - timeout = self.clock.time() + 60 return self._get_public_room_list( - limit, - since_token, - search_filter, - network_tuple=network_tuple, - timeout=timeout, + limit, since_token, search_filter, network_tuple=network_tuple ) key = (limit, since_token, network_tuple) @@ -118,7 +110,6 @@ class RoomListHandler(BaseHandler): search_filter=None, network_tuple=EMPTY_THIRD_PARTY_ID, from_federation=False, - timeout=None, ): """Generate a public room list. Args: @@ -131,8 +122,6 @@ class RoomListHandler(BaseHandler): Setting to None returns all public rooms across all lists. from_federation (bool): Whether this request originated from a federating server or a client. Used for room filtering. - timeout (int|None): Amount of seconds to wait for a response before - timing out. TODO """ # Pagination tokens work by storing the room ID sent in the last batch, From 474abf1eb6852ca488fbf86d3da0622a457efef1 Mon Sep 17 00:00:00 2001 From: Anshul Angaria Date: Tue, 8 Oct 2019 18:25:16 +0530 Subject: [PATCH 73/85] add M_TOO_LARGE error code for uploading a too large file (#6151) Fixes #6109 --- changelog.d/6109.bugfix | 1 + synapse/rest/media/v1/upload_resource.py | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 changelog.d/6109.bugfix diff --git a/changelog.d/6109.bugfix b/changelog.d/6109.bugfix new file mode 100644 index 0000000000..da7ac1be4e --- /dev/null +++ b/changelog.d/6109.bugfix @@ -0,0 +1 @@ +Fix bug when uploading a large file: Synapse responds with `M_UNKNOWN` while it should be `M_TOO_LARGE` according to spec. Contributed by Anshul Angaria. 
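With this fix, a client that exceeds the upload limit receives HTTP 413 with `M_TOO_LARGE` in the JSON error body, rather than the generic `M_UNKNOWN`. The snippet below is a standalone sketch of the check the diff adds; `SynapseError` here is a simplified stand-in for Synapse's real exception class, and the 50 MB limit is an arbitrary example (Synapse takes it from the `max_upload_size` config option).

class SynapseError(Exception):
    """Simplified stand-in carrying an HTTP status and a Matrix errcode."""

    def __init__(self, code, msg, errcode="M_UNKNOWN"):
        super(SynapseError, self).__init__(msg)
        self.code = code        # HTTP status for the response
        self.errcode = errcode  # machine-readable code in the JSON body


MAX_UPLOAD_SIZE = 50 * 1024 * 1024  # example value only


def check_content_length(content_length):
    if content_length is None:
        raise SynapseError(400, "Request must specify a Content-Length")
    if int(content_length) > MAX_UPLOAD_SIZE:
        # 413 Payload Too Large, now paired with M_TOO_LARGE per the spec
        raise SynapseError(
            413, "Upload request body is too large", errcode="M_TOO_LARGE"
        )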
diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py index 5d76bbdf68..83d005812d 100644 --- a/synapse/rest/media/v1/upload_resource.py +++ b/synapse/rest/media/v1/upload_resource.py @@ -17,7 +17,7 @@ import logging from twisted.web.server import NOT_DONE_YET -from synapse.api.errors import SynapseError +from synapse.api.errors import Codes, SynapseError from synapse.http.server import ( DirectServeResource, respond_with_json, @@ -56,7 +56,11 @@ class UploadResource(DirectServeResource): if content_length is None: raise SynapseError(msg="Request must specify a Content-Length", code=400) if int(content_length) > self.max_upload_size: - raise SynapseError(msg="Upload request body is too large", code=413) + raise SynapseError( + msg="Upload request body is too large", + code=413, + errcode=Codes.TOO_LARGE, + ) upload_name = parse_string(request, b"filename", encoding=None) if upload_name: From 8f1b385accbf8be15c35b6f06b18eb6d998544e4 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Tue, 8 Oct 2019 14:36:33 +0100 Subject: [PATCH 74/85] Don't end up with 4 classes in registration --- synapse/storage/registration.py | 102 ++++++++++++++++---------------- 1 file changed, 50 insertions(+), 52 deletions(-) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 1f6c93b73b..524b5eeaba 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -37,57 +37,7 @@ THIRTY_MINUTES_IN_MS = 30 * 60 * 1000 logger = logging.getLogger(__name__) -class RegistrationDeactivationStore(SQLBaseStore): - @cachedInlineCallbacks() - def get_user_deactivated_status(self, user_id): - """Retrieve the value for the `deactivated` property for the provided user. - - Args: - user_id (str): The ID of the user to retrieve the status for. - - Returns: - defer.Deferred(bool): The requested value. - """ - - res = yield self._simple_select_one_onecol( - table="users", - keyvalues={"name": user_id}, - retcol="deactivated", - desc="get_user_deactivated_status", - ) - - # Convert the integer into a boolean. - return res == 1 - - @defer.inlineCallbacks - def set_user_deactivated_status(self, user_id, deactivated): - """Set the `deactivated` property for the provided user to the provided value. - - Args: - user_id (str): The ID of the user to set the status for. - deactivated (bool): The value to set for `deactivated`. - """ - - yield self.runInteraction( - "set_user_deactivated_status", - self.set_user_deactivated_status_txn, - user_id, - deactivated, - ) - - def set_user_deactivated_status_txn(self, txn, user_id, deactivated): - self._simple_update_one_txn( - txn=txn, - table="users", - keyvalues={"name": user_id}, - updatevalues={"deactivated": 1 if deactivated else 0}, - ) - self._invalidate_cache_and_stream( - txn, self.get_user_deactivated_status, (user_id,) - ) - - -class RegistrationWorkerStore(RegistrationDeactivationStore): +class RegistrationWorkerStore(SQLBaseStore): def __init__(self, db_conn, hs): super(RegistrationWorkerStore, self).__init__(db_conn, hs) @@ -723,6 +673,27 @@ class RegistrationWorkerStore(RegistrationDeactivationStore): desc="get_id_servers_user_bound", ) + @cachedInlineCallbacks() + def get_user_deactivated_status(self, user_id): + """Retrieve the value for the `deactivated` property for the provided user. + + Args: + user_id (str): The ID of the user to retrieve the status for. + + Returns: + defer.Deferred(bool): The requested value. 
+ """ + + res = yield self._simple_select_one_onecol( + table="users", + keyvalues={"name": user_id}, + retcol="deactivated", + desc="get_user_deactivated_status", + ) + + # Convert the integer into a boolean. + return res == 1 + def get_threepid_validation_session( self, medium, client_secret, address=None, sid=None, validated=True ): @@ -817,7 +788,7 @@ class RegistrationWorkerStore(RegistrationDeactivationStore): class RegistrationBackgroundUpdateStore( - RegistrationDeactivationStore, background_updates.BackgroundUpdateStore + RegistrationWorkerStore, background_updates.BackgroundUpdateStore ): def __init__(self, db_conn, hs): super(RegistrationBackgroundUpdateStore, self).__init__(db_conn, hs) @@ -1499,3 +1470,30 @@ class RegistrationStore(RegistrationWorkerStore, RegistrationBackgroundUpdateSto cull_expired_threepid_validation_tokens_txn, self.clock.time_msec(), ) + + @defer.inlineCallbacks + def set_user_deactivated_status(self, user_id, deactivated): + """Set the `deactivated` property for the provided user to the provided value. + + Args: + user_id (str): The ID of the user to set the status for. + deactivated (bool): The value to set for `deactivated`. + """ + + yield self.runInteraction( + "set_user_deactivated_status", + self.set_user_deactivated_status_txn, + user_id, + deactivated, + ) + + def set_user_deactivated_status_txn(self, txn, user_id, deactivated): + self._simple_update_one_txn( + txn=txn, + table="users", + keyvalues={"name": user_id}, + updatevalues={"deactivated": 1 if deactivated else 0}, + ) + self._invalidate_cache_and_stream( + txn, self.get_user_deactivated_status, (user_id,) + ) From b1c0a4ceb3c2c5ca51f0b32efcd58d8493fd9b99 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Tue, 8 Oct 2019 14:38:14 +0100 Subject: [PATCH 75/85] Cleanup client_ips --- synapse/storage/client_ips.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 1d89b50f57..067820a5da 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -35,15 +35,8 @@ LAST_SEEN_GRANULARITY = 120 * 1000 class ClientIpBackgroundUpdateStore(background_updates.BackgroundUpdateStore): def __init__(self, db_conn, hs): - - self.client_ip_last_seen = Cache( - name="client_ip_last_seen", keylen=4, max_entries=50000 * CACHE_SIZE_FACTOR - ) - super(ClientIpBackgroundUpdateStore, self).__init__(db_conn, hs) - self.user_ips_max_age = hs.config.user_ips_max_age - self.register_background_index_update( "user_ips_device_index", index_name="user_ips_device_id", From c69324ffb588f72786c37b864d510abd279e47a2 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Tue, 8 Oct 2019 14:48:33 +0100 Subject: [PATCH 76/85] Fix RegistrationStore --- synapse/storage/registration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 524b5eeaba..6c5b29288a 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -915,7 +915,7 @@ class RegistrationBackgroundUpdateStore( return 1 -class RegistrationStore(RegistrationWorkerStore, RegistrationBackgroundUpdateStore): +class RegistrationStore(RegistrationBackgroundUpdateStore): def __init__(self, db_conn, hs): super(RegistrationStore, self).__init__(db_conn, hs) From de26678724cd5c19dcc77c0d55fd89320cee38d4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 9 Oct 2019 15:13:02 +0100 Subject: [PATCH 77/85] Update changelog.d/6185.bugfix Co-Authored-By: Richard van der 
Hoff <1389908+richvdh@users.noreply.github.com> --- changelog.d/6185.bugfix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/6185.bugfix b/changelog.d/6185.bugfix index 199ec69032..9d1c669b88 100644 --- a/changelog.d/6185.bugfix +++ b/changelog.d/6185.bugfix @@ -1 +1 @@ -Fix bug where we were updating censored events as bytes rather than text, occaisonally causing invalid JSON being inserted breaking APIs that attempted to fetch such events. +Fix bug where redacted events were sometimes incorrectly censored in the database, breaking APIs that attempted to fetch such events. From f743108a94658eb1dbaf168d39874272f756a386 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Thu, 10 Oct 2019 09:39:35 +0100 Subject: [PATCH 78/85] Refactor HomeserverConfig so it can be typechecked (#6137) --- changelog.d/6137.misc | 1 + mypy.ini | 16 +- synapse/config/_base.py | 191 +++++++++++++++++----- synapse/config/_base.pyi | 135 +++++++++++++++ synapse/config/api.py | 2 + synapse/config/appservice.py | 2 + synapse/config/captcha.py | 2 + synapse/config/cas.py | 2 + synapse/config/consent_config.py | 3 + synapse/config/database.py | 2 + synapse/config/emailconfig.py | 2 + synapse/config/groups.py | 2 + synapse/config/homeserver.py | 68 ++++---- synapse/config/jwt_config.py | 2 + synapse/config/key.py | 2 + synapse/config/logger.py | 2 + synapse/config/metrics.py | 2 + synapse/config/password.py | 2 + synapse/config/password_auth_providers.py | 2 + synapse/config/push.py | 2 + synapse/config/ratelimiting.py | 2 + synapse/config/registration.py | 4 + synapse/config/repository.py | 2 + synapse/config/room_directory.py | 2 + synapse/config/saml2_config.py | 2 + synapse/config/server.py | 2 + synapse/config/server_notices_config.py | 2 + synapse/config/spam_checker.py | 2 + synapse/config/stats.py | 2 + synapse/config/third_party_event_rules.py | 2 + synapse/config/tls.py | 9 +- synapse/config/tracer.py | 2 + synapse/config/user_directory.py | 2 + synapse/config/voip.py | 2 + synapse/config/workers.py | 2 + tests/config/test_tls.py | 25 ++- tox.ini | 3 +- 37 files changed, 415 insertions(+), 94 deletions(-) create mode 100644 changelog.d/6137.misc create mode 100644 synapse/config/_base.pyi diff --git a/changelog.d/6137.misc b/changelog.d/6137.misc new file mode 100644 index 0000000000..92a02e71c3 --- /dev/null +++ b/changelog.d/6137.misc @@ -0,0 +1 @@ +Refactor configuration loading to allow better typechecking. diff --git a/mypy.ini b/mypy.ini index 8788574ee3..ffadaddc0b 100644 --- a/mypy.ini +++ b/mypy.ini @@ -4,10 +4,6 @@ plugins=mypy_zope:plugin follow_imports=skip mypy_path=stubs -[mypy-synapse.config.homeserver] -# this is a mess because of the metaclass shenanigans -ignore_errors = True - [mypy-zope] ignore_missing_imports = True @@ -52,3 +48,15 @@ ignore_missing_imports = True [mypy-signedjson.*] ignore_missing_imports = True + +[mypy-prometheus_client.*] +ignore_missing_imports = True + +[mypy-service_identity.*] +ignore_missing_imports = True + +[mypy-daemonize] +ignore_missing_imports = True + +[mypy-sentry_sdk] +ignore_missing_imports = True diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 31f6530978..08619404bb 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -18,7 +18,9 @@ import argparse import errno import os +from collections import OrderedDict from textwrap import dedent +from typing import Any, MutableMapping, Optional from six import integer_types @@ -51,7 +53,56 @@ Missing mandatory `server_name` config option. 
""" +def path_exists(file_path): + """Check if a file exists + + Unlike os.path.exists, this throws an exception if there is an error + checking if the file exists (for example, if there is a perms error on + the parent dir). + + Returns: + bool: True if the file exists; False if not. + """ + try: + os.stat(file_path) + return True + except OSError as e: + if e.errno != errno.ENOENT: + raise e + return False + + class Config(object): + """ + A configuration section, containing configuration keys and values. + + Attributes: + section (str): The section title of this config object, such as + "tls" or "logger". This is used to refer to it on the root + logger (for example, `config.tls.some_option`). Must be + defined in subclasses. + """ + + section = None + + def __init__(self, root_config=None): + self.root = root_config + + def __getattr__(self, item: str) -> Any: + """ + Try and fetch a configuration option that does not exist on this class. + + This is so that existing configs that rely on `self.value`, where value + is actually from a different config section, continue to work. + """ + if item in ["generate_config_section", "read_config"]: + raise AttributeError(item) + + if self.root is None: + raise AttributeError(item) + else: + return self.root._get_unclassed_config(self.section, item) + @staticmethod def parse_size(value): if isinstance(value, integer_types): @@ -88,22 +139,7 @@ class Config(object): @classmethod def path_exists(cls, file_path): - """Check if a file exists - - Unlike os.path.exists, this throws an exception if there is an error - checking if the file exists (for example, if there is a perms error on - the parent dir). - - Returns: - bool: True if the file exists; False if not. - """ - try: - os.stat(file_path) - return True - except OSError as e: - if e.errno != errno.ENOENT: - raise e - return False + return path_exists(file_path) @classmethod def check_file(cls, file_path, config_name): @@ -136,42 +172,106 @@ class Config(object): with open(file_path) as file_stream: return file_stream.read() - def invoke_all(self, name, *args, **kargs): - """Invoke all instance methods with the given name and arguments in the - class's MRO. + +class RootConfig(object): + """ + Holder of an application's configuration. + + What configuration this object holds is defined by `config_classes`, a list + of Config classes that will be instantiated and given the contents of a + configuration file to read. They can then be accessed on this class by their + section name, defined in the Config or dynamically set to be the name of the + class, lower-cased and with "Config" removed. + """ + + config_classes = [] + + def __init__(self): + self._configs = OrderedDict() + + for config_class in self.config_classes: + if config_class.section is None: + raise ValueError("%r requires a section name" % (config_class,)) + + try: + conf = config_class(self) + except Exception as e: + raise Exception("Failed making %s: %r" % (config_class.section, e)) + self._configs[config_class.section] = conf + + def __getattr__(self, item: str) -> Any: + """ + Redirect lookups on this object either to config objects, or values on + config objects, so that `config.tls.blah` works, as well as legacy uses + of things like `config.server_name`. It will first look up the config + section name, and then values on those config classes. 
+ """ + if item in self._configs.keys(): + return self._configs[item] + + return self._get_unclassed_config(None, item) + + def _get_unclassed_config(self, asking_section: Optional[str], item: str): + """ + Fetch a config value from one of the instantiated config classes that + has not been fetched directly. Args: - name (str): Name of function to invoke + asking_section: If this check is coming from a Config child, which + one? This section will not be asked if it has the value. + item: The configuration value key. + + Raises: + AttributeError if no config classes have the config key. The body + will contain what sections were checked. + """ + for key, val in self._configs.items(): + if key == asking_section: + continue + + if item in dir(val): + return getattr(val, item) + + raise AttributeError(item, "not found in %s" % (list(self._configs.keys()),)) + + def invoke_all(self, func_name: str, *args, **kwargs) -> MutableMapping[str, Any]: + """ + Invoke a function on all instantiated config objects this RootConfig is + configured to use. + + Args: + func_name: Name of function to invoke *args **kwargs - Returns: - list: The list of the return values from each method called + ordered dictionary of config section name and the result of the + function from it. """ - results = [] - for cls in type(self).mro(): - if name in cls.__dict__: - results.append(getattr(cls, name)(self, *args, **kargs)) - return results + res = OrderedDict() + + for name, config in self._configs.items(): + if hasattr(config, func_name): + res[name] = getattr(config, func_name)(*args, **kwargs) + + return res @classmethod - def invoke_all_static(cls, name, *args, **kargs): - """Invoke all static methods with the given name and arguments in the - class's MRO. + def invoke_all_static(cls, func_name: str, *args, **kwargs): + """ + Invoke a static function on config objects this RootConfig is + configured to use. Args: - name (str): Name of function to invoke + func_name: Name of function to invoke *args **kwargs - Returns: - list: The list of the return values from each method called + ordered dictionary of config section name and the result of the + function from it. """ - results = [] - for c in cls.mro(): - if name in c.__dict__: - results.append(getattr(c, name)(*args, **kargs)) - return results + for config in cls.config_classes: + if hasattr(config, func_name): + getattr(config, func_name)(*args, **kwargs) def generate_config( self, @@ -187,7 +287,8 @@ class Config(object): tls_private_key_path=None, acme_domain=None, ): - """Build a default configuration file + """ + Build a default configuration file This is used when the user explicitly asks us to generate a config file (eg with --generate_config). 
@@ -242,6 +343,7 @@ class Config(object): Returns: str: the yaml config file """ + return "\n\n".join( dedent(conf) for conf in self.invoke_all( @@ -257,7 +359,7 @@ class Config(object): tls_certificate_path=tls_certificate_path, tls_private_key_path=tls_private_key_path, acme_domain=acme_domain, - ) + ).values() ) @classmethod @@ -444,7 +546,7 @@ class Config(object): ) (config_path,) = config_files - if not cls.path_exists(config_path): + if not path_exists(config_path): print("Generating config file %s" % (config_path,)) if config_args.data_directory: @@ -469,7 +571,7 @@ class Config(object): open_private_ports=config_args.open_private_ports, ) - if not cls.path_exists(config_dir_path): + if not path_exists(config_dir_path): os.makedirs(config_dir_path) with open(config_path, "w") as config_file: config_file.write("# vim:ft=yaml\n\n") @@ -518,7 +620,7 @@ class Config(object): return obj - def parse_config_dict(self, config_dict, config_dir_path, data_dir_path): + def parse_config_dict(self, config_dict, config_dir_path=None, data_dir_path=None): """Read the information from the config dict into this Config object. Args: @@ -607,3 +709,6 @@ def find_config_files(search_paths): else: config_files.append(config_path) return config_files + + +__all__ = ["Config", "RootConfig"] diff --git a/synapse/config/_base.pyi b/synapse/config/_base.pyi new file mode 100644 index 0000000000..86bc965ee4 --- /dev/null +++ b/synapse/config/_base.pyi @@ -0,0 +1,135 @@ +from typing import Any, List, Optional + +from synapse.config import ( + api, + appservice, + captcha, + cas, + consent_config, + database, + emailconfig, + groups, + jwt_config, + key, + logger, + metrics, + password, + password_auth_providers, + push, + ratelimiting, + registration, + repository, + room_directory, + saml2_config, + server, + server_notices_config, + spam_checker, + stats, + third_party_event_rules, + tls, + tracer, + user_directory, + voip, + workers, +) + +class ConfigError(Exception): ... + +MISSING_REPORT_STATS_CONFIG_INSTRUCTIONS: str +MISSING_REPORT_STATS_SPIEL: str +MISSING_SERVER_NAME: str + +def path_exists(file_path: str): ... + +class RootConfig: + server: server.ServerConfig + tls: tls.TlsConfig + database: database.DatabaseConfig + logging: logger.LoggingConfig + ratelimit: ratelimiting.RatelimitConfig + media: repository.ContentRepositoryConfig + captcha: captcha.CaptchaConfig + voip: voip.VoipConfig + registration: registration.RegistrationConfig + metrics: metrics.MetricsConfig + api: api.ApiConfig + appservice: appservice.AppServiceConfig + key: key.KeyConfig + saml2: saml2_config.SAML2Config + cas: cas.CasConfig + jwt: jwt_config.JWTConfig + password: password.PasswordConfig + email: emailconfig.EmailConfig + worker: workers.WorkerConfig + authproviders: password_auth_providers.PasswordAuthProviderConfig + push: push.PushConfig + spamchecker: spam_checker.SpamCheckerConfig + groups: groups.GroupsConfig + userdirectory: user_directory.UserDirectoryConfig + consent: consent_config.ConsentConfig + stats: stats.StatsConfig + servernotices: server_notices_config.ServerNoticesConfig + roomdirectory: room_directory.RoomDirectoryConfig + thirdpartyrules: third_party_event_rules.ThirdPartyRulesConfig + tracer: tracer.TracerConfig + + config_classes: List = ... + def __init__(self) -> None: ... + def invoke_all(self, func_name: str, *args: Any, **kwargs: Any): ... + @classmethod + def invoke_all_static(cls, func_name: str, *args: Any, **kwargs: Any) -> None: ... + def __getattr__(self, item: str): ... 
+ def parse_config_dict( + self, + config_dict: Any, + config_dir_path: Optional[Any] = ..., + data_dir_path: Optional[Any] = ..., + ) -> None: ... + read_config: Any = ... + def generate_config( + self, + config_dir_path: str, + data_dir_path: str, + server_name: str, + generate_secrets: bool = ..., + report_stats: Optional[str] = ..., + open_private_ports: bool = ..., + listeners: Optional[Any] = ..., + database_conf: Optional[Any] = ..., + tls_certificate_path: Optional[str] = ..., + tls_private_key_path: Optional[str] = ..., + acme_domain: Optional[str] = ..., + ): ... + @classmethod + def load_or_generate_config(cls, description: Any, argv: Any): ... + @classmethod + def load_config(cls, description: Any, argv: Any): ... + @classmethod + def add_arguments_to_parser(cls, config_parser: Any) -> None: ... + @classmethod + def load_config_with_parser(cls, parser: Any, argv: Any): ... + def generate_missing_files( + self, config_dict: dict, config_dir_path: str + ) -> None: ... + +class Config: + root: RootConfig + def __init__(self, root_config: Optional[RootConfig] = ...) -> None: ... + def __getattr__(self, item: str, from_root: bool = ...): ... + @staticmethod + def parse_size(value: Any): ... + @staticmethod + def parse_duration(value: Any): ... + @staticmethod + def abspath(file_path: Optional[str]): ... + @classmethod + def path_exists(cls, file_path: str): ... + @classmethod + def check_file(cls, file_path: str, config_name: str): ... + @classmethod + def ensure_directory(cls, dir_path: str): ... + @classmethod + def read_file(cls, file_path: str, config_name: str): ... + +def read_config_files(config_files: List[str]): ... +def find_config_files(search_paths: List[str]): ... diff --git a/synapse/config/api.py b/synapse/config/api.py index dddea79a8a..74cd53a8ed 100644 --- a/synapse/config/api.py +++ b/synapse/config/api.py @@ -18,6 +18,8 @@ from ._base import Config class ApiConfig(Config): + section = "api" + def read_config(self, config, **kwargs): self.room_invite_state_types = config.get( "room_invite_state_types", diff --git a/synapse/config/appservice.py b/synapse/config/appservice.py index 28d36b1bc3..9b4682222d 100644 --- a/synapse/config/appservice.py +++ b/synapse/config/appservice.py @@ -30,6 +30,8 @@ logger = logging.getLogger(__name__) class AppServiceConfig(Config): + section = "appservice" + def read_config(self, config, **kwargs): self.app_service_config_files = config.get("app_service_config_files", []) self.notify_appservices = config.get("notify_appservices", True) diff --git a/synapse/config/captcha.py b/synapse/config/captcha.py index 8dac8152cf..44bd5c6799 100644 --- a/synapse/config/captcha.py +++ b/synapse/config/captcha.py @@ -16,6 +16,8 @@ from ._base import Config class CaptchaConfig(Config): + section = "captcha" + def read_config(self, config, **kwargs): self.recaptcha_private_key = config.get("recaptcha_private_key") self.recaptcha_public_key = config.get("recaptcha_public_key") diff --git a/synapse/config/cas.py b/synapse/config/cas.py index ebe34d933b..b916c3aa66 100644 --- a/synapse/config/cas.py +++ b/synapse/config/cas.py @@ -22,6 +22,8 @@ class CasConfig(Config): cas_server_url: URL of CAS server """ + section = "cas" + def read_config(self, config, **kwargs): cas_config = config.get("cas_config", None) if cas_config: diff --git a/synapse/config/consent_config.py b/synapse/config/consent_config.py index 48976e17b1..62c4c44d60 100644 --- a/synapse/config/consent_config.py +++ b/synapse/config/consent_config.py @@ -73,6 +73,9 @@ DEFAULT_CONFIG = 
"""\ class ConsentConfig(Config): + + section = "consent" + def __init__(self, *args): super(ConsentConfig, self).__init__(*args) diff --git a/synapse/config/database.py b/synapse/config/database.py index 118aafbd4a..0e2509f0b1 100644 --- a/synapse/config/database.py +++ b/synapse/config/database.py @@ -21,6 +21,8 @@ from ._base import Config class DatabaseConfig(Config): + section = "database" + def read_config(self, config, **kwargs): self.event_cache_size = self.parse_size(config.get("event_cache_size", "10K")) diff --git a/synapse/config/emailconfig.py b/synapse/config/emailconfig.py index d9b43de660..658897a77e 100644 --- a/synapse/config/emailconfig.py +++ b/synapse/config/emailconfig.py @@ -28,6 +28,8 @@ from ._base import Config, ConfigError class EmailConfig(Config): + section = "email" + def read_config(self, config, **kwargs): # TODO: We should separate better the email configuration from the notification # and account validity config. diff --git a/synapse/config/groups.py b/synapse/config/groups.py index 2a522b5f44..d6862d9a64 100644 --- a/synapse/config/groups.py +++ b/synapse/config/groups.py @@ -17,6 +17,8 @@ from ._base import Config class GroupsConfig(Config): + section = "groups" + def read_config(self, config, **kwargs): self.enable_group_creation = config.get("enable_group_creation", False) self.group_creation_prefix = config.get("group_creation_prefix", "") diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py index 72acad4f18..6e348671c7 100644 --- a/synapse/config/homeserver.py +++ b/synapse/config/homeserver.py @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from ._base import RootConfig from .api import ApiConfig from .appservice import AppServiceConfig from .captcha import CaptchaConfig @@ -46,36 +47,37 @@ from .voip import VoipConfig from .workers import WorkerConfig -class HomeServerConfig( - ServerConfig, - TlsConfig, - DatabaseConfig, - LoggingConfig, - RatelimitConfig, - ContentRepositoryConfig, - CaptchaConfig, - VoipConfig, - RegistrationConfig, - MetricsConfig, - ApiConfig, - AppServiceConfig, - KeyConfig, - SAML2Config, - CasConfig, - JWTConfig, - PasswordConfig, - EmailConfig, - WorkerConfig, - PasswordAuthProviderConfig, - PushConfig, - SpamCheckerConfig, - GroupsConfig, - UserDirectoryConfig, - ConsentConfig, - StatsConfig, - ServerNoticesConfig, - RoomDirectoryConfig, - ThirdPartyRulesConfig, - TracerConfig, -): - pass +class HomeServerConfig(RootConfig): + + config_classes = [ + ServerConfig, + TlsConfig, + DatabaseConfig, + LoggingConfig, + RatelimitConfig, + ContentRepositoryConfig, + CaptchaConfig, + VoipConfig, + RegistrationConfig, + MetricsConfig, + ApiConfig, + AppServiceConfig, + KeyConfig, + SAML2Config, + CasConfig, + JWTConfig, + PasswordConfig, + EmailConfig, + WorkerConfig, + PasswordAuthProviderConfig, + PushConfig, + SpamCheckerConfig, + GroupsConfig, + UserDirectoryConfig, + ConsentConfig, + StatsConfig, + ServerNoticesConfig, + RoomDirectoryConfig, + ThirdPartyRulesConfig, + TracerConfig, + ] diff --git a/synapse/config/jwt_config.py b/synapse/config/jwt_config.py index 36d87cef03..a568726985 100644 --- a/synapse/config/jwt_config.py +++ b/synapse/config/jwt_config.py @@ -23,6 +23,8 @@ MISSING_JWT = """Missing jwt library. This is required for jwt login. 
class JWTConfig(Config): + section = "jwt" + def read_config(self, config, **kwargs): jwt_config = config.get("jwt_config", None) if jwt_config: diff --git a/synapse/config/key.py b/synapse/config/key.py index f039f96e9c..ec5d430afb 100644 --- a/synapse/config/key.py +++ b/synapse/config/key.py @@ -92,6 +92,8 @@ class TrustedKeyServer(object): class KeyConfig(Config): + section = "key" + def read_config(self, config, config_dir_path, **kwargs): # the signing key can be specified inline or in a separate file if "signing_key" in config: diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 767ecfdf09..d609ec111b 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -84,6 +84,8 @@ root: class LoggingConfig(Config): + section = "logging" + def read_config(self, config, **kwargs): self.log_config = self.abspath(config.get("log_config")) self.no_redirect_stdio = config.get("no_redirect_stdio", False) diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py index ec35a6b868..282a43bddb 100644 --- a/synapse/config/metrics.py +++ b/synapse/config/metrics.py @@ -34,6 +34,8 @@ class MetricsFlags(object): class MetricsConfig(Config): + section = "metrics" + def read_config(self, config, **kwargs): self.enable_metrics = config.get("enable_metrics", False) self.report_stats = config.get("report_stats", None) diff --git a/synapse/config/password.py b/synapse/config/password.py index d5b5953f2f..2a634ac751 100644 --- a/synapse/config/password.py +++ b/synapse/config/password.py @@ -20,6 +20,8 @@ class PasswordConfig(Config): """Password login configuration """ + section = "password" + def read_config(self, config, **kwargs): password_config = config.get("password_config", {}) if password_config is None: diff --git a/synapse/config/password_auth_providers.py b/synapse/config/password_auth_providers.py index c50e244394..9746bbc681 100644 --- a/synapse/config/password_auth_providers.py +++ b/synapse/config/password_auth_providers.py @@ -23,6 +23,8 @@ LDAP_PROVIDER = "ldap_auth_provider.LdapAuthProvider" class PasswordAuthProviderConfig(Config): + section = "authproviders" + def read_config(self, config, **kwargs): self.password_providers = [] # type: List[Any] providers = [] diff --git a/synapse/config/push.py b/synapse/config/push.py index 1b932722a5..0910958649 100644 --- a/synapse/config/push.py +++ b/synapse/config/push.py @@ -18,6 +18,8 @@ from ._base import Config class PushConfig(Config): + section = "push" + def read_config(self, config, **kwargs): push_config = config.get("push", {}) self.push_include_content = push_config.get("include_content", True) diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index 587e2862b7..947f653e03 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -36,6 +36,8 @@ class FederationRateLimitConfig(object): class RatelimitConfig(Config): + section = "ratelimiting" + def read_config(self, config, **kwargs): # Load the new-style messages config if it exists. 
Otherwise fall back diff --git a/synapse/config/registration.py b/synapse/config/registration.py index bef89e2bf4..b3e3e6dda2 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -24,6 +24,8 @@ from synapse.util.stringutils import random_string_with_symbols class AccountValidityConfig(Config): + section = "accountvalidity" + def __init__(self, config, synapse_config): self.enabled = config.get("enabled", False) self.renew_by_email_enabled = "renew_at" in config @@ -77,6 +79,8 @@ class AccountValidityConfig(Config): class RegistrationConfig(Config): + section = "registration" + def read_config(self, config, **kwargs): self.enable_registration = bool( strtobool(str(config.get("enable_registration", False))) diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 14740891f3..d0205e14b9 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -78,6 +78,8 @@ def parse_thumbnail_requirements(thumbnail_sizes): class ContentRepositoryConfig(Config): + section = "media" + def read_config(self, config, **kwargs): # Only enable the media repo if either the media repo is enabled or the diff --git a/synapse/config/room_directory.py b/synapse/config/room_directory.py index a92693017b..7c9f05bde4 100644 --- a/synapse/config/room_directory.py +++ b/synapse/config/room_directory.py @@ -19,6 +19,8 @@ from ._base import Config, ConfigError class RoomDirectoryConfig(Config): + section = "roomdirectory" + def read_config(self, config, **kwargs): self.enable_room_list_search = config.get("enable_room_list_search", True) diff --git a/synapse/config/saml2_config.py b/synapse/config/saml2_config.py index ab34b41ca8..c407e13680 100644 --- a/synapse/config/saml2_config.py +++ b/synapse/config/saml2_config.py @@ -55,6 +55,8 @@ def _dict_merge(merge_dict, into_dict): class SAML2Config(Config): + section = "saml2" + def read_config(self, config, **kwargs): self.saml2_enabled = False diff --git a/synapse/config/server.py b/synapse/config/server.py index 709bd387e5..afc4d6a4ab 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -58,6 +58,8 @@ on how to configure the new listener. class ServerConfig(Config): + section = "server" + def read_config(self, config, **kwargs): self.server_name = config["server_name"] self.server_context = config.get("server_context", None) diff --git a/synapse/config/server_notices_config.py b/synapse/config/server_notices_config.py index 6d4285ef93..6ea2ea8869 100644 --- a/synapse/config/server_notices_config.py +++ b/synapse/config/server_notices_config.py @@ -59,6 +59,8 @@ class ServerNoticesConfig(Config): None if server notices are not enabled. 
""" + section = "servernotices" + def __init__(self, *args): super(ServerNoticesConfig, self).__init__(*args) self.server_notices_mxid = None diff --git a/synapse/config/spam_checker.py b/synapse/config/spam_checker.py index e40797ab50..36e0ddab5c 100644 --- a/synapse/config/spam_checker.py +++ b/synapse/config/spam_checker.py @@ -19,6 +19,8 @@ from ._base import Config class SpamCheckerConfig(Config): + section = "spamchecker" + def read_config(self, config, **kwargs): self.spam_checker = None diff --git a/synapse/config/stats.py b/synapse/config/stats.py index b18ddbd1fa..62485189ea 100644 --- a/synapse/config/stats.py +++ b/synapse/config/stats.py @@ -25,6 +25,8 @@ class StatsConfig(Config): Configuration for the behaviour of synapse's stats engine """ + section = "stats" + def read_config(self, config, **kwargs): self.stats_enabled = True self.stats_bucket_size = 86400 * 1000 diff --git a/synapse/config/third_party_event_rules.py b/synapse/config/third_party_event_rules.py index b3431441b9..10a99c792e 100644 --- a/synapse/config/third_party_event_rules.py +++ b/synapse/config/third_party_event_rules.py @@ -19,6 +19,8 @@ from ._base import Config class ThirdPartyRulesConfig(Config): + section = "thirdpartyrules" + def read_config(self, config, **kwargs): self.third_party_event_rules = None diff --git a/synapse/config/tls.py b/synapse/config/tls.py index fc47ba3e9a..f06341eb67 100644 --- a/synapse/config/tls.py +++ b/synapse/config/tls.py @@ -18,6 +18,7 @@ import os import warnings from datetime import datetime from hashlib import sha256 +from typing import List import six @@ -33,7 +34,9 @@ logger = logging.getLogger(__name__) class TlsConfig(Config): - def read_config(self, config, config_dir_path, **kwargs): + section = "tls" + + def read_config(self, config: dict, config_dir_path: str, **kwargs): acme_config = config.get("acme", None) if acme_config is None: @@ -57,7 +60,7 @@ class TlsConfig(Config): self.tls_certificate_file = self.abspath(config.get("tls_certificate_path")) self.tls_private_key_file = self.abspath(config.get("tls_private_key_path")) - if self.has_tls_listener(): + if self.root.server.has_tls_listener(): if not self.tls_certificate_file: raise ConfigError( "tls_certificate_path must be specified if TLS-enabled listeners are " @@ -108,7 +111,7 @@ class TlsConfig(Config): ) # Support globs (*) in whitelist values - self.federation_certificate_verification_whitelist = [] + self.federation_certificate_verification_whitelist = [] # type: List[str] for entry in fed_whitelist_entries: try: entry_regex = glob_to_regex(entry.encode("ascii").decode("ascii")) diff --git a/synapse/config/tracer.py b/synapse/config/tracer.py index 85d99a3166..8be1346113 100644 --- a/synapse/config/tracer.py +++ b/synapse/config/tracer.py @@ -19,6 +19,8 @@ from ._base import Config, ConfigError class TracerConfig(Config): + section = "tracing" + def read_config(self, config, **kwargs): opentracing_config = config.get("opentracing") if opentracing_config is None: diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py index f6313e17d4..c8d19c5d6b 100644 --- a/synapse/config/user_directory.py +++ b/synapse/config/user_directory.py @@ -21,6 +21,8 @@ class UserDirectoryConfig(Config): Configuration for the behaviour of the /user_directory API """ + section = "userdirectory" + def read_config(self, config, **kwargs): self.user_directory_search_enabled = True self.user_directory_search_all_users = False diff --git a/synapse/config/voip.py b/synapse/config/voip.py index 
2ca0e1cf70..a68a3068aa 100644 --- a/synapse/config/voip.py +++ b/synapse/config/voip.py @@ -16,6 +16,8 @@ from ._base import Config class VoipConfig(Config): + section = "voip" + def read_config(self, config, **kwargs): self.turn_uris = config.get("turn_uris", []) self.turn_shared_secret = config.get("turn_shared_secret") diff --git a/synapse/config/workers.py b/synapse/config/workers.py index 1ec4998625..fef72ed974 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -21,6 +21,8 @@ class WorkerConfig(Config): They have their own pid_file and listener configuration. They use the replication_url to talk to the main synapse process.""" + section = "worker" + def read_config(self, config, **kwargs): self.worker_app = config.get("worker_app") diff --git a/tests/config/test_tls.py b/tests/config/test_tls.py index b02780772a..1be6ff563b 100644 --- a/tests/config/test_tls.py +++ b/tests/config/test_tls.py @@ -21,17 +21,24 @@ import yaml from OpenSSL import SSL +from synapse.config._base import Config, RootConfig from synapse.config.tls import ConfigError, TlsConfig from synapse.crypto.context_factory import ClientTLSOptionsFactory from tests.unittest import TestCase -class TestConfig(TlsConfig): +class FakeServer(Config): + section = "server" + def has_tls_listener(self): return False +class TestConfig(RootConfig): + config_classes = [FakeServer, TlsConfig] + + class TLSConfigTests(TestCase): def test_warn_self_signed(self): """ @@ -202,13 +209,13 @@ s4niecZKPBizL6aucT59CsunNmmb5Glq8rlAcU+1ZTZZzGYqVYhF6axB9Qg= conf = TestConfig() conf.read_config( yaml.safe_load( - TestConfig().generate_config_section( + TestConfig().generate_config( "/config_dir_path", "my_super_secure_server", "/data_dir_path", - "/tls_cert_path", - "tls_private_key", - None, # This is the acme_domain + tls_certificate_path="/tls_cert_path", + tls_private_key_path="tls_private_key", + acme_domain=None, # This is the acme_domain ) ), "/config_dir_path", @@ -223,13 +230,13 @@ s4niecZKPBizL6aucT59CsunNmmb5Glq8rlAcU+1ZTZZzGYqVYhF6axB9Qg= conf = TestConfig() conf.read_config( yaml.safe_load( - TestConfig().generate_config_section( + TestConfig().generate_config( "/config_dir_path", "my_super_secure_server", "/data_dir_path", - "/tls_cert_path", - "tls_private_key", - "my_supe_secure_server", # This is the acme_domain + tls_certificate_path="/tls_cert_path", + tls_private_key_path="tls_private_key", + acme_domain="my_supe_secure_server", # This is the acme_domain ) ), "/config_dir_path", diff --git a/tox.ini b/tox.ini index 1bce10a4ce..367cc2ccf2 100644 --- a/tox.ini +++ b/tox.ini @@ -163,10 +163,9 @@ deps = {[base]deps} mypy mypy-zope - typeshed env = MYPYPATH = stubs/ extras = all -commands = mypy --show-traceback \ +commands = mypy --show-traceback --check-untyped-defs --show-error-codes --follow-imports=normal \ synapse/logging/ \ synapse/config/ From da815c1f695ceca56643d7814c96f7a3cfa3c70a Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 10 Oct 2019 10:06:45 +0100 Subject: [PATCH 79/85] Move tag/push rules room upgrade checking earlier (#6155) It turns out that _local_membership_update doesn't run when you join a new, remote room. It only runs if you're joining a room that your server already knows about. This would explain #4703 and #5295 and why the transfer would work in testing and some rooms, but not others. This would especially hit single-user homeservers.
The check has been moved to right after the room has been joined, and works much more reliably. (Though it may still be a somewhat awkward place for it.) --- changelog.d/6155.bugfix | 1 + synapse/handlers/room_member.py | 62 +++++++++++++++++++++++---------- 2 files changed, 45 insertions(+), 18 deletions(-) create mode 100644 changelog.d/6155.bugfix diff --git a/changelog.d/6155.bugfix b/changelog.d/6155.bugfix new file mode 100644 index 0000000000..e32c0dce09 --- /dev/null +++ b/changelog.d/6155.bugfix @@ -0,0 +1 @@ +Fix transferring notifications and tags when joining an upgraded room that is new to your server. \ No newline at end of file diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 95a244d86c..380e2fad5e 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -203,23 +203,11 @@ class RoomMemberHandler(object): prev_member_event = yield self.store.get_event(prev_member_event_id) newly_joined = prev_member_event.membership != Membership.JOIN if newly_joined: + # Copy over user state if we're joining an upgraded room + yield self.copy_user_state_if_room_upgrade( + room_id, requester.user.to_string() + ) yield self._user_joined_room(target, room_id) - - # Copy over direct message status and room tags if this is a join - # on an upgraded room - - # Check if this is an upgraded room - predecessor = yield self.store.get_room_predecessor(room_id) - - if predecessor: - # It is an upgraded room. Copy over old tags - self.copy_room_tags_and_direct_to_room( - predecessor["room_id"], room_id, user_id - ) - # Copy over push rules - yield self.store.copy_push_rules_from_room_to_room_for_user( - predecessor["room_id"], room_id, user_id - ) elif event.membership == Membership.LEAVE: if prev_member_event_id: prev_member_event = yield self.store.get_event(prev_member_event_id) @@ -463,10 +451,16 @@ class RoomMemberHandler(object): if requester.is_guest: content["kind"] = "guest" - ret = yield self._remote_join( + remote_join_response = yield self._remote_join( requester, remote_room_hosts, room_id, target, content ) - return ret + + # Copy over user state if this is a join on a remote upgraded room + yield self.copy_user_state_if_room_upgrade( + room_id, requester.user.to_string() + ) + + return remote_join_response elif effective_membership_state == Membership.LEAVE: if not is_host_in_room: @@ -503,6 +497,38 @@ class RoomMemberHandler(object): ) return res + @defer.inlineCallbacks + def copy_user_state_if_room_upgrade(self, new_room_id, user_id): + """Copy user-specific information when they join a new room if that new room is the + result of a room upgrade + + Args: + new_room_id (str): The ID of the room the user is joining + user_id (str): The ID of the user + + Returns: + Deferred + """ + # Check if the new room is an upgraded room + predecessor = yield self.store.get_room_predecessor(new_room_id) + if not predecessor: + return + + logger.debug( + "Found predecessor for %s: %s. Copying over room tags and push " "rules", + new_room_id, + predecessor, + ) + + # It is an upgraded room.
Copy over old tags + yield self.copy_room_tags_and_direct_to_room( + predecessor["room_id"], new_room_id, user_id + ) + # Copy over push rules + yield self.store.copy_push_rules_from_room_to_room_for_user( + predecessor["room_id"], new_room_id, user_id + ) + @defer.inlineCallbacks def send_membership_event(self, requester, event, context, ratelimit=True): """ From 562b4e51dd0e7d4a6f776502b9ac357ed3428445 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 10 Oct 2019 11:28:23 +0100 Subject: [PATCH 80/85] Rewrite the user_filter migration again (#6184) you can't plausibly ALTER TABLE in sqlite, so we create the new table with the right schema to start with. --- changelog.d/6184.misc | 1 + .../delta/56/unique_user_filter_index.py | 58 ++++++++++--------- 2 files changed, 33 insertions(+), 26 deletions(-) create mode 100644 changelog.d/6184.misc diff --git a/changelog.d/6184.misc b/changelog.d/6184.misc new file mode 100644 index 0000000000..30b3e56082 --- /dev/null +++ b/changelog.d/6184.misc @@ -0,0 +1 @@ +Update `user_filters` table to have a unique index, and non-null columns. Thanks to @pik for contributing this. \ No newline at end of file diff --git a/synapse/storage/schema/delta/56/unique_user_filter_index.py b/synapse/storage/schema/delta/56/unique_user_filter_index.py index 60031f23ca..1de8b54961 100644 --- a/synapse/storage/schema/delta/56/unique_user_filter_index.py +++ b/synapse/storage/schema/delta/56/unique_user_filter_index.py @@ -5,42 +5,48 @@ from synapse.storage.engines import PostgresEngine logger = logging.getLogger(__name__) +""" +This migration updates the user_filters table as follows: + + - drops any (user_id, filter_id) duplicates + - makes the columns NON-NULLable + - turns the index into a UNIQUE index +""" + + def run_upgrade(cur, database_engine, *args, **kwargs): + pass + + +def run_create(cur, database_engine, *args, **kwargs): if isinstance(database_engine, PostgresEngine): select_clause = """ - CREATE TEMPORARY TABLE user_filters_migration AS SELECT DISTINCT ON (user_id, filter_id) user_id, filter_id, filter_json - FROM user_filters; + FROM user_filters """ else: select_clause = """ - CREATE TEMPORARY TABLE user_filters_migration AS - SELECT * FROM user_filters GROUP BY user_id, filter_id; + SELECT * FROM user_filters GROUP BY user_id, filter_id """ - sql = ( - """ - BEGIN; - %s - DROP INDEX user_filters_by_user_id_filter_id; - DELETE FROM user_filters; - ALTER TABLE user_filters - ALTER COLUMN user_id SET NOT NULL, - ALTER COLUMN filter_id SET NOT NULL, - ALTER COLUMN filter_json SET NOT NULL; - INSERT INTO user_filters(user_id, filter_id, filter_json) - SELECT * FROM user_filters_migration; - DROP TABLE user_filters_migration; - CREATE UNIQUE INDEX user_filters_by_user_id_filter_id_unique - ON user_filters(user_id, filter_id); - END; - """ - % select_clause + sql = """ + DROP TABLE IF EXISTS user_filters_migration; + DROP INDEX IF EXISTS user_filters_unique; + CREATE TABLE user_filters_migration ( + user_id TEXT NOT NULL, + filter_id BIGINT NOT NULL, + filter_json BYTEA NOT NULL + ); + INSERT INTO user_filters_migration (user_id, filter_id, filter_json) + %s; + CREATE UNIQUE INDEX user_filters_unique ON user_filters_migration + (user_id, filter_id); + DROP TABLE user_filters; + ALTER TABLE user_filters_migration RENAME TO user_filters; + """ % ( + select_clause, ) + if isinstance(database_engine, PostgresEngine): cur.execute(sql) else: cur.executescript(sql) - - -def run_create(cur, database_engine, 
*args, **kwargs): - pass From a139420a3cfda6a4a4ee4750611b31dd71fc33f3 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 10 Oct 2019 11:29:01 +0100 Subject: [PATCH 81/85] Fix races in room stats (and other) updates. (#6187) Hopefully this will fix the occasional failures we were seeing in the room directory. The problem was that events are not necessarily persisted (and `current_state_delta_stream` updated) in the same order as their stream_id. So for instance current_state_delta 9 might be persisted *before* current_state_delta 8. Then, when the room stats saw stream_id 9, it assumed it had done everything up to 9, and never came back to do stream_id 8. We can solve this easily by only processing up to the stream_id where we know all events have been persisted. --- changelog.d/6187.bugfix | 1 + synapse/handlers/presence.py | 16 +++++++++---- synapse/handlers/stats.py | 12 ++++++---- synapse/handlers/user_directory.py | 17 +++++++++---- synapse/storage/state_deltas.py | 38 +++++++++++++++++++++++------- tests/handlers/test_typing.py | 2 +- tests/rest/admin/test_admin.py | 2 +- 7 files changed, 63 insertions(+), 25 deletions(-) create mode 100644 changelog.d/6187.bugfix diff --git a/changelog.d/6187.bugfix b/changelog.d/6187.bugfix new file mode 100644 index 0000000000..6142c5b98d --- /dev/null +++ b/changelog.d/6187.bugfix @@ -0,0 +1 @@ +Fix occasional missed updates in the room and user directories. \ No newline at end of file diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 053cf66b28..2a5f1a007d 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -803,17 +803,25 @@ class PresenceHandler(object): # Loop round handling deltas until we're up to date while True: with Measure(self.clock, "presence_delta"): - deltas = yield self.store.get_current_state_deltas(self._event_pos) - if not deltas: + room_max_stream_ordering = self.store.get_room_max_stream_ordering() + if self._event_pos == room_max_stream_ordering: return + logger.debug( + "Processing presence stats %s->%s", + self._event_pos, + room_max_stream_ordering, + ) + max_pos, deltas = yield self.store.get_current_state_deltas( + self._event_pos, room_max_stream_ordering + ) yield self._handle_state_delta(deltas) - self._event_pos = deltas[-1]["stream_id"] + self._event_pos = max_pos # Expose current event processing position to prometheus synapse.metrics.event_processing_positions.labels("presence").set( - self._event_pos + max_pos ) @defer.inlineCallbacks diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py index c62b113115..466daf9202 100644 --- a/synapse/handlers/stats.py +++ b/synapse/handlers/stats.py @@ -87,21 +87,23 @@ class StatsHandler(StateDeltasHandler): # Be sure to read the max stream_ordering *before* checking if there are any outstanding # deltas, since there is otherwise a chance that we could miss updates which arrive # after we check the deltas. 
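[Editor's note: the pattern these handlers now follow — read the known-persisted upper bound first, then only process deltas up to it — can be distilled into a runnable toy. `ToyStore` below is invented for illustration and is not Synapse's storage API.]

from typing import List, Tuple


class ToyStore(object):
    """Stand-in store: `persisted` holds the stream ids of rows that are
    definitely persisted, in ascending order."""

    def __init__(self, persisted: List[int]):
        self.persisted = persisted

    def get_room_max_stream_ordering(self) -> int:
        # An upper bound below which everything is known to be persisted.
        return self.persisted[-1]

    def get_current_state_deltas(self, prev: int, max_id: int) -> Tuple[int, List[int]]:
        batch = [s for s in self.persisted if prev < s <= max_id][:100]
        # Clip to the last row actually returned, so the caller resumes
        # from a point that is known to be complete.
        clipped = batch[-1] if len(batch) == 100 else max_id
        return clipped, batch


def catch_up(store: ToyStore, pos: int) -> int:
    while True:
        max_pos = store.get_room_max_stream_ordering()
        if pos == max_pos:
            return pos  # up to date; nothing can have been missed
        pos, deltas = store.get_current_state_deltas(pos, max_pos)
        # ... process `deltas` here ...


print(catch_up(ToyStore([1, 2, 3, 5, 8]), 0))  # -> 8

The important property is that `max_pos` is read first and used as a hard upper bound: a delta that is persisted late with a smaller stream id can no longer be skipped over.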
- room_max_stream_ordering = yield self.store.get_room_max_stream_ordering() + room_max_stream_ordering = self.store.get_room_max_stream_ordering() if self.pos == room_max_stream_ordering: break - deltas = yield self.store.get_current_state_deltas(self.pos) + logger.debug( + "Processing room stats %s->%s", self.pos, room_max_stream_ordering + ) + max_pos, deltas = yield self.store.get_current_state_deltas( + self.pos, room_max_stream_ordering + ) if deltas: logger.debug("Handling %d state deltas", len(deltas)) room_deltas, user_deltas = yield self._handle_deltas(deltas) - - max_pos = deltas[-1]["stream_id"] else: room_deltas = {} user_deltas = {} - max_pos = room_max_stream_ordering # Then count deltas for total_events and total_event_bytes. room_count, user_count = yield self.store.get_changes_room_total_events_and_bytes( diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index e53669e40d..624f05ab5b 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -138,21 +138,28 @@ class UserDirectoryHandler(StateDeltasHandler): # Loop round handling deltas until we're up to date while True: with Measure(self.clock, "user_dir_delta"): - deltas = yield self.store.get_current_state_deltas(self.pos) - if not deltas: + room_max_stream_ordering = self.store.get_room_max_stream_ordering() + if self.pos == room_max_stream_ordering: return + logger.debug( + "Processing user stats %s->%s", self.pos, room_max_stream_ordering + ) + max_pos, deltas = yield self.store.get_current_state_deltas( + self.pos, room_max_stream_ordering + ) + logger.info("Handling %d state deltas", len(deltas)) yield self._handle_deltas(deltas) - self.pos = deltas[-1]["stream_id"] + self.pos = max_pos # Expose current event processing position to prometheus synapse.metrics.event_processing_positions.labels("user_dir").set( - self.pos + max_pos ) - yield self.store.update_user_directory_stream_pos(self.pos) + yield self.store.update_user_directory_stream_pos(max_pos) @defer.inlineCallbacks def _handle_deltas(self, deltas): diff --git a/synapse/storage/state_deltas.py b/synapse/storage/state_deltas.py index 5fdb442104..28f33ec18f 100644 --- a/synapse/storage/state_deltas.py +++ b/synapse/storage/state_deltas.py @@ -21,7 +21,7 @@ logger = logging.getLogger(__name__) class StateDeltasStore(SQLBaseStore): - def get_current_state_deltas(self, prev_stream_id): + def get_current_state_deltas(self, prev_stream_id: int, max_stream_id: int): """Fetch a list of room state changes since the given stream id Each entry in the result contains the following fields: @@ -36,15 +36,27 @@ class StateDeltasStore(SQLBaseStore): Args: prev_stream_id (int): point to get changes since (exclusive) + max_stream_id (int): the point that we know has been correctly persisted + - ie, an upper limit to return changes from. Returns: - Deferred[list[dict]]: results + Deferred[tuple[int, list[dict]]: A tuple consisting of: + - the stream id which these results go up to + - list of current_state_delta_stream rows. If it is empty, we are + up to date. """ prev_stream_id = int(prev_stream_id) + + # check we're not going backwards + assert prev_stream_id <= max_stream_id + if not self._curr_state_delta_stream_cache.has_any_entity_changed( prev_stream_id ): - return [] + # if the CSDs haven't changed between prev_stream_id and now, we + # know for certain that they haven't changed between prev_stream_id and + # max_stream_id. 
+ return max_stream_id, [] def get_current_state_deltas_txn(txn): # First we calculate the max stream id that will give us less than @@ -54,21 +66,29 @@ class StateDeltasStore(SQLBaseStore): sql = """ SELECT stream_id, count(*) FROM current_state_delta_stream - WHERE stream_id > ? + WHERE stream_id > ? AND stream_id <= ? GROUP BY stream_id ORDER BY stream_id ASC LIMIT 100 """ - txn.execute(sql, (prev_stream_id,)) + txn.execute(sql, (prev_stream_id, max_stream_id)) total = 0 - max_stream_id = prev_stream_id - for max_stream_id, count in txn: + + for stream_id, count in txn: total += count if total > 100: # We arbitarily limit to 100 entries to ensure we don't # select toooo many. + logger.debug( + "Clipping current_state_delta_stream rows to stream_id %i", + stream_id, + ) + clipped_stream_id = stream_id break + else: + # if there's no problem, we may as well go right up to the max_stream_id + clipped_stream_id = max_stream_id # Now actually get the deltas sql = """ @@ -77,8 +97,8 @@ class StateDeltasStore(SQLBaseStore): WHERE ? < stream_id AND stream_id <= ? ORDER BY stream_id ASC """ - txn.execute(sql, (prev_stream_id, max_stream_id)) - return self.cursor_to_dict(txn) + txn.execute(sql, (prev_stream_id, clipped_stream_id)) + return clipped_stream_id, self.cursor_to_dict(txn) return self.runInteraction( "get_current_state_deltas", get_current_state_deltas_txn diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index 1f2ef5d01f..67f1013051 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -139,7 +139,7 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): defer.succeed(1) ) - self.datastore.get_current_state_deltas.return_value = None + self.datastore.get_current_state_deltas.return_value = (0, None) self.datastore.get_to_device_stream_token = lambda: 0 self.datastore.get_new_device_msgs_for_remote = lambda *args, **kargs: ([], 0) diff --git a/tests/rest/admin/test_admin.py b/tests/rest/admin/test_admin.py index 5877bb2133..d3a4f717f7 100644 --- a/tests/rest/admin/test_admin.py +++ b/tests/rest/admin/test_admin.py @@ -62,7 +62,7 @@ class UserRegisterTestCase(unittest.HomeserverTestCase): self.device_handler.check_device_registered = Mock(return_value="FAKE") self.datastore = Mock(return_value=Mock()) - self.datastore.get_current_state_deltas = Mock(return_value=[]) + self.datastore.get_current_state_deltas = Mock(return_value=(0, [])) self.secrets = Mock() From 0aee4900131bf97dde0f0ff5d1f7133147ff5bc7 Mon Sep 17 00:00:00 2001 From: James Date: Thu, 10 Oct 2019 21:59:06 +1100 Subject: [PATCH 82/85] Add snapcraft packaging information (#6084) --- .gitignore | 1 + changelog.d/6084.misc | 1 + snap/snapcraft.yaml | 22 ++++++++++++++++++++++ 3 files changed, 24 insertions(+) create mode 100644 changelog.d/6084.misc create mode 100644 snap/snapcraft.yaml diff --git a/.gitignore b/.gitignore index 747b8714d7..af36c00cfa 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ *.egg-info *.lock *.pyc +*.snap *.tac _trial_temp/ _trial_temp*/ diff --git a/changelog.d/6084.misc b/changelog.d/6084.misc new file mode 100644 index 0000000000..3c33701651 --- /dev/null +++ b/changelog.d/6084.misc @@ -0,0 +1 @@ +Add snapcraft packaging information. Contributed by @devec0. 
diff --git a/snap/snapcraft.yaml b/snap/snapcraft.yaml new file mode 100644 index 0000000000..1f7df71db2 --- /dev/null +++ b/snap/snapcraft.yaml @@ -0,0 +1,22 @@ +name: matrix-synapse +base: core18 +version: git +summary: Reference Matrix homeserver +description: | + Synapse is the reference Matrix homeserver. + Matrix is a federated and decentralised instant messaging and VoIP system. + +grade: stable +confinement: strict + +apps: + matrix-synapse: + command: synctl --no-daemonize start $SNAP_COMMON/homeserver.yaml + stop-command: synctl -c $SNAP_COMMON stop + plugs: [network-bind, network] + daemon: simple +parts: + matrix-synapse: + source: . + plugin: python + python-version: python3 From 2efd050c9db2e96fd96535dc9b1c6f54acbd163d Mon Sep 17 00:00:00 2001 From: krombel Date: Thu, 10 Oct 2019 13:59:55 +0200 Subject: [PATCH 83/85] Send 404 as the HTTP status when a filter ID is unknown to the server (#2380) This fixes the weirdness of returning 400 rather than 404 as the HTTP status code when the filter ID is not known to the server. As clients such as matrix-js-sdk expect a 404 to catch this situation, the old behaviour led to unwanted results. --- changelog.d/2380.bugfix | 1 + synapse/rest/client/v2_alpha/filter.py | 12 ++++--- synapse/rest/client/v2_alpha/sync.py | 41 +++++++++++++---------- tests/rest/client/v2_alpha/test_filter.py | 2 +- 4 files changed, 33 insertions(+), 23 deletions(-) create mode 100644 changelog.d/2380.bugfix diff --git a/changelog.d/2380.bugfix b/changelog.d/2380.bugfix new file mode 100644 index 0000000000..eae3206031 --- /dev/null +++ b/changelog.d/2380.bugfix @@ -0,0 +1 @@ +Return an HTTP 404 instead of 400 when requesting a filter by ID that is unknown to the server. Thanks to @krombel for contributing this! diff --git a/synapse/rest/client/v2_alpha/filter.py b/synapse/rest/client/v2_alpha/filter.py index c6ddf24c8d..17a8bc7366 100644 --- a/synapse/rest/client/v2_alpha/filter.py +++ b/synapse/rest/client/v2_alpha/filter.py @@ -17,7 +17,7 @@ import logging from twisted.internet import defer -from synapse.api.errors import AuthError, Codes, StoreError, SynapseError +from synapse.api.errors import AuthError, NotFoundError, StoreError, SynapseError from synapse.http.servlet import RestServlet, parse_json_object_from_request from synapse.types import UserID @@ -52,13 +52,15 @@ class GetFilterRestServlet(RestServlet): raise SynapseError(400, "Invalid filter_id") try: - filter = yield self.filtering.get_user_filter( + filter_collection = yield self.filtering.get_user_filter( user_localpart=target_user.localpart, filter_id=filter_id ) + except StoreError as e: + if e.code != 404: + raise + raise NotFoundError("No such filter") - return 200, filter.get_filter_json() - except (KeyError, StoreError): - raise SynapseError(400, "No such filter", errcode=Codes.NOT_FOUND) + return 200, filter_collection.get_filter_json() class CreateFilterRestServlet(RestServlet): diff --git a/synapse/rest/client/v2_alpha/sync.py b/synapse/rest/client/v2_alpha/sync.py index c98c5a3802..a883c8adda 100644 --- a/synapse/rest/client/v2_alpha/sync.py +++ b/synapse/rest/client/v2_alpha/sync.py @@ -21,7 +21,7 @@ from canonicaljson import json from twisted.internet import defer from synapse.api.constants import PresenceState -from synapse.api.errors import SynapseError +from synapse.api.errors import Codes, StoreError, SynapseError from synapse.api.filtering import DEFAULT_FILTER_COLLECTION, FilterCollection from synapse.events.utils import ( format_event_for_client_v2_without_room_id, @@ -119,25 +119,32 @@ class
SyncRestServlet(RestServlet): request_key = (user, timeout, since, filter_id, full_state, device_id) - if filter_id: - if filter_id.startswith("{"): - try: - filter_object = json.loads(filter_id) - set_timeline_upper_limit( - filter_object, self.hs.config.filter_timeline_limit - ) - except Exception: - raise SynapseError(400, "Invalid filter JSON") - self.filtering.check_valid_filter(filter_object) - filter = FilterCollection(filter_object) - else: - filter = yield self.filtering.get_user_filter(user.localpart, filter_id) + if filter_id is None: + filter_collection = DEFAULT_FILTER_COLLECTION + elif filter_id.startswith("{"): + try: + filter_object = json.loads(filter_id) + set_timeline_upper_limit( + filter_object, self.hs.config.filter_timeline_limit + ) + except Exception: + raise SynapseError(400, "Invalid filter JSON") + self.filtering.check_valid_filter(filter_object) + filter_collection = FilterCollection(filter_object) else: - filter = DEFAULT_FILTER_COLLECTION + try: + filter_collection = yield self.filtering.get_user_filter( + user.localpart, filter_id + ) + except StoreError as err: + if err.code != 404: + raise + # fix up the description and errcode to be more useful + raise SynapseError(400, "No such filter", errcode=Codes.INVALID_PARAM) sync_config = SyncConfig( user=user, - filter_collection=filter, + filter_collection=filter_collection, is_guest=requester.is_guest, request_key=request_key, device_id=device_id, @@ -171,7 +178,7 @@ class SyncRestServlet(RestServlet): time_now = self.clock.time_msec() response_content = yield self.encode_response( - time_now, sync_result, requester.access_token_id, filter + time_now, sync_result, requester.access_token_id, filter_collection ) return 200, response_content diff --git a/tests/rest/client/v2_alpha/test_filter.py b/tests/rest/client/v2_alpha/test_filter.py index f42a8efbf4..e0e9e94fbf 100644 --- a/tests/rest/client/v2_alpha/test_filter.py +++ b/tests/rest/client/v2_alpha/test_filter.py @@ -92,7 +92,7 @@ class FilterTestCase(unittest.HomeserverTestCase): ) self.render(request) - self.assertEqual(channel.result["code"], b"400") + self.assertEqual(channel.result["code"], b"404") self.assertEquals(channel.json_body["errcode"], Codes.NOT_FOUND) # Currently invalid params do not have an appropriate errcode From 9a84d74417a1c9fbcd6c57e7ef23e5590e04ef49 Mon Sep 17 00:00:00 2001 From: Michael Telatynski <7t3chguy@gmail.com> Date: Thu, 10 Oct 2019 13:03:44 +0100 Subject: [PATCH 84/85] Before fulfilling a group invite, check if the user is already joined/invited (#3436) Fixes vector-im/riot-web#5645 --- changelog.d/3436.bugfix | 1 + synapse/groups/groups_server.py | 20 ++++++++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) create mode 100644 changelog.d/3436.bugfix diff --git a/changelog.d/3436.bugfix b/changelog.d/3436.bugfix new file mode 100644 index 0000000000..15714a11e0 --- /dev/null +++ b/changelog.d/3436.bugfix @@ -0,0 +1 @@ +Fix a problem where users could be invited twice to the same group. diff --git a/synapse/groups/groups_server.py b/synapse/groups/groups_server.py index d50e691436..8f10b6adbb 100644 --- a/synapse/groups/groups_server.py +++ b/synapse/groups/groups_server.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Copyright 2017 Vector Creations Ltd # Copyright 2018 New Vector Ltd +# Copyright 2019 Michael Telatynski <7t3chguy@gmail.com> # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.
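[Editor's note: in sketch form, the membership guard added in the hunk below amounts to the following. Plain sets stand in for the group store, and `ToyGroupError` stands in for SynapseError; everything here is illustrative, not Synapse's actual API.]

class ToyGroupError(Exception):
    """Stand-in for SynapseError; carries an HTTP status code."""

    def __init__(self, code, msg):
        super(ToyGroupError, self).__init__(msg)
        self.code = code


def check_invite_allowed(invited_user_ids, joined_user_ids, user_id):
    # Reject a second invite for the same user (the real change uses
    # errcode M_BAD_STATE for this case)...
    if user_id in invited_user_ids:
        raise ToyGroupError(400, "User already invited to group")
    # ...and reject inviting someone who is already a member.
    if user_id in joined_user_ids:
        raise ToyGroupError(400, "User already in group")


check_invite_allowed({"@b:example.com"}, {"@a:example.com"}, "@c:example.com")  # passes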
@@ -20,16 +21,16 @@ from six import string_types from twisted.internet import defer -from synapse.api.errors import SynapseError +from synapse.api.errors import Codes, SynapseError from synapse.types import GroupID, RoomID, UserID, get_domain_from_id from synapse.util.async_helpers import concurrently_execute logger = logging.getLogger(__name__) -# TODO: Allow users to "knock" or simpkly join depending on rules +# TODO: Allow users to "knock" or simply join depending on rules # TODO: Federation admin APIs -# TODO: is_priveged flag to users and is_public to users and rooms +# TODO: is_privileged flag to users and is_public to users and rooms # TODO: Audit log for admins (profile updates, membership changes, users who tried # to join but were rejected, etc) # TODO: Flairs @@ -590,7 +591,18 @@ class GroupsServerHandler(object): ) # TODO: Check if user knocked - # TODO: Check if user is already invited + + invited_users = yield self.store.get_invited_users_in_group(group_id) + if user_id in invited_users: + raise SynapseError( + 400, "User already invited to group", errcode=Codes.BAD_STATE + ) + + user_results = yield self.store.get_users_in_group( + group_id, include_private=True + ) + if user_id in [user_result["user_id"] for user_result in user_results]: + raise SynapseError(400, "User already in group") content = { "profile": {"name": group["name"], "avatar_url": group["avatar_url"]}, From b5b03b7079a9baa34a25915d6a569e383e8307c3 Mon Sep 17 00:00:00 2001 From: werner291 Date: Thu, 10 Oct 2019 14:05:48 +0200 Subject: [PATCH 85/85] Add domain validation when creating room with list of invitees (#6121) --- changelog.d/4088.bugfix | 1 + synapse/handlers/room.py | 4 +++- tests/rest/client/v1/test_rooms.py | 9 +++++++++ 3 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 changelog.d/4088.bugfix diff --git a/changelog.d/4088.bugfix b/changelog.d/4088.bugfix new file mode 100644 index 0000000000..61722b6224 --- /dev/null +++ b/changelog.d/4088.bugfix @@ -0,0 +1 @@ +Added domain validation when including a list of invitees upon room creation. 
\ No newline at end of file diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 970be3c846..2816bd8f87 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -28,6 +28,7 @@ from twisted.internet import defer from synapse.api.constants import EventTypes, JoinRules, RoomCreationPreset from synapse.api.errors import AuthError, Codes, NotFoundError, StoreError, SynapseError from synapse.api.room_versions import KNOWN_ROOM_VERSIONS +from synapse.http.endpoint import parse_and_validate_server_name from synapse.storage.state import StateFilter from synapse.types import RoomAlias, RoomID, RoomStreamToken, StreamToken, UserID from synapse.util import stringutils @@ -554,7 +555,8 @@ class RoomCreationHandler(BaseHandler): invite_list = config.get("invite", []) for i in invite_list: try: - UserID.from_string(i) + uid = UserID.from_string(i) + parse_and_validate_server_name(uid.domain) except Exception: raise SynapseError(400, "Invalid user_id: %s" % (i,)) diff --git a/tests/rest/client/v1/test_rooms.py b/tests/rest/client/v1/test_rooms.py index fe741637f5..2f2ca74611 100644 --- a/tests/rest/client/v1/test_rooms.py +++ b/tests/rest/client/v1/test_rooms.py @@ -484,6 +484,15 @@ class RoomsCreateTestCase(RoomBase): self.render(request) self.assertEquals(400, channel.code) + def test_post_room_invitees_invalid_mxid(self): + # POST with invalid invitee, see https://github.com/matrix-org/synapse/issues/4088 + # Note the trailing space in the MXID here! + request, channel = self.make_request( + "POST", "/createRoom", b'{"invite":["@alice:example.com "]}' + ) + self.render(request) + self.assertEquals(400, channel.code) + class RoomTopicTestCase(RoomBase): """ Tests /rooms/$room_id/topic REST events. """
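[Editor's note: as a rough illustration of the validation path this last patch tightens, here is a standalone approximation. It is not Synapse's actual `UserID.from_string` or `parse_and_validate_server_name`; the regex is deliberately loose (it does not handle IPv6 literals, for instance) and exists only to show why an MXID with a trailing space is now rejected.]

import re

# Very loose approximation of a server name: hostname, optionally :port.
_SERVER_NAME_RE = re.compile(r"^[A-Za-z0-9.\-]+(:\d+)?$")


def validate_invitee(mxid):
    """Reject malformed MXIDs such as "@alice:example.com " (note the
    trailing space), which previously slipped through."""
    if not mxid.startswith("@") or ":" not in mxid:
        raise ValueError("Invalid user_id: %s" % (mxid,))
    localpart, domain = mxid[1:].split(":", 1)
    if not localpart or not _SERVER_NAME_RE.match(domain):
        raise ValueError("Invalid user_id: %s" % (mxid,))


validate_invitee("@alice:example.com")       # fine
try:
    validate_invitee("@alice:example.com ")  # trailing space: rejected
except ValueError as e:
    print(e)

Validating the domain part matters because an invitee MXID with an unroutable server name would otherwise be accepted at room creation and only fail later, when the invite is actually sent.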