From 0eb029472e5410b780156f12db13434b003f42ae Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 20 Jun 2024 14:34:10 -0500 Subject: [PATCH 01/84] Remove unused `IncludeOldRooms` class --- synapse/types/rest/client/__init__.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/synapse/types/rest/client/__init__.py b/synapse/types/rest/client/__init__.py index 25fbd772f6..5d453769b5 100644 --- a/synapse/types/rest/client/__init__.py +++ b/synapse/types/rest/client/__init__.py @@ -154,10 +154,6 @@ class SlidingSyncBody(RequestBodyModel): (Max 1000 messages) """ - class IncludeOldRooms(RequestBodyModel): - timeline_limit: StrictInt - required_state: List[Tuple[StrictStr, StrictStr]] - required_state: List[Tuple[StrictStr, StrictStr]] # mypy workaround via https://github.com/pydantic/pydantic/issues/156#issuecomment-1130883884 if TYPE_CHECKING: From 13ed84c5738c3a4b25866df64e48b9266b6507fb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 21 Jun 2024 13:41:44 +0100 Subject: [PATCH 02/84] Bump authlib from 1.3.0 to 1.3.1 (#17343) --- poetry.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 58981ff6e1..dc26846f3d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -35,13 +35,13 @@ tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "p [[package]] name = "authlib" -version = "1.3.0" +version = "1.3.1" description = "The ultimate Python library in building OAuth and OpenID Connect servers and clients." 
optional = true python-versions = ">=3.8" files = [ - {file = "Authlib-1.3.0-py2.py3-none-any.whl", hash = "sha256:9637e4de1fb498310a56900b3e2043a206b03cb11c05422014b0302cbc814be3"}, - {file = "Authlib-1.3.0.tar.gz", hash = "sha256:959ea62a5b7b5123c5059758296122b57cd2585ae2ed1c0622c21b371ffdae06"}, + {file = "Authlib-1.3.1-py2.py3-none-any.whl", hash = "sha256:d35800b973099bbadc49b42b256ecb80041ad56b7fe1216a362c7943c088f377"}, + {file = "authlib-1.3.1.tar.gz", hash = "sha256:7ae843f03c06c5c0debd63c9db91f9fda64fa62a42a77419fa15fbb7e7a58917"}, ] [package.dependencies] From f8d57ce656a7f6f3a6629cf17339ebcfbe3f2dba Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 21 Jun 2024 13:41:53 +0100 Subject: [PATCH 03/84] Bump tornado from 6.4 to 6.4.1 (#17344) --- poetry.lock | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/poetry.lock b/poetry.lock index dc26846f3d..d3a37944b5 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2598,22 +2598,22 @@ files = [ [[package]] name = "tornado" -version = "6.4" +version = "6.4.1" description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." 
-optional = true -python-versions = ">= 3.8" +optional = false +python-versions = ">=3.8" files = [ - {file = "tornado-6.4-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:02ccefc7d8211e5a7f9e8bc3f9e5b0ad6262ba2fbb683a6443ecc804e5224ce0"}, - {file = "tornado-6.4-cp38-abi3-macosx_10_9_x86_64.whl", hash = "sha256:27787de946a9cffd63ce5814c33f734c627a87072ec7eed71f7fc4417bb16263"}, - {file = "tornado-6.4-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f7894c581ecdcf91666a0912f18ce5e757213999e183ebfc2c3fdbf4d5bd764e"}, - {file = "tornado-6.4-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e43bc2e5370a6a8e413e1e1cd0c91bedc5bd62a74a532371042a18ef19e10579"}, - {file = "tornado-6.4-cp38-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f0251554cdd50b4b44362f73ad5ba7126fc5b2c2895cc62b14a1c2d7ea32f212"}, - {file = "tornado-6.4-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:fd03192e287fbd0899dd8f81c6fb9cbbc69194d2074b38f384cb6fa72b80e9c2"}, - {file = "tornado-6.4-cp38-abi3-musllinux_1_1_i686.whl", hash = "sha256:88b84956273fbd73420e6d4b8d5ccbe913c65d31351b4c004ae362eba06e1f78"}, - {file = "tornado-6.4-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:71ddfc23a0e03ef2df1c1397d859868d158c8276a0603b96cf86892bff58149f"}, - {file = "tornado-6.4-cp38-abi3-win32.whl", hash = "sha256:6f8a6c77900f5ae93d8b4ae1196472d0ccc2775cc1dfdc9e7727889145c45052"}, - {file = "tornado-6.4-cp38-abi3-win_amd64.whl", hash = "sha256:10aeaa8006333433da48dec9fe417877f8bcc21f48dda8d661ae79da357b2a63"}, - {file = "tornado-6.4.tar.gz", hash = "sha256:72291fa6e6bc84e626589f1c29d90a5a6d593ef5ae68052ee2ef000dfd273dee"}, + {file = "tornado-6.4.1-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:163b0aafc8e23d8cdc3c9dfb24c5368af84a81e3364745ccb4427669bf84aec8"}, + {file = "tornado-6.4.1-cp38-abi3-macosx_10_9_x86_64.whl", hash = 
"sha256:6d5ce3437e18a2b66fbadb183c1d3364fb03f2be71299e7d10dbeeb69f4b2a14"}, + {file = "tornado-6.4.1-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e2e20b9113cd7293f164dc46fffb13535266e713cdb87bd2d15ddb336e96cfc4"}, + {file = "tornado-6.4.1-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ae50a504a740365267b2a8d1a90c9fbc86b780a39170feca9bcc1787ff80842"}, + {file = "tornado-6.4.1-cp38-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:613bf4ddf5c7a95509218b149b555621497a6cc0d46ac341b30bd9ec19eac7f3"}, + {file = "tornado-6.4.1-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:25486eb223babe3eed4b8aecbac33b37e3dd6d776bc730ca14e1bf93888b979f"}, + {file = "tornado-6.4.1-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:454db8a7ecfcf2ff6042dde58404164d969b6f5d58b926da15e6b23817950fc4"}, + {file = "tornado-6.4.1-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a02a08cc7a9314b006f653ce40483b9b3c12cda222d6a46d4ac63bb6c9057698"}, + {file = "tornado-6.4.1-cp38-abi3-win32.whl", hash = "sha256:d9a566c40b89757c9aa8e6f032bcdb8ca8795d7c1a9762910c722b1635c9de4d"}, + {file = "tornado-6.4.1-cp38-abi3-win_amd64.whl", hash = "sha256:b24b8982ed444378d7f21d563f4180a2de31ced9d8d84443907a0a64da2072e7"}, + {file = "tornado-6.4.1.tar.gz", hash = "sha256:92d3ab53183d8c50f8204a51e6f91d18a15d5ef261e84d452800d4ff6fc504e9"}, ] [[package]] From 7c5fb13f7b0776e20eccede75827e515fdaa1146 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 21 Jun 2024 13:42:01 +0100 Subject: [PATCH 04/84] Bump requests from 2.31.0 to 2.32.2 (#17345) --- poetry.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index d3a37944b5..0e5195bf34 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2157,13 +2157,13 @@ rpds-py = ">=0.7.0" [[package]] name = "requests" 
-version = "2.31.0" +version = "2.32.2" description = "Python HTTP for Humans." optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, - {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, + {file = "requests-2.32.2-py3-none-any.whl", hash = "sha256:fc06670dd0ed212426dfeb94fc1b983d917c4f9847c863f313c9dfaaffb7c23c"}, + {file = "requests-2.32.2.tar.gz", hash = "sha256:dd951ff5ecf3e3b3aa26b40703ba77495dab41da839ae72ef3c8e5d8e2433289"}, ] [package.dependencies] From adeedb7b7c80842665a0b7d46c9188a2c49076fb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 21 Jun 2024 13:42:09 +0100 Subject: [PATCH 05/84] Bump urllib3 from 2.0.7 to 2.2.2 (#17346) --- poetry.lock | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index 0e5195bf34..27e9fe5f4e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2939,18 +2939,18 @@ files = [ [[package]] name = "urllib3" -version = "2.0.7" +version = "2.2.2" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "urllib3-2.0.7-py3-none-any.whl", hash = "sha256:fdb6d215c776278489906c2f8916e6e7d4f5a9b602ccbcfdf7f016fc8da0596e"}, - {file = "urllib3-2.0.7.tar.gz", hash = "sha256:c97dfde1f7bd43a71c8d2a58e369e9b2bf692d1334ea9f9cae55add7d0dd0f84"}, + {file = "urllib3-2.2.2-py3-none-any.whl", hash = "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472"}, + {file = "urllib3-2.2.2.tar.gz", hash = "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168"}, ] [package.extras] brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] -secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17.1.0)", "urllib3-secure-extra"] +h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] From 452a59f887dd306df01359676ca8efe7d107a106 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:31:54 +0100 Subject: [PATCH 06/84] Bump sentry-sdk from 2.3.1 to 2.6.0 (#17351) --- poetry.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index 27e9fe5f4e..ebb3efcf88 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2387,13 +2387,13 @@ doc = ["Sphinx", "sphinx-rtd-theme"] [[package]] name = "sentry-sdk" -version = "2.3.1" +version = "2.6.0" description = "Python client for Sentry (https://sentry.io)" optional = true python-versions = ">=3.6" files = [ - {file = "sentry_sdk-2.3.1-py2.py3-none-any.whl", hash = "sha256:c5aeb095ba226391d337dd42a6f9470d86c9fc236ecc71cfc7cd1942b45010c6"}, - {file = "sentry_sdk-2.3.1.tar.gz", hash = "sha256:139a71a19f5e9eb5d3623942491ce03cf8ebc14ea2e39ba3e6fe79560d8a5b1f"}, + {file = "sentry_sdk-2.6.0-py2.py3-none-any.whl", hash = "sha256:422b91cb49378b97e7e8d0e8d5a1069df23689d45262b86f54988a7db264e874"}, + {file = "sentry_sdk-2.6.0.tar.gz", hash = 
"sha256:65cc07e9c6995c5e316109f138570b32da3bd7ff8d0d0ee4aaf2628c3dd8127d"}, ] [package.dependencies] @@ -2600,7 +2600,7 @@ files = [ name = "tornado" version = "6.4.1" description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "tornado-6.4.1-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:163b0aafc8e23d8cdc3c9dfb24c5368af84a81e3364745ccb4427669bf84aec8"}, From 7a6186b8880a37a6f891659ea70110c2b8ad0139 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:32:05 +0100 Subject: [PATCH 07/84] Bump packaging from 24.0 to 24.1 (#17352) --- poetry.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index ebb3efcf88..e1eba225cd 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1488,13 +1488,13 @@ tests = ["Sphinx", "doubles", "flake8", "flake8-quotes", "gevent", "mock", "pyte [[package]] name = "packaging" -version = "24.0" +version = "24.1" description = "Core utilities for Python packages" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "packaging-24.0-py3-none-any.whl", hash = "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5"}, - {file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"}, + {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"}, + {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, ] [[package]] From 118b734081af78e89fdb8d2212ff651a9666b343 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:32:14 +0100 Subject: [PATCH 08/84] Bump netaddr from 1.2.1 to 1.3.0 
(#17353) --- poetry.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index e1eba225cd..7690ea2b26 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1461,13 +1461,13 @@ test = ["lxml", "pytest (>=4.6)", "pytest-cov"] [[package]] name = "netaddr" -version = "1.2.1" +version = "1.3.0" description = "A network address manipulation library for Python" optional = false python-versions = ">=3.7" files = [ - {file = "netaddr-1.2.1-py3-none-any.whl", hash = "sha256:bd9e9534b0d46af328cf64f0e5a23a5a43fca292df221c85580b27394793496e"}, - {file = "netaddr-1.2.1.tar.gz", hash = "sha256:6eb8fedf0412c6d294d06885c110de945cf4d22d2b510d0404f4e06950857987"}, + {file = "netaddr-1.3.0-py3-none-any.whl", hash = "sha256:c2c6a8ebe5554ce33b7d5b3a306b71bbb373e000bbbf2350dd5213cc56e3dbbe"}, + {file = "netaddr-1.3.0.tar.gz", hash = "sha256:5c3c3d9895b551b763779ba7db7a03487dc1f8e3b385af819af341ae9ef6e48a"}, ] [package.extras] From 7c2d8f1f0171b89a0e280bf18d522acd8bdf610e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:32:44 +0100 Subject: [PATCH 09/84] Bump typing-extensions from 4.11.0 to 4.12.2 (#17354) --- poetry.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 7690ea2b26..1bae0ea388 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2917,13 +2917,13 @@ files = [ [[package]] name = "typing-extensions" -version = "4.11.0" +version = "4.12.2" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.11.0-py3-none-any.whl", hash = "sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a"}, - {file = "typing_extensions-4.11.0.tar.gz", hash = "sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0"}, + {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = 
"sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, + {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, ] [[package]] From 1e74b50dc63d79887168b19a9f3ad240bec96590 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:34:56 +0100 Subject: [PATCH 10/84] Bump lazy_static from 1.4.0 to 1.5.0 (#17355) --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7472e16291..1955c1a4e7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -212,9 +212,9 @@ dependencies = [ [[package]] name = "lazy_static" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" From 700d2cc4a0d457642edb43bc3714d212f15d797f Mon Sep 17 00:00:00 2001 From: Denis Kasak Date: Mon, 24 Jun 2024 15:12:14 +0200 Subject: [PATCH 11/84] Tidy up integer parsing (#17339) The parse_integer function was previously made to reject negative values by default in https://github.com/element-hq/synapse/pull/16920, but the documentation stated otherwise. This fixes the documentation and also: - Removes explicit negative=False parameters from call sites. - Brings the negative default of parse_integer_from_args in alignment with parse_integer. 
--- changelog.d/17339.misc | 1 + synapse/http/servlet.py | 12 +++++++----- synapse/rest/admin/federation.py | 8 ++++---- synapse/rest/admin/media.py | 12 ++++++------ synapse/rest/admin/statistics.py | 8 ++++---- synapse/rest/admin/users.py | 4 ++-- synapse/rest/client/room.py | 11 +---------- synapse/streams/config.py | 3 --- 8 files changed, 25 insertions(+), 34 deletions(-) create mode 100644 changelog.d/17339.misc diff --git a/changelog.d/17339.misc b/changelog.d/17339.misc new file mode 100644 index 0000000000..1d7cb96c8b --- /dev/null +++ b/changelog.d/17339.misc @@ -0,0 +1 @@ +Tidy up `parse_integer` docs and call sites to reflect the fact that they require non-negative integers by default, and bring `parse_integer_from_args` default in alignment. Contributed by Denis Kasak (@dkasak). diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index ab12951da8..08b8ff7afd 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -119,14 +119,15 @@ def parse_integer( default: value to use if the parameter is absent, defaults to None. required: whether to raise a 400 SynapseError if the parameter is absent, defaults to False. - negative: whether to allow negative integers, defaults to True. + negative: whether to allow negative integers, defaults to False (disallowing + negatives). Returns: An int value or the default. Raises: SynapseError: if the parameter is absent and required, if the parameter is present and not an integer, or if the - parameter is illegitimate negative. + parameter is illegitimately negative. 
""" args: Mapping[bytes, Sequence[bytes]] = request.args # type: ignore return parse_integer_from_args(args, name, default, required, negative) @@ -164,7 +165,7 @@ def parse_integer_from_args( name: str, default: Optional[int] = None, required: bool = False, - negative: bool = True, + negative: bool = False, ) -> Optional[int]: """Parse an integer parameter from the request string @@ -174,7 +175,8 @@ def parse_integer_from_args( default: value to use if the parameter is absent, defaults to None. required: whether to raise a 400 SynapseError if the parameter is absent, defaults to False. - negative: whether to allow negative integers, defaults to True. + negative: whether to allow negative integers, defaults to False (disallowing + negatives). Returns: An int value or the default. @@ -182,7 +184,7 @@ def parse_integer_from_args( Raises: SynapseError: if the parameter is absent and required, if the parameter is present and not an integer, or if the - parameter is illegitimate negative. + parameter is illegitimately negative. 
""" name_bytes = name.encode("ascii") diff --git a/synapse/rest/admin/federation.py b/synapse/rest/admin/federation.py index 14ab4644cb..d85a04b825 100644 --- a/synapse/rest/admin/federation.py +++ b/synapse/rest/admin/federation.py @@ -61,8 +61,8 @@ class ListDestinationsRestServlet(RestServlet): async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: await assert_requester_is_admin(self._auth, request) - start = parse_integer(request, "from", default=0, negative=False) - limit = parse_integer(request, "limit", default=100, negative=False) + start = parse_integer(request, "from", default=0) + limit = parse_integer(request, "limit", default=100) destination = parse_string(request, "destination") @@ -181,8 +181,8 @@ class DestinationMembershipRestServlet(RestServlet): if not await self._store.is_destination_known(destination): raise NotFoundError("Unknown destination") - start = parse_integer(request, "from", default=0, negative=False) - limit = parse_integer(request, "limit", default=100, negative=False) + start = parse_integer(request, "from", default=0) + limit = parse_integer(request, "limit", default=100) direction = parse_enum(request, "dir", Direction, default=Direction.FORWARDS) diff --git a/synapse/rest/admin/media.py b/synapse/rest/admin/media.py index a05b7252ec..ee6a681285 100644 --- a/synapse/rest/admin/media.py +++ b/synapse/rest/admin/media.py @@ -311,8 +311,8 @@ class DeleteMediaByDateSize(RestServlet): ) -> Tuple[int, JsonDict]: await assert_requester_is_admin(self.auth, request) - before_ts = parse_integer(request, "before_ts", required=True, negative=False) - size_gt = parse_integer(request, "size_gt", default=0, negative=False) + before_ts = parse_integer(request, "before_ts", required=True) + size_gt = parse_integer(request, "size_gt", default=0) keep_profiles = parse_boolean(request, "keep_profiles", default=True) if before_ts < 30000000000: # Dec 1970 in milliseconds, Aug 2920 in seconds @@ -377,8 +377,8 @@ class 
UserMediaRestServlet(RestServlet): if user is None: raise NotFoundError("Unknown user") - start = parse_integer(request, "from", default=0, negative=False) - limit = parse_integer(request, "limit", default=100, negative=False) + start = parse_integer(request, "from", default=0) + limit = parse_integer(request, "limit", default=100) # If neither `order_by` nor `dir` is set, set the default order # to newest media is on top for backward compatibility. @@ -421,8 +421,8 @@ class UserMediaRestServlet(RestServlet): if user is None: raise NotFoundError("Unknown user") - start = parse_integer(request, "from", default=0, negative=False) - limit = parse_integer(request, "limit", default=100, negative=False) + start = parse_integer(request, "from", default=0) + limit = parse_integer(request, "limit", default=100) # If neither `order_by` nor `dir` is set, set the default order # to newest media is on top for backward compatibility. diff --git a/synapse/rest/admin/statistics.py b/synapse/rest/admin/statistics.py index dc27a41dd9..0adc5b7005 100644 --- a/synapse/rest/admin/statistics.py +++ b/synapse/rest/admin/statistics.py @@ -63,10 +63,10 @@ class UserMediaStatisticsRestServlet(RestServlet): ), ) - start = parse_integer(request, "from", default=0, negative=False) - limit = parse_integer(request, "limit", default=100, negative=False) - from_ts = parse_integer(request, "from_ts", default=0, negative=False) - until_ts = parse_integer(request, "until_ts", negative=False) + start = parse_integer(request, "from", default=0) + limit = parse_integer(request, "limit", default=100) + from_ts = parse_integer(request, "from_ts", default=0) + until_ts = parse_integer(request, "until_ts") if until_ts is not None: if until_ts <= from_ts: diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index 5bf12c4979..f7cb9e02cc 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -90,8 +90,8 @@ class UsersRestServletV2(RestServlet): async def on_GET(self, 
request: SynapseRequest) -> Tuple[int, JsonDict]: await assert_requester_is_admin(self.auth, request) - start = parse_integer(request, "from", default=0, negative=False) - limit = parse_integer(request, "limit", default=100, negative=False) + start = parse_integer(request, "from", default=0) + limit = parse_integer(request, "limit", default=100) user_id = parse_string(request, "user_id") name = parse_string(request, "name", encoding="utf-8") diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index c98241f6ce..bd65cf4b83 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -510,7 +510,7 @@ class PublicRoomListRestServlet(RestServlet): if server: raise e - limit: Optional[int] = parse_integer(request, "limit", 0, negative=False) + limit: Optional[int] = parse_integer(request, "limit", 0) since_token = parse_string(request, "since") if limit == 0: @@ -1430,16 +1430,7 @@ class RoomHierarchyRestServlet(RestServlet): requester = await self._auth.get_user_by_req(request, allow_guest=True) max_depth = parse_integer(request, "max_depth") - if max_depth is not None and max_depth < 0: - raise SynapseError( - 400, "'max_depth' must be a non-negative integer", Codes.BAD_JSON - ) - limit = parse_integer(request, "limit") - if limit is not None and limit <= 0: - raise SynapseError( - 400, "'limit' must be a positive integer", Codes.BAD_JSON - ) return 200, await self._room_summary_handler.get_room_hierarchy( requester, diff --git a/synapse/streams/config.py b/synapse/streams/config.py index eeafe889de..9fee5bfb92 100644 --- a/synapse/streams/config.py +++ b/synapse/streams/config.py @@ -75,9 +75,6 @@ class PaginationConfig: raise SynapseError(400, "'to' parameter is invalid") limit = parse_integer(request, "limit", default=default_limit) - if limit < 0: - raise SynapseError(400, "Limit must be 0 or above") - limit = min(limit, MAX_LIMIT) try: From cf711ac03cd88b70568b3ac9df4aed4de5b33523 Mon Sep 17 00:00:00 2001 From: Erik Johnston 
Date: Mon, 24 Jun 2024 14:15:13 +0100 Subject: [PATCH 12/84] Reduce device lists replication traffic. (#17333) Reduce the replication traffic of device lists, by not sending every destination that needs to be sent the device list update over replication. Instead a "hosts to send to have been calculated" notification over replication, and then federation senders read the destinations from the DB. For non federation senders this should heavily reduce the impact of a user in many large rooms changing a device. --- changelog.d/17333.misc | 1 + synapse/replication/tcp/client.py | 19 ++-- synapse/replication/tcp/streams/_base.py | 12 ++- synapse/storage/databases/main/devices.py | 93 ++++++++++++------- .../storage/databases/main/end_to_end_keys.py | 4 +- tests/storage/test_devices.py | 8 ++ 6 files changed, 89 insertions(+), 48 deletions(-) create mode 100644 changelog.d/17333.misc diff --git a/changelog.d/17333.misc b/changelog.d/17333.misc new file mode 100644 index 0000000000..d3ef0b3777 --- /dev/null +++ b/changelog.d/17333.misc @@ -0,0 +1 @@ +Handle device lists notifications for large accounts more efficiently in worker mode. diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 2d6d49eed7..3dddbb70b4 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -114,13 +114,19 @@ class ReplicationDataHandler: """ all_room_ids: Set[str] = set() if stream_name == DeviceListsStream.NAME: - if any(row.entity.startswith("@") and not row.is_signature for row in rows): + if any(not row.is_signature and not row.hosts_calculated for row in rows): prev_token = self.store.get_device_stream_token() all_room_ids = await self.store.get_all_device_list_changes( prev_token, token ) self.store.device_lists_in_rooms_have_changed(all_room_ids, token) + # If we're sending federation we need to update the device lists + # outbound pokes stream change cache with updated hosts. 
+ if self.send_handler and any(row.hosts_calculated for row in rows): + hosts = await self.store.get_destinations_for_device(token) + self.store.device_lists_outbound_pokes_have_changed(hosts, token) + self.store.process_replication_rows(stream_name, instance_name, token, rows) # NOTE: this must be called after process_replication_rows to ensure any # cache invalidations are first handled before any stream ID advances. @@ -433,12 +439,11 @@ class FederationSenderHandler: # The entities are either user IDs (starting with '@') whose devices # have changed, or remote servers that we need to tell about # changes. - hosts = { - row.entity - for row in rows - if not row.entity.startswith("@") and not row.is_signature - } - await self.federation_sender.send_device_messages(hosts, immediate=False) + if any(row.hosts_calculated for row in rows): + hosts = await self.store.get_destinations_for_device(token) + await self.federation_sender.send_device_messages( + hosts, immediate=False + ) elif stream_name == ToDeviceStream.NAME: # The to_device stream includes stuff to be pushed to both local diff --git a/synapse/replication/tcp/streams/_base.py b/synapse/replication/tcp/streams/_base.py index 661206c841..d021904de7 100644 --- a/synapse/replication/tcp/streams/_base.py +++ b/synapse/replication/tcp/streams/_base.py @@ -549,10 +549,14 @@ class DeviceListsStream(_StreamFromIdGen): @attr.s(slots=True, frozen=True, auto_attribs=True) class DeviceListsStreamRow: - entity: str + user_id: str # Indicates that a user has signed their own device with their user-signing key is_signature: bool + # Indicates if this is a notification that we've calculated the hosts we + # need to send the update to. 
+ hosts_calculated: bool + NAME = "device_lists" ROW_TYPE = DeviceListsStreamRow @@ -594,13 +598,13 @@ class DeviceListsStream(_StreamFromIdGen): upper_limit_token = min(upper_limit_token, signatures_to_token) device_updates = [ - (stream_id, (entity, False)) - for stream_id, (entity,) in device_updates + (stream_id, (entity, False, hosts)) + for stream_id, (entity, hosts) in device_updates if stream_id <= upper_limit_token ] signatures_updates = [ - (stream_id, (entity, True)) + (stream_id, (entity, True, False)) for stream_id, (entity,) in signatures_updates if stream_id <= upper_limit_token ] diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 40187496e2..5eeca6165d 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -164,22 +164,24 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): prefilled_cache=user_signature_stream_prefill, ) - ( - device_list_federation_prefill, - device_list_federation_list_id, - ) = self.db_pool.get_cache_dict( - db_conn, - "device_lists_outbound_pokes", - entity_column="destination", - stream_column="stream_id", - max_value=device_list_max, - limit=10000, - ) - self._device_list_federation_stream_cache = StreamChangeCache( - "DeviceListFederationStreamChangeCache", - device_list_federation_list_id, - prefilled_cache=device_list_federation_prefill, - ) + self._device_list_federation_stream_cache = None + if hs.should_send_federation(): + ( + device_list_federation_prefill, + device_list_federation_list_id, + ) = self.db_pool.get_cache_dict( + db_conn, + "device_lists_outbound_pokes", + entity_column="destination", + stream_column="stream_id", + max_value=device_list_max, + limit=10000, + ) + self._device_list_federation_stream_cache = StreamChangeCache( + "DeviceListFederationStreamChangeCache", + device_list_federation_list_id, + prefilled_cache=device_list_federation_prefill, + ) if 
hs.config.worker.run_background_tasks: self._clock.looping_call( @@ -207,23 +209,30 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): ) -> None: for row in rows: if row.is_signature: - self._user_signature_stream_cache.entity_has_changed(row.entity, token) + self._user_signature_stream_cache.entity_has_changed(row.user_id, token) continue # The entities are either user IDs (starting with '@') whose devices # have changed, or remote servers that we need to tell about # changes. - if row.entity.startswith("@"): - self._device_list_stream_cache.entity_has_changed(row.entity, token) - self.get_cached_devices_for_user.invalidate((row.entity,)) - self._get_cached_user_device.invalidate((row.entity,)) - self.get_device_list_last_stream_id_for_remote.invalidate((row.entity,)) - - else: - self._device_list_federation_stream_cache.entity_has_changed( - row.entity, token + if not row.hosts_calculated: + self._device_list_stream_cache.entity_has_changed(row.user_id, token) + self.get_cached_devices_for_user.invalidate((row.user_id,)) + self._get_cached_user_device.invalidate((row.user_id,)) + self.get_device_list_last_stream_id_for_remote.invalidate( + (row.user_id,) ) + def device_lists_outbound_pokes_have_changed( + self, destinations: StrCollection, token: int + ) -> None: + assert self._device_list_federation_stream_cache is not None + + for destination in destinations: + self._device_list_federation_stream_cache.entity_has_changed( + destination, token + ) + def device_lists_in_rooms_have_changed( self, room_ids: StrCollection, token: int ) -> None: @@ -363,6 +372,11 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): EDU contents. 
""" now_stream_id = self.get_device_stream_token() + if from_stream_id == now_stream_id: + return now_stream_id, [] + + if self._device_list_federation_stream_cache is None: + raise Exception("Func can only be used on federation senders") has_changed = self._device_list_federation_stream_cache.has_entity_changed( destination, int(from_stream_id) @@ -1018,10 +1032,10 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): # This query Does The Right Thing where it'll correctly apply the # bounds to the inner queries. sql = """ - SELECT stream_id, entity FROM ( - SELECT stream_id, user_id AS entity FROM device_lists_stream + SELECT stream_id, user_id, hosts FROM ( + SELECT stream_id, user_id, false AS hosts FROM device_lists_stream UNION ALL - SELECT stream_id, destination AS entity FROM device_lists_outbound_pokes + SELECT DISTINCT stream_id, user_id, true AS hosts FROM device_lists_outbound_pokes ) AS e WHERE ? < stream_id AND stream_id <= ? ORDER BY stream_id ASC @@ -1577,6 +1591,14 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): get_device_list_changes_in_room_txn, ) + async def get_destinations_for_device(self, stream_id: int) -> StrCollection: + return await self.db_pool.simple_select_onecol( + table="device_lists_outbound_pokes", + keyvalues={"stream_id": stream_id}, + retcol="destination", + desc="get_destinations_for_device", + ) + class DeviceBackgroundUpdateStore(SQLBaseStore): def __init__( @@ -2112,12 +2134,13 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): stream_ids: List[int], context: Optional[Dict[str, str]], ) -> None: - for host in hosts: - txn.call_after( - self._device_list_federation_stream_cache.entity_has_changed, - host, - stream_ids[-1], - ) + if self._device_list_federation_stream_cache: + for host in hosts: + txn.call_after( + self._device_list_federation_stream_cache.entity_has_changed, + host, + stream_ids[-1], + ) now = self._clock.time_msec() stream_id_iterator = 
iter(stream_ids) diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index 38d8785faa..9e6c9561ae 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -123,9 +123,9 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker if stream_name == DeviceListsStream.NAME: for row in rows: assert isinstance(row, DeviceListsStream.DeviceListsStreamRow) - if row.entity.startswith("@"): + if not row.hosts_calculated: self._get_e2e_device_keys_for_federation_query_inner.invalidate( - (row.entity,) + (row.user_id,) ) super().process_replication_rows(stream_name, instance_name, token, rows) diff --git a/tests/storage/test_devices.py b/tests/storage/test_devices.py index 7f975d04ff..ba01b038ab 100644 --- a/tests/storage/test_devices.py +++ b/tests/storage/test_devices.py @@ -36,6 +36,14 @@ class DeviceStoreTestCase(HomeserverTestCase): def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.store = hs.get_datastores().main + def default_config(self) -> JsonDict: + config = super().default_config() + + # We 'enable' federation otherwise `get_device_updates_by_remote` will + # throw an exception. + config["federation_sender_instances"] = ["master"] + return config + def add_device_change(self, user_id: str, device_ids: List[str], host: str) -> None: """Add a device list change for the given device to `device_lists_outbound_pokes` table. 
From 7a11c0ac4fd3bab42d6edb17593c9d7ed8371001 Mon Sep 17 00:00:00 2001 From: Shay Date: Mon, 24 Jun 2024 06:21:22 -0700 Subject: [PATCH 13/84] Add support for MSC3823 - Account Suspension Part 2 (#17255) --- changelog.d/17255.feature | 1 + synapse/config/experimental.py | 4 ++ synapse/handlers/message.py | 11 ++++ synapse/rest/admin/__init__.py | 3 + synapse/rest/admin/users.py | 39 ++++++++++++ synapse/rest/client/profile.py | 26 ++++++++ synapse/rest/client/room.py | 14 +++++ tests/rest/admin/test_user.py | 84 +++++++++++++++++++++++++ tests/rest/client/test_rooms.py | 105 ++++++++++++++++++++++++++++++++ 9 files changed, 287 insertions(+) create mode 100644 changelog.d/17255.feature diff --git a/changelog.d/17255.feature b/changelog.d/17255.feature new file mode 100644 index 0000000000..4093de1146 --- /dev/null +++ b/changelog.d/17255.feature @@ -0,0 +1 @@ +Add support for [MSC3823](https://github.com/matrix-org/matrix-spec-proposals/pull/3823) - Account suspension. \ No newline at end of file diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 23e96da6a3..1b72727b75 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -433,6 +433,10 @@ class ExperimentalConfig(Config): ("experimental", "msc4108_delegation_endpoint"), ) + self.msc3823_account_suspension = experimental.get( + "msc3823_account_suspension", False + ) + self.msc3916_authenticated_media_enabled = experimental.get( "msc3916_authenticated_media_enabled", False ) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 16d01efc67..5aa48230ec 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -642,6 +642,17 @@ class EventCreationHandler: """ await self.auth_blocking.check_auth_blocking(requester=requester) + if event_dict["type"] == EventTypes.Message: + requester_suspended = await self.store.get_user_suspended_status( + requester.user.to_string() + ) + if requester_suspended: + raise SynapseError(
+ 403, + "Sending messages while account is suspended is not allowed.", + Codes.USER_ACCOUNT_SUSPENDED, + ) + if event_dict["type"] == EventTypes.Create and event_dict["state_key"] == "": room_version_id = event_dict["content"]["room_version"] maybe_room_version_obj = KNOWN_ROOM_VERSIONS.get(room_version_id) diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index 6da1d79168..cdaee17451 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -101,6 +101,7 @@ from synapse.rest.admin.users import ( ResetPasswordRestServlet, SearchUsersRestServlet, ShadowBanRestServlet, + SuspendAccountRestServlet, UserAdminServlet, UserByExternalId, UserByThreePid, @@ -327,6 +328,8 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: BackgroundUpdateRestServlet(hs).register(http_server) BackgroundUpdateStartJobRestServlet(hs).register(http_server) ExperimentalFeaturesRestServlet(hs).register(http_server) + if hs.config.experimental.msc3823_account_suspension: + SuspendAccountRestServlet(hs).register(http_server) def register_servlets_for_client_rest_resource( diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index f7cb9e02cc..ad515bd5a3 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -27,11 +27,13 @@ from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union import attr +from synapse._pydantic_compat import HAS_PYDANTIC_V2 from synapse.api.constants import Direction, UserTypes from synapse.api.errors import Codes, NotFoundError, SynapseError from synapse.http.servlet import ( RestServlet, assert_params_in_dict, + parse_and_validate_json_object_from_request, parse_boolean, parse_enum, parse_integer, @@ -49,10 +51,17 @@ from synapse.rest.client._base import client_patterns from synapse.storage.databases.main.registration import ExternalIDReuseException from synapse.storage.databases.main.stats import UserSortOrder from synapse.types import JsonDict, 
JsonMapping, UserID +from synapse.types.rest import RequestBodyModel if TYPE_CHECKING: from synapse.server import HomeServer +if TYPE_CHECKING or HAS_PYDANTIC_V2: + from pydantic.v1 import StrictBool +else: + from pydantic import StrictBool + + logger = logging.getLogger(__name__) @@ -732,6 +741,36 @@ class DeactivateAccountRestServlet(RestServlet): return HTTPStatus.OK, {"id_server_unbind_result": id_server_unbind_result} +class SuspendAccountRestServlet(RestServlet): + PATTERNS = admin_patterns("/suspend/(?P<target_user_id>[^/]*)$") + + def __init__(self, hs: "HomeServer"): + self.auth = hs.get_auth() + self.is_mine = hs.is_mine + self.store = hs.get_datastores().main + + class PutBody(RequestBodyModel): + suspend: StrictBool + + async def on_PUT( + self, request: SynapseRequest, target_user_id: str + ) -> Tuple[int, JsonDict]: + requester = await self.auth.get_user_by_req(request) + await assert_user_is_admin(self.auth, requester) + + if not self.is_mine(UserID.from_string(target_user_id)): + raise SynapseError(HTTPStatus.BAD_REQUEST, "Can only suspend local users") + + if not await self.store.get_user_by_id(target_user_id): + raise NotFoundError("User not found") + + body = parse_and_validate_json_object_from_request(request, self.PutBody) + suspend = body.suspend + await self.store.set_user_suspended_status(target_user_id, suspend) + + return HTTPStatus.OK, {f"user_{target_user_id}_suspended": suspend} + + class AccountValidityRenewServlet(RestServlet): PATTERNS = admin_patterns("/account_validity/validity$") diff --git a/synapse/rest/client/profile.py b/synapse/rest/client/profile.py index 0323f6afa1..c1a80c5c3d 100644 --- a/synapse/rest/client/profile.py +++ b/synapse/rest/client/profile.py @@ -108,6 +108,19 @@ class ProfileDisplaynameRestServlet(RestServlet): propagate = _read_propagate(self.hs, request) + requester_suspended = ( + await self.hs.get_datastores().main.get_user_suspended_status( + requester.user.to_string() + ) + ) + + if requester_suspended: + raise
SynapseError( + 403, + "Updating displayname while account is suspended is not allowed.", + Codes.USER_ACCOUNT_SUSPENDED, + ) + await self.profile_handler.set_displayname( user, requester, new_name, is_admin, propagate=propagate ) @@ -167,6 +180,19 @@ class ProfileAvatarURLRestServlet(RestServlet): propagate = _read_propagate(self.hs, request) + requester_suspended = ( + await self.hs.get_datastores().main.get_user_suspended_status( + requester.user.to_string() + ) + ) + + if requester_suspended: + raise SynapseError( + 403, + "Updating avatar URL while account is suspended is not allowed.", + Codes.USER_ACCOUNT_SUSPENDED, + ) + await self.profile_handler.set_avatar_url( user, requester, new_avatar_url, is_admin, propagate=propagate ) diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index bd65cf4b83..903c74f6d8 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -1120,6 +1120,20 @@ class RoomRedactEventRestServlet(TransactionRestServlet): ) -> Tuple[int, JsonDict]: content = parse_json_object_from_request(request) + requester_suspended = await self._store.get_user_suspended_status( + requester.user.to_string() + ) + + if requester_suspended: + event = await self._store.get_event(event_id, allow_none=True) + if event: + if event.sender != requester.user.to_string(): + raise SynapseError( + 403, + "You can only redact your own events while account is suspended.", + Codes.USER_ACCOUNT_SUSPENDED, + ) + # Ensure the redacts property in the content matches the one provided in # the URL. 
room_version = await self._store.get_room_version(room_id) diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py index c5da1e9686..16bb4349f5 100644 --- a/tests/rest/admin/test_user.py +++ b/tests/rest/admin/test_user.py @@ -37,6 +37,7 @@ from synapse.api.constants import ApprovalNoticeMedium, LoginType, UserTypes from synapse.api.errors import Codes, HttpResponseException, ResourceLimitError from synapse.api.room_versions import RoomVersions from synapse.media.filepath import MediaFilePaths +from synapse.rest import admin from synapse.rest.client import ( devices, login, @@ -5005,3 +5006,86 @@ class AllowCrossSigningReplacementTestCase(unittest.HomeserverTestCase): ) assert timestamp is not None self.assertGreater(timestamp, self.clock.time_msec()) + + +class UserSuspensionTestCase(unittest.HomeserverTestCase): + servlets = [ + synapse.rest.admin.register_servlets, + login.register_servlets, + admin.register_servlets, + ] + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.admin = self.register_user("thomas", "hackme", True) + self.admin_tok = self.login("thomas", "hackme") + + self.bad_user = self.register_user("teresa", "hackme") + self.bad_user_tok = self.login("teresa", "hackme") + + self.store = hs.get_datastores().main + + @override_config({"experimental_features": {"msc3823_account_suspension": True}}) + def test_suspend_user(self) -> None: + # test that suspending user works + channel = self.make_request( + "PUT", + f"/_synapse/admin/v1/suspend/{self.bad_user}", + {"suspend": True}, + access_token=self.admin_tok, + ) + self.assertEqual(channel.code, 200) + self.assertEqual(channel.json_body, {f"user_{self.bad_user}_suspended": True}) + + res = self.get_success(self.store.get_user_suspended_status(self.bad_user)) + self.assertEqual(True, res) + + # test that un-suspending user works + channel2 = self.make_request( + "PUT", + f"/_synapse/admin/v1/suspend/{self.bad_user}", + {"suspend": False}, + 
access_token=self.admin_tok, + ) + self.assertEqual(channel2.code, 200) + self.assertEqual(channel2.json_body, {f"user_{self.bad_user}_suspended": False}) + + res2 = self.get_success(self.store.get_user_suspended_status(self.bad_user)) + self.assertEqual(False, res2) + + # test that trying to un-suspend user who isn't suspended doesn't cause problems + channel3 = self.make_request( + "PUT", + f"/_synapse/admin/v1/suspend/{self.bad_user}", + {"suspend": False}, + access_token=self.admin_tok, + ) + self.assertEqual(channel3.code, 200) + self.assertEqual(channel3.json_body, {f"user_{self.bad_user}_suspended": False}) + + res3 = self.get_success(self.store.get_user_suspended_status(self.bad_user)) + self.assertEqual(False, res3) + + # test that trying to suspend user who is already suspended doesn't cause problems + channel4 = self.make_request( + "PUT", + f"/_synapse/admin/v1/suspend/{self.bad_user}", + {"suspend": True}, + access_token=self.admin_tok, + ) + self.assertEqual(channel4.code, 200) + self.assertEqual(channel4.json_body, {f"user_{self.bad_user}_suspended": True}) + + res4 = self.get_success(self.store.get_user_suspended_status(self.bad_user)) + self.assertEqual(True, res4) + + channel5 = self.make_request( + "PUT", + f"/_synapse/admin/v1/suspend/{self.bad_user}", + {"suspend": True}, + access_token=self.admin_tok, + ) + self.assertEqual(channel5.code, 200) + self.assertEqual(channel5.json_body, {f"user_{self.bad_user}_suspended": True}) + + res5 = self.get_success(self.store.get_user_suspended_status(self.bad_user)) + self.assertEqual(True, res5) diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index d398cead1c..c559dfda83 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -3819,3 +3819,108 @@ class TimestampLookupTestCase(unittest.HomeserverTestCase): # Make sure the outlier event is not returned self.assertNotEqual(channel.json_body["event_id"], outlier_event.event_id) + + +class 
UserSuspensionTests(unittest.HomeserverTestCase): + servlets = [ + admin.register_servlets, + login.register_servlets, + room.register_servlets, + profile.register_servlets, + ] + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.user1 = self.register_user("thomas", "hackme") + self.tok1 = self.login("thomas", "hackme") + + self.user2 = self.register_user("teresa", "hackme") + self.tok2 = self.login("teresa", "hackme") + + self.room1 = self.helper.create_room_as(room_creator=self.user1, tok=self.tok1) + self.store = hs.get_datastores().main + + def test_suspended_user_cannot_send_message_to_room(self) -> None: + # set the user as suspended + self.get_success(self.store.set_user_suspended_status(self.user1, True)) + + channel = self.make_request( + "PUT", + f"/rooms/{self.room1}/send/m.room.message/1", + access_token=self.tok1, + content={"body": "hello", "msgtype": "m.text"}, + ) + self.assertEqual( + channel.json_body["errcode"], "ORG.MATRIX.MSC3823.USER_ACCOUNT_SUSPENDED" + ) + + def test_suspended_user_cannot_change_profile_data(self) -> None: + # set the user as suspended + self.get_success(self.store.set_user_suspended_status(self.user1, True)) + + channel = self.make_request( + "PUT", + f"/_matrix/client/v3/profile/{self.user1}/avatar_url", + access_token=self.tok1, + content={"avatar_url": "mxc://matrix.org/wefh34uihSDRGhw34"}, + shorthand=False, + ) + self.assertEqual( + channel.json_body["errcode"], "ORG.MATRIX.MSC3823.USER_ACCOUNT_SUSPENDED" + ) + + channel2 = self.make_request( + "PUT", + f"/_matrix/client/v3/profile/{self.user1}/displayname", + access_token=self.tok1, + content={"displayname": "something offensive"}, + shorthand=False, + ) + self.assertEqual( + channel2.json_body["errcode"], "ORG.MATRIX.MSC3823.USER_ACCOUNT_SUSPENDED" + ) + + def test_suspended_user_cannot_redact_messages_other_than_their_own(self) -> None: + # first user sends message + self.make_request("POST", f"/rooms/{self.room1}/join", 
access_token=self.tok2) + res = self.helper.send_event( + self.room1, + "m.room.message", + {"body": "hello", "msgtype": "m.text"}, + tok=self.tok2, + ) + event_id = res["event_id"] + + # second user sends message + self.make_request("POST", f"/rooms/{self.room1}/join", access_token=self.tok1) + res2 = self.helper.send_event( + self.room1, + "m.room.message", + {"body": "bad_message", "msgtype": "m.text"}, + tok=self.tok1, + ) + event_id2 = res2["event_id"] + + # set the second user as suspended + self.get_success(self.store.set_user_suspended_status(self.user1, True)) + + # second user can't redact first user's message + channel = self.make_request( + "PUT", + f"/_matrix/client/v3/rooms/{self.room1}/redact/{event_id}/1", + access_token=self.tok1, + content={"reason": "bogus"}, + shorthand=False, + ) + self.assertEqual( + channel.json_body["errcode"], "ORG.MATRIX.MSC3823.USER_ACCOUNT_SUSPENDED" + ) + + # but can redact their own + channel = self.make_request( + "PUT", + f"/_matrix/client/v3/rooms/{self.room1}/redact/{event_id2}/1", + access_token=self.tok1, + content={"reason": "bogus"}, + shorthand=False, + ) + self.assertEqual(channel.code, 200) From 930a64b6c1a4fe096d541bf9c5f0279fb636ed16 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 24 Jun 2024 15:40:28 +0100 Subject: [PATCH 14/84] Reintroduce #17291. (#17338) This is #17291 (which got reverted), with some added fixups, and change so that tests actually pick up the error. The problem was that we were not calculating any new chain IDs due to a missing `not` in a condition. 
--- changelog.d/17338.misc | 1 + synapse/storage/controllers/persist_events.py | 12 + .../databases/main/event_federation.py | 20 +- synapse/storage/databases/main/events.py | 263 +++++++++++++----- tests/storage/test_event_chain.py | 9 +- tests/storage/test_event_federation.py | 44 ++- tests/unittest.py | 2 + 7 files changed, 256 insertions(+), 95 deletions(-) create mode 100644 changelog.d/17338.misc diff --git a/changelog.d/17338.misc b/changelog.d/17338.misc new file mode 100644 index 0000000000..1a81bdef85 --- /dev/null +++ b/changelog.d/17338.misc @@ -0,0 +1 @@ +Do not block event sending/receiving while calculating large event auth chains. diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index 84699a2ee1..d0e015bf19 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -617,6 +617,17 @@ class EventsPersistenceStorageController: room_id, chunk ) + with Measure(self._clock, "calculate_chain_cover_index_for_events"): + # We now calculate chain ID/sequence numbers for any state events we're + # persisting. We ignore out of band memberships as we're not in the room + # and won't have their auth chain (we'll fix it up later if we join the + # room). 
+ # + # See: docs/auth_chain_difference_algorithm.md + new_event_links = await self.persist_events_store.calculate_chain_cover_index_for_events( + room_id, [e for e, _ in chunk] + ) + await self.persist_events_store._persist_events_and_state_updates( room_id, chunk, @@ -624,6 +635,7 @@ class EventsPersistenceStorageController: new_forward_extremities=new_forward_extremities, use_negative_stream_ordering=backfilled, inhibit_local_membership_updates=backfilled, + new_event_links=new_event_links, ) return replaced_events diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index fb132ef090..24abab4a23 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -148,6 +148,10 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas 500000, "_event_auth_cache", size_callback=len ) + # Flag used by unit tests to disable fallback when there is no chain cover + # index. + self.tests_allow_no_chain_cover_index = True + self._clock.looping_call(self._get_stats_for_federation_staging, 30 * 1000) if isinstance(self.database_engine, PostgresEngine): @@ -220,8 +224,10 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas ) except _NoChainCoverIndex: # For whatever reason we don't actually have a chain cover index - # for the events in question, so we fall back to the old method. - pass + # for the events in question, so we fall back to the old method + # (except in tests) + if not self.tests_allow_no_chain_cover_index: + raise return await self.db_pool.runInteraction( "get_auth_chain_ids", @@ -271,7 +277,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas if events_missing_chain_info: # This can happen due to e.g. downgrade/upgrade of the server. We # raise an exception and fall back to the previous algorithm. 
- logger.info( + logger.error( "Unexpectedly found that events don't have chain IDs in room %s: %s", room_id, events_missing_chain_info, @@ -482,8 +488,10 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas ) except _NoChainCoverIndex: # For whatever reason we don't actually have a chain cover index - # for the events in question, so we fall back to the old method. - pass + # for the events in question, so we fall back to the old method + # (except in tests) + if not self.tests_allow_no_chain_cover_index: + raise return await self.db_pool.runInteraction( "get_auth_chain_difference", @@ -710,7 +718,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas if events_missing_chain_info - event_to_auth_ids.keys(): # Uh oh, we somehow haven't correctly done the chain cover index, # bail and fall back to the old method. - logger.info( + logger.error( "Unexpectedly found that events don't have chain IDs in room %s: %s", room_id, events_missing_chain_info - event_to_auth_ids.keys(), diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 66428e6c8e..1f7acdb859 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -34,7 +34,6 @@ from typing import ( Optional, Set, Tuple, - Union, cast, ) @@ -100,6 +99,23 @@ class DeltaState: return not self.to_delete and not self.to_insert and not self.no_longer_in_room +@attr.s(slots=True, auto_attribs=True) +class NewEventChainLinks: + """Information about new auth chain links that need to be added to the DB. + + Attributes: + chain_id, sequence_number: the IDs corresponding to the event being + inserted, and the starting point of the links + links: Lists the links that need to be added, 2-tuple of the chain + ID/sequence number of the end point of the link. 
+ """ + + chain_id: int + sequence_number: int + + links: List[Tuple[int, int]] = attr.Factory(list) + + class PersistEventsStore: """Contains all the functions for writing events to the database. @@ -148,6 +164,7 @@ class PersistEventsStore: *, state_delta_for_room: Optional[DeltaState], new_forward_extremities: Optional[Set[str]], + new_event_links: Dict[str, NewEventChainLinks], use_negative_stream_ordering: bool = False, inhibit_local_membership_updates: bool = False, ) -> None: @@ -217,6 +234,7 @@ class PersistEventsStore: inhibit_local_membership_updates=inhibit_local_membership_updates, state_delta_for_room=state_delta_for_room, new_forward_extremities=new_forward_extremities, + new_event_links=new_event_links, ) persist_event_counter.inc(len(events_and_contexts)) @@ -243,6 +261,87 @@ class PersistEventsStore: (room_id,), frozenset(new_forward_extremities) ) + async def calculate_chain_cover_index_for_events( + self, room_id: str, events: Collection[EventBase] + ) -> Dict[str, NewEventChainLinks]: + # Filter to state events, and ensure there are no duplicates. + state_events = [] + seen_events = set() + for event in events: + if not event.is_state() or event.event_id in seen_events: + continue + + state_events.append(event) + seen_events.add(event.event_id) + + if not state_events: + return {} + + return await self.db_pool.runInteraction( + "_calculate_chain_cover_index_for_events", + self.calculate_chain_cover_index_for_events_txn, + room_id, + state_events, + ) + + def calculate_chain_cover_index_for_events_txn( + self, txn: LoggingTransaction, room_id: str, state_events: Collection[EventBase] + ) -> Dict[str, NewEventChainLinks]: + # We now calculate chain ID/sequence numbers for any state events we're + # persisting. We ignore out of band memberships as we're not in the room + # and won't have their auth chain (we'll fix it up later if we join the + # room). 
+ # + # See: docs/auth_chain_difference_algorithm.md + + # We ignore legacy rooms that we aren't filling the chain cover index + # for. + row = self.db_pool.simple_select_one_txn( + txn, + table="rooms", + keyvalues={"room_id": room_id}, + retcols=("room_id", "has_auth_chain_index"), + allow_none=True, + ) + if row is None or row[1] is False: + return {} + + # Filter out events that we've already calculated. + rows = self.db_pool.simple_select_many_txn( + txn, + table="event_auth_chains", + column="event_id", + iterable=[e.event_id for e in state_events], + keyvalues={}, + retcols=("event_id",), + ) + already_persisted_events = {event_id for event_id, in rows} + state_events = [ + event + for event in state_events + if event.event_id not in already_persisted_events + ] + + if not state_events: + return {} + + # We need to know the type/state_key and auth events of the events we're + # calculating chain IDs for. We don't rely on having the full Event + # instances as we'll potentially be pulling more events from the DB and + # we don't need the overhead of fetching/parsing the full event JSON. + event_to_types = {e.event_id: (e.type, e.state_key) for e in state_events} + event_to_auth_chain = {e.event_id: e.auth_event_ids() for e in state_events} + event_to_room_id = {e.event_id: e.room_id for e in state_events} + + return self._calculate_chain_cover_index( + txn, + self.db_pool, + self.store.event_chain_id_gen, + event_to_room_id, + event_to_types, + event_to_auth_chain, + ) + async def _get_events_which_are_prevs(self, event_ids: Iterable[str]) -> List[str]: """Filter the supplied list of event_ids to get those which are prev_events of existing (non-outlier/rejected) events. 
@@ -358,6 +457,7 @@ class PersistEventsStore: inhibit_local_membership_updates: bool, state_delta_for_room: Optional[DeltaState], new_forward_extremities: Optional[Set[str]], + new_event_links: Dict[str, NewEventChainLinks], ) -> None: """Insert some number of room events into the necessary database tables. @@ -466,7 +566,9 @@ class PersistEventsStore: # Insert into event_to_state_groups. self._store_event_state_mappings_txn(txn, events_and_contexts) - self._persist_event_auth_chain_txn(txn, [e for e, _ in events_and_contexts]) + self._persist_event_auth_chain_txn( + txn, [e for e, _ in events_and_contexts], new_event_links + ) # _store_rejected_events_txn filters out any events which were # rejected, and returns the filtered list. @@ -496,7 +598,11 @@ class PersistEventsStore: self, txn: LoggingTransaction, events: List[EventBase], + new_event_links: Dict[str, NewEventChainLinks], ) -> None: + if new_event_links: + self._persist_chain_cover_index(txn, self.db_pool, new_event_links) + # We only care about state events, so this if there are no state events. if not any(e.is_state() for e in events): return @@ -519,60 +625,6 @@ class PersistEventsStore: ], ) - # We now calculate chain ID/sequence numbers for any state events we're - # persisting. We ignore out of band memberships as we're not in the room - # and won't have their auth chain (we'll fix it up later if we join the - # room). - # - # See: docs/auth_chain_difference_algorithm.md - - # We ignore legacy rooms that we aren't filling the chain cover index - # for. 
- rows = cast( - List[Tuple[str, Optional[Union[int, bool]]]], - self.db_pool.simple_select_many_txn( - txn, - table="rooms", - column="room_id", - iterable={event.room_id for event in events if event.is_state()}, - keyvalues={}, - retcols=("room_id", "has_auth_chain_index"), - ), - ) - rooms_using_chain_index = { - room_id for room_id, has_auth_chain_index in rows if has_auth_chain_index - } - - state_events = { - event.event_id: event - for event in events - if event.is_state() and event.room_id in rooms_using_chain_index - } - - if not state_events: - return - - # We need to know the type/state_key and auth events of the events we're - # calculating chain IDs for. We don't rely on having the full Event - # instances as we'll potentially be pulling more events from the DB and - # we don't need the overhead of fetching/parsing the full event JSON. - event_to_types = { - e.event_id: (e.type, e.state_key) for e in state_events.values() - } - event_to_auth_chain = { - e.event_id: e.auth_event_ids() for e in state_events.values() - } - event_to_room_id = {e.event_id: e.room_id for e in state_events.values()} - - self._add_chain_cover_index( - txn, - self.db_pool, - self.store.event_chain_id_gen, - event_to_room_id, - event_to_types, - event_to_auth_chain, - ) - @classmethod def _add_chain_cover_index( cls, @@ -583,6 +635,35 @@ class PersistEventsStore: event_to_types: Dict[str, Tuple[str, str]], event_to_auth_chain: Dict[str, StrCollection], ) -> None: + """Calculate and persist the chain cover index for the given events. + + Args: + event_to_room_id: Event ID to the room ID of the event + event_to_types: Event ID to type and state_key of the event + event_to_auth_chain: Event ID to list of auth event IDs of the + event (events with no auth events can be excluded). 
+ """ + + new_event_links = cls._calculate_chain_cover_index( + txn, + db_pool, + event_chain_id_gen, + event_to_room_id, + event_to_types, + event_to_auth_chain, + ) + cls._persist_chain_cover_index(txn, db_pool, new_event_links) + + @classmethod + def _calculate_chain_cover_index( + cls, + txn: LoggingTransaction, + db_pool: DatabasePool, + event_chain_id_gen: SequenceGenerator, + event_to_room_id: Dict[str, str], + event_to_types: Dict[str, Tuple[str, str]], + event_to_auth_chain: Dict[str, StrCollection], + ) -> Dict[str, NewEventChainLinks]: """Calculate the chain cover index for the given events. Args: @@ -590,6 +671,10 @@ class PersistEventsStore: event_to_types: Event ID to type and state_key of the event event_to_auth_chain: Event ID to list of auth event IDs of the event (events with no auth events can be excluded). + + Returns: + A mapping with any new auth chain links we need to add, keyed by + event ID. """ # Map from event ID to chain ID/sequence number. @@ -708,11 +793,11 @@ class PersistEventsStore: room_id = event_to_room_id.get(event_id) if room_id: e_type, state_key = event_to_types[event_id] - db_pool.simple_insert_txn( + db_pool.simple_upsert_txn( txn, table="event_auth_chain_to_calculate", + keyvalues={"event_id": event_id}, values={ - "event_id": event_id, "room_id": room_id, "type": e_type, "state_key": state_key, @@ -724,7 +809,7 @@ class PersistEventsStore: break if not events_to_calc_chain_id_for: - return + return {} # Allocate chain ID/sequence numbers to each new event. 
new_chain_tuples = cls._allocate_chain_ids( @@ -739,23 +824,10 @@ class PersistEventsStore: ) chain_map.update(new_chain_tuples) - db_pool.simple_insert_many_txn( - txn, - table="event_auth_chains", - keys=("event_id", "chain_id", "sequence_number"), - values=[ - (event_id, c_id, seq) - for event_id, (c_id, seq) in new_chain_tuples.items() - ], - ) - - db_pool.simple_delete_many_txn( - txn, - table="event_auth_chain_to_calculate", - keyvalues={}, - column="event_id", - values=new_chain_tuples, - ) + to_return = { + event_id: NewEventChainLinks(chain_id, sequence_number) + for event_id, (chain_id, sequence_number) in new_chain_tuples.items() + } # Now we need to calculate any new links between chains caused by # the new events. @@ -825,10 +897,38 @@ class PersistEventsStore: auth_chain_id, auth_sequence_number = chain_map[auth_id] # Step 2a, add link between the event and auth event + to_return[event_id].links.append((auth_chain_id, auth_sequence_number)) chain_links.add_link( (chain_id, sequence_number), (auth_chain_id, auth_sequence_number) ) + return to_return + + @classmethod + def _persist_chain_cover_index( + cls, + txn: LoggingTransaction, + db_pool: DatabasePool, + new_event_links: Dict[str, NewEventChainLinks], + ) -> None: + db_pool.simple_insert_many_txn( + txn, + table="event_auth_chains", + keys=("event_id", "chain_id", "sequence_number"), + values=[ + (event_id, new_links.chain_id, new_links.sequence_number) + for event_id, new_links in new_event_links.items() + ], + ) + + db_pool.simple_delete_many_txn( + txn, + table="event_auth_chain_to_calculate", + keyvalues={}, + column="event_id", + values=new_event_links, + ) + db_pool.simple_insert_many_txn( txn, table="event_auth_chain_links", @@ -838,7 +938,16 @@ class PersistEventsStore: "target_chain_id", "target_sequence_number", ), - values=list(chain_links.get_additions()), + values=[ + ( + new_links.chain_id, + new_links.sequence_number, + target_chain_id, + target_sequence_number, + ) + for new_links 
in new_event_links.values() + for (target_chain_id, target_sequence_number) in new_links.links + ], ) @staticmethod diff --git a/tests/storage/test_event_chain.py b/tests/storage/test_event_chain.py index 81feb3ec29..c4e216c308 100644 --- a/tests/storage/test_event_chain.py +++ b/tests/storage/test_event_chain.py @@ -447,7 +447,14 @@ class EventChainStoreTestCase(HomeserverTestCase): ) # Actually call the function that calculates the auth chain stuff. - persist_events_store._persist_event_auth_chain_txn(txn, events) + new_event_links = ( + persist_events_store.calculate_chain_cover_index_for_events_txn( + txn, events[0].room_id, [e for e in events if e.is_state()] + ) + ) + persist_events_store._persist_event_auth_chain_txn( + txn, events, new_event_links + ) self.get_success( persist_events_store.db_pool.runInteraction( diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py index 0a6253e22c..088f0d24f9 100644 --- a/tests/storage/test_event_federation.py +++ b/tests/storage/test_event_federation.py @@ -365,12 +365,19 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase): }, ) + events = [ + cast(EventBase, FakeEvent(event_id, room_id, AUTH_GRAPH[event_id])) + for event_id in AUTH_GRAPH + ] + new_event_links = ( + self.persist_events.calculate_chain_cover_index_for_events_txn( + txn, room_id, [e for e in events if e.is_state()] + ) + ) self.persist_events._persist_event_auth_chain_txn( txn, - [ - cast(EventBase, FakeEvent(event_id, room_id, AUTH_GRAPH[event_id])) - for event_id in AUTH_GRAPH - ], + events, + new_event_links, ) self.get_success( @@ -544,6 +551,9 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase): rooms. 
""" + # We allow partial covers for this test + self.hs.get_datastores().main.tests_allow_no_chain_cover_index = True + room_id = "@ROOM:local" # The silly auth graph we use to test the auth difference algorithm, @@ -628,13 +638,20 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase): ) # Insert all events apart from 'B' + events = [ + cast(EventBase, FakeEvent(event_id, room_id, auth_graph[event_id])) + for event_id in auth_graph + if event_id != "b" + ] + new_event_links = ( + self.persist_events.calculate_chain_cover_index_for_events_txn( + txn, room_id, [e for e in events if e.is_state()] + ) + ) self.persist_events._persist_event_auth_chain_txn( txn, - [ - cast(EventBase, FakeEvent(event_id, room_id, auth_graph[event_id])) - for event_id in auth_graph - if event_id != "b" - ], + events, + new_event_links, ) # Now we insert the event 'B' without a chain cover, by temporarily @@ -647,9 +664,14 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase): updatevalues={"has_auth_chain_index": False}, ) + events = [cast(EventBase, FakeEvent("b", room_id, auth_graph["b"]))] + new_event_links = ( + self.persist_events.calculate_chain_cover_index_for_events_txn( + txn, room_id, [e for e in events if e.is_state()] + ) + ) self.persist_events._persist_event_auth_chain_txn( - txn, - [cast(EventBase, FakeEvent("b", room_id, auth_graph["b"]))], + txn, events, new_event_links ) self.store.db_pool.simple_update_txn( diff --git a/tests/unittest.py b/tests/unittest.py index 18963b9e32..a7c20556a0 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -344,6 +344,8 @@ class HomeserverTestCase(TestCase): self._hs_args = {"clock": self.clock, "reactor": self.reactor} self.hs = self.make_homeserver(self.reactor, self.clock) + self.hs.get_datastores().main.tests_allow_no_chain_cover_index = False + # Honour the `use_frozen_dicts` config option. 
We have to do this # manually because this is taken care of in the app `start` code, which # we don't run. Plus we want to reset it on tearDown. From ae4c236a6d6ef76565240ac964c5f540b9c1e1ed Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 24 Jun 2024 10:02:16 -0500 Subject: [PATCH 15/84] Fix room `type` typo in mailer (#17336) Correct event content field is `EventContentFields.ROOM_TYPE` (`type`) :white_check_mark: , not `room_type` :x: Spec: https://spec.matrix.org/v1.10/client-server-api/#mroomcreate --- changelog.d/17336.bugfix | 1 + synapse/push/mailer.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 changelog.d/17336.bugfix diff --git a/changelog.d/17336.bugfix b/changelog.d/17336.bugfix new file mode 100644 index 0000000000..618834302e --- /dev/null +++ b/changelog.d/17336.bugfix @@ -0,0 +1 @@ +Fix email notification subject when invited to a space. diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py index 77cc69a71f..cf611bd90b 100644 --- a/synapse/push/mailer.py +++ b/synapse/push/mailer.py @@ -28,7 +28,7 @@ import jinja2 from markupsafe import Markup from prometheus_client import Counter -from synapse.api.constants import EventTypes, Membership, RoomTypes +from synapse.api.constants import EventContentFields, EventTypes, Membership, RoomTypes from synapse.api.errors import StoreError from synapse.config.emailconfig import EmailSubjectConfig from synapse.events import EventBase @@ -716,7 +716,8 @@ class Mailer: ) if ( create_event - and create_event.content.get("room_type") == RoomTypes.SPACE + and create_event.content.get(EventContentFields.ROOM_TYPE) + == RoomTypes.SPACE ): return self.email_subjects.invite_from_person_to_space % { "person": inviter_name, From 87fac19fdebd070b09a7a7daae7217ccaa2f2d1e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 24 Jun 2024 10:15:15 -0500 Subject: [PATCH 16/84] Fix lints See https://github.com/element-hq/synapse/pull/17320#discussion_r1647701997 ``` 
synapse/federation/federation_server.py:677: error: Cannot determine type of "_join_rate_per_room_limiter" [has-type] synapse/federation/federation_server.py:720: error: Cannot determine type of "_join_rate_per_room_limiter" [has-type] ``` --- synapse/types/handlers/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/types/handlers/__init__.py b/synapse/types/handlers/__init__.py index 1ba5ea55c1..8e097d8b48 100644 --- a/synapse/types/handlers/__init__.py +++ b/synapse/types/handlers/__init__.py @@ -31,10 +31,12 @@ else: from pydantic import Extra from synapse.events import EventBase -from synapse.handlers.relations import BundledAggregations from synapse.types import JsonDict, JsonMapping, StreamToken, UserID from synapse.types.rest.client import SlidingSyncBody +if TYPE_CHECKING: + from synapse.handlers.relations import BundledAggregations + class ShutdownRoomParams(TypedDict): """ @@ -197,7 +199,7 @@ class SlidingSyncResult: initial: bool required_state: List[EventBase] timeline_events: List[EventBase] - bundled_aggregations: Optional[Dict[str, BundledAggregations]] + bundled_aggregations: Optional[Dict[str, "BundledAggregations"]] is_dm: bool stripped_state: Optional[List[JsonDict]] prev_batch: StreamToken From 3c61ddbbc9ee313447c16fa7f19bdc29ce647a32 Mon Sep 17 00:00:00 2001 From: devonh Date: Mon, 24 Jun 2024 17:16:09 +0000 Subject: [PATCH 17/84] Add default values for rc_invites per_issuer to docs (#17347) A simple change to update the docs where default values were missing. ### Pull Request Checklist * [X] Pull request is based on the develop branch * [X] Pull request includes a [changelog file](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#changelog). The entry should: - Be a short description of your change which makes sense to users. "Fixed a bug that prevented receiving messages from other servers." instead of "Moved X method from `EventStore` to `EventWorkerStore`.". 
- Use markdown where necessary, mostly for `code blocks`. - End with either a period (.) or an exclamation mark (!). - Start with a capital letter. - Feel free to credit yourself, by adding a sentence "Contributed by @github_username." or "Contributed by [Your Name]." to the end of the entry. * [X] [Code style](https://element-hq.github.io/synapse/latest/code_style.html) is correct (run the [linters](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#run-the-linters)) --------- Co-authored-by: Kim Brose <2803622+HarHarLinks@users.noreply.github.com> Co-authored-by: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> --- changelog.d/17347.doc | 1 + docs/usage/configuration/config_documentation.md | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 changelog.d/17347.doc diff --git a/changelog.d/17347.doc b/changelog.d/17347.doc new file mode 100644 index 0000000000..6cd41be60f --- /dev/null +++ b/changelog.d/17347.doc @@ -0,0 +1 @@ +Add default values for `rc_invites.per_issuer` to docs. diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 22c545359d..b3db078703 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -1759,8 +1759,9 @@ rc_3pid_validation: ### `rc_invites` This option sets ratelimiting how often invites can be sent in a room or to a -specific user. `per_room` defaults to `per_second: 0.3`, `burst_count: 10` and -`per_user` defaults to `per_second: 0.003`, `burst_count: 5`. +specific user. `per_room` defaults to `per_second: 0.3`, `burst_count: 10`, +`per_user` defaults to `per_second: 0.003`, `burst_count: 5`, and `per_issuer` +defaults to `per_second: 0.3`, `burst_count: 10`. 
Client requests that invite user(s) when [creating a room](https://spec.matrix.org/v1.2/client-server-api/#post_matrixclientv3createroom) From 805e6c9a8f703a0a774321bd0755be63dcdcc807 Mon Sep 17 00:00:00 2001 From: devonh Date: Mon, 24 Jun 2024 17:18:58 +0000 Subject: [PATCH 18/84] Correct error in user_directory docs (#17348) ### Pull Request Checklist * [X] Pull request is based on the develop branch * [X] Pull request includes a [changelog file](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#changelog). The entry should: - Be a short description of your change which makes sense to users. "Fixed a bug that prevented receiving messages from other servers." instead of "Moved X method from `EventStore` to `EventWorkerStore`.". - Use markdown where necessary, mostly for `code blocks`. - End with either a period (.) or an exclamation mark (!). - Start with a capital letter. - Feel free to credit yourself, by adding a sentence "Contributed by @github_username." or "Contributed by [Your Name]." to the end of the entry. * [X] [Code style](https://element-hq.github.io/synapse/latest/code_style.html) is correct (run the [linters](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#run-the-linters)) --------- Co-authored-by: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Co-authored-by: reivilibre --- changelog.d/17348.doc | 1 + docs/usage/configuration/config_documentation.md | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 changelog.d/17348.doc diff --git a/changelog.d/17348.doc b/changelog.d/17348.doc new file mode 100644 index 0000000000..4ce42bbadb --- /dev/null +++ b/changelog.d/17348.doc @@ -0,0 +1 @@ +Fix an error in the docs for `search_all_users` parameter under `user_directory`. 
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index b3db078703..ba9f21cdee 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -3807,7 +3807,8 @@ This setting defines options related to the user directory. This option has the following sub-options: * `enabled`: Defines whether users can search the user directory. If false then empty responses are returned to all queries. Defaults to true. -* `search_all_users`: Defines whether to search all users visible to your HS at the time the search is performed. If set to true, will return all users who share a room with the user from the homeserver. +* `search_all_users`: Defines whether to search all users visible to your homeserver at the time the search is performed. + If set to true, will return all users known to the homeserver matching the search query. If false, search results will only contain users visible in public rooms and users sharing a room with the requester. Defaults to false. From 0e71a2f2d1231603d4643f9402dbd7b4f4df226b Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 24 Jun 2024 15:56:27 -0500 Subject: [PATCH 19/84] Add TODO for filtering call invites in public rooms --- synapse/handlers/sliding_sync.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 3e49054e43..a6e84cb976 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -841,6 +841,8 @@ class SlidingSyncHandler: != Membership.JOIN, filter_send_to_client=True, ) + # TODO: Filter out `EventTypes.CallInvite` in public rooms, + # see https://github.com/element-hq/synapse/pull/16908#discussion_r1651598029 # Determine how many "live" events we have (events within the given token range). 
# From 21ca02c5ad2b030f3a3d76526690b23f40ef9412 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 24 Jun 2024 16:08:58 -0500 Subject: [PATCH 20/84] `newly_joined` vs `limited` already being tracked in a discussion See https://github.com/element-hq/synapse/pull/17320#discussion_r1646579623 if anything comes out of it. --- synapse/handlers/sliding_sync.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index a6e84cb976..4d73134e7f 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -829,9 +829,6 @@ class SlidingSyncHandler: stream=timeline_events[0].internal_metadata.stream_ordering - 1 ) - # TODO: Does `newly_joined` affect `limited`? It does in sync v2 but I fail - # to understand why. - # Make sure we don't expose any events that the client shouldn't see timeline_events = await filter_events_for_client( self.storage_controllers, From 6e8af8319373e1ab470f1d8eee0420f3be84184f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 24 Jun 2024 19:07:56 -0500 Subject: [PATCH 21/84] Add `is_invite` filtering to Sliding Sync `/sync` (#17335) Based on [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575): Sliding Sync --- changelog.d/17335.feature | 1 + synapse/handlers/sliding_sync.py | 19 +++- tests/handlers/test_sliding_sync.py | 74 +++++++++++--- tests/rest/client/test_sync.py | 148 +++++++++++++++++++++++----- 4 files changed, 199 insertions(+), 43 deletions(-) create mode 100644 changelog.d/17335.feature diff --git a/changelog.d/17335.feature b/changelog.d/17335.feature new file mode 100644 index 0000000000..c6beed42ed --- /dev/null +++ b/changelog.d/17335.feature @@ -0,0 +1 @@ +Add `is_invite` filtering to experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint. 
diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 16d94925f5..847a638bba 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -554,7 +554,7 @@ class SlidingSyncHandler: # Flatten out the map dm_room_id_set = set() - if dm_map: + if isinstance(dm_map, dict): for room_ids in dm_map.values(): # Account data should be a list of room IDs. Ignore anything else if isinstance(room_ids, list): @@ -593,8 +593,21 @@ class SlidingSyncHandler: ): filtered_room_id_set.remove(room_id) - if filters.is_invite: - raise NotImplementedError() + # Filter for rooms that the user has been invited to + if filters.is_invite is not None: + # Make a copy so we don't run into an error: `Set changed size during + # iteration`, when we filter out and remove items + for room_id in list(filtered_room_id_set): + room_for_user = sync_room_map[room_id] + # If we're looking for invite rooms, filter out rooms that the user is + # not invited to and vice versa + if ( + filters.is_invite and room_for_user.membership != Membership.INVITE + ) or ( + not filters.is_invite + and room_for_user.membership == Membership.INVITE + ): + filtered_room_id_set.remove(room_id) if filters.room_types: raise NotImplementedError() diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index 0358239c7f..8dd4521b18 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -1200,11 +1200,7 @@ class FilterRoomsTestCase(HomeserverTestCase): user2_tok = self.login(user2_id, "pass") # Create a normal room - room_id = self.helper.create_room_as( - user1_id, - is_public=False, - tok=user1_tok, - ) + room_id = self.helper.create_room_as(user1_id, tok=user1_tok) # Create a DM room dm_room_id = self._create_dm_room( @@ -1261,18 +1257,10 @@ class FilterRoomsTestCase(HomeserverTestCase): user1_tok = self.login(user1_id, "pass") # Create a normal room - room_id = self.helper.create_room_as( - 
user1_id, - is_public=False, - tok=user1_tok, - ) + room_id = self.helper.create_room_as(user1_id, tok=user1_tok) # Create an encrypted room - encrypted_room_id = self.helper.create_room_as( - user1_id, - is_public=False, - tok=user1_tok, - ) + encrypted_room_id = self.helper.create_room_as(user1_id, tok=user1_tok) self.helper.send_state( encrypted_room_id, EventTypes.RoomEncryption, @@ -1319,6 +1307,62 @@ class FilterRoomsTestCase(HomeserverTestCase): self.assertEqual(falsy_filtered_room_map.keys(), {room_id}) + def test_filter_invite_rooms(self) -> None: + """ + Test `filter.is_invite` for rooms that the user has been invited to + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + # Create a normal room + room_id = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.join(room_id, user1_id, tok=user1_tok) + + # Create a room that user1 is invited to + invite_room_id = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.invite(invite_room_id, src=user2_id, targ=user1_id, tok=user2_tok) + + after_rooms_token = self.event_sources.get_current_token() + + # Get the rooms the user should be syncing with + sync_room_map = self.get_success( + self.sliding_sync_handler.get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=None, + to_token=after_rooms_token, + ) + ) + + # Try with `is_invite=True` + truthy_filtered_room_map = self.get_success( + self.sliding_sync_handler.filter_rooms( + UserID.from_string(user1_id), + sync_room_map, + SlidingSyncConfig.SlidingSyncList.Filters( + is_invite=True, + ), + after_rooms_token, + ) + ) + + self.assertEqual(truthy_filtered_room_map.keys(), {invite_room_id}) + + # Try with `is_invite=False` + falsy_filtered_room_map = self.get_success( + self.sliding_sync_handler.filter_rooms( + UserID.from_string(user1_id), + sync_room_map, + 
SlidingSyncConfig.SlidingSyncList.Filters( + is_invite=False, + ), + after_rooms_token, + ) + ) + + self.assertEqual(falsy_filtered_room_map.keys(), {room_id}) + class SortRoomsTestCase(HomeserverTestCase): """ diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 5195659ec2..bfb26139d3 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -19,7 +19,8 @@ # # import json -from typing import List +import logging +from typing import Dict, List from parameterized import parameterized, parameterized_class @@ -44,6 +45,8 @@ from tests.federation.transport.test_knocking import ( ) from tests.server import TimedOutException +logger = logging.getLogger(__name__) + class FilterTestCase(unittest.HomeserverTestCase): user_id = "@apple:test" @@ -1234,12 +1237,58 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): self.store = hs.get_datastores().main self.event_sources = hs.get_event_sources() + def _add_new_dm_to_global_account_data( + self, source_user_id: str, target_user_id: str, target_room_id: str + ) -> None: + """ + Helper to handle inserting a new DM for the source user into global account data + (handles all of the list merging). + + Args: + source_user_id: The user ID of the DM mapping we're going to update + target_user_id: User ID of the person the DM is with + target_room_id: Room ID of the DM + """ + + # Get the current DM map + existing_dm_map = self.get_success( + self.store.get_global_account_data_by_type_for_user( + source_user_id, AccountDataTypes.DIRECT + ) + ) + # Scrutinize the account data since it has no concrete type. We're just copying + # everything into a known type. It should be a mapping from user ID to a list of + # room IDs. Ignore anything else. 
+ new_dm_map: Dict[str, List[str]] = {} + if isinstance(existing_dm_map, dict): + for user_id, room_ids in existing_dm_map.items(): + if isinstance(user_id, str) and isinstance(room_ids, list): + for room_id in room_ids: + if isinstance(room_id, str): + new_dm_map[user_id] = new_dm_map.get(user_id, []) + [ + room_id + ] + + # Add the new DM to the map + new_dm_map[target_user_id] = new_dm_map.get(target_user_id, []) + [ + target_room_id + ] + # Save the DM map to global account data + self.get_success( + self.store.add_account_data_for_user( + source_user_id, + AccountDataTypes.DIRECT, + new_dm_map, + ) + ) + def _create_dm_room( self, inviter_user_id: str, inviter_tok: str, invitee_user_id: str, invitee_tok: str, + should_join_room: bool = True, ) -> str: """ Helper to create a DM room as the "inviter" and invite the "invitee" user to the @@ -1260,24 +1309,17 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): tok=inviter_tok, extra_data={"is_direct": True}, ) - # Person that was invited joins the room - self.helper.join(room_id, invitee_user_id, tok=invitee_tok) + if should_join_room: + # Person that was invited joins the room + self.helper.join(room_id, invitee_user_id, tok=invitee_tok) # Mimic the client setting the room as a direct message in the global account - # data - self.get_success( - self.store.add_account_data_for_user( - invitee_user_id, - AccountDataTypes.DIRECT, - {inviter_user_id: [room_id]}, - ) + # data for both users. 
+ self._add_new_dm_to_global_account_data( + invitee_user_id, inviter_user_id, room_id ) - self.get_success( - self.store.add_account_data_for_user( - inviter_user_id, - AccountDataTypes.DIRECT, - {invitee_user_id: [room_id]}, - ) + self._add_new_dm_to_global_account_data( + inviter_user_id, invitee_user_id, room_id ) return room_id @@ -1397,15 +1439,28 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): user2_tok = self.login(user2_id, "pass") # Create a DM room - dm_room_id = self._create_dm_room( + joined_dm_room_id = self._create_dm_room( inviter_user_id=user1_id, inviter_tok=user1_tok, invitee_user_id=user2_id, invitee_tok=user2_tok, + should_join_room=True, + ) + invited_dm_room_id = self._create_dm_room( + inviter_user_id=user1_id, + inviter_tok=user1_tok, + invitee_user_id=user2_id, + invitee_tok=user2_tok, + should_join_room=False, ) # Create a normal room - room_id = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) + room_id = self.helper.create_room_as(user1_id, tok=user2_tok) + self.helper.join(room_id, user1_id, tok=user1_tok) + + # Create a room that user1 is invited to + invite_room_id = self.helper.create_room_as(user1_id, tok=user2_tok) + self.helper.invite(invite_room_id, src=user2_id, targ=user1_id, tok=user2_tok) # Make the Sliding Sync request channel = self.make_request( @@ -1413,18 +1468,34 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): self.sync_endpoint, { "lists": { + # Absense of filters does not imply "False" values + "all": { + "ranges": [[0, 99]], + "required_state": [], + "timeline_limit": 1, + "filters": {}, + }, + # Test single truthy filter "dms": { "ranges": [[0, 99]], "required_state": [], "timeline_limit": 1, "filters": {"is_dm": True}, }, - "foo-list": { + # Test single falsy filter + "non-dms": { "ranges": [[0, 99]], "required_state": [], "timeline_limit": 1, "filters": {"is_dm": False}, }, + # Test how multiple filters should stack (AND'd together) + "room-invites": { + "ranges": [[0, 
99]], + "required_state": [], + "timeline_limit": 1, + "filters": {"is_dm": False, "is_invite": True}, + }, } }, access_token=user1_tok, @@ -1434,32 +1505,59 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): # Make sure it has the foo-list we requested self.assertListEqual( list(channel.json_body["lists"].keys()), - ["dms", "foo-list"], + ["all", "dms", "non-dms", "room-invites"], channel.json_body["lists"].keys(), ) - # Make sure the list includes the room we are joined to + # Make sure the lists have the correct rooms + self.assertListEqual( + list(channel.json_body["lists"]["all"]["ops"]), + [ + { + "op": "SYNC", + "range": [0, 99], + "room_ids": [ + invite_room_id, + room_id, + invited_dm_room_id, + joined_dm_room_id, + ], + } + ], + list(channel.json_body["lists"]["all"]), + ) self.assertListEqual( list(channel.json_body["lists"]["dms"]["ops"]), [ { "op": "SYNC", "range": [0, 99], - "room_ids": [dm_room_id], + "room_ids": [invited_dm_room_id, joined_dm_room_id], } ], list(channel.json_body["lists"]["dms"]), ) self.assertListEqual( - list(channel.json_body["lists"]["foo-list"]["ops"]), + list(channel.json_body["lists"]["non-dms"]["ops"]), [ { "op": "SYNC", "range": [0, 99], - "room_ids": [room_id], + "room_ids": [invite_room_id, room_id], } ], - list(channel.json_body["lists"]["foo-list"]), + list(channel.json_body["lists"]["non-dms"]), + ) + self.assertListEqual( + list(channel.json_body["lists"]["room-invites"]["ops"]), + [ + { + "op": "SYNC", + "range": [0, 99], + "room_ids": [invite_room_id], + } + ], + list(channel.json_body["lists"]["room-invites"]), ) def test_sort_list(self) -> None: From 35683119890e06bb65bca24e303154acb4f62a1b Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 24 Jun 2024 19:08:18 -0500 Subject: [PATCH 22/84] Fix spelling typo --- synapse/handlers/sliding_sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 
4d73134e7f..d5390e8945 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -535,7 +535,7 @@ class SlidingSyncHandler: # 2) ----------------------------------------------------- # We fix-up newly_left rooms after the first fixup because it may have removed - # some left rooms that we can figure out our newly_left in the following code + # some left rooms that we can figure out are newly_left in the following code # 2) Fetch membership changes that fall in the range from `from_token` up to `to_token` membership_change_events_in_from_to_range = [] From a98cb87bee18c9028d03676ce544860239e1ff34 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 25 Jun 2024 09:57:34 +0100 Subject: [PATCH 23/84] Revert "Reduce device lists replication traffic." (#17360) Reverts element-hq/synapse#17333 It looks like master was still sending out replication RDATA with the old format... somehow --- changelog.d/17333.misc | 1 - synapse/replication/tcp/client.py | 19 ++-- synapse/replication/tcp/streams/_base.py | 12 +-- synapse/storage/databases/main/devices.py | 93 +++++++------------ .../storage/databases/main/end_to_end_keys.py | 4 +- tests/storage/test_devices.py | 8 -- 6 files changed, 48 insertions(+), 89 deletions(-) delete mode 100644 changelog.d/17333.misc diff --git a/changelog.d/17333.misc b/changelog.d/17333.misc deleted file mode 100644 index d3ef0b3777..0000000000 --- a/changelog.d/17333.misc +++ /dev/null @@ -1 +0,0 @@ -Handle device lists notifications for large accounts more efficiently in worker mode. 
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 3dddbb70b4..2d6d49eed7 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -114,19 +114,13 @@ class ReplicationDataHandler: """ all_room_ids: Set[str] = set() if stream_name == DeviceListsStream.NAME: - if any(not row.is_signature and not row.hosts_calculated for row in rows): + if any(row.entity.startswith("@") and not row.is_signature for row in rows): prev_token = self.store.get_device_stream_token() all_room_ids = await self.store.get_all_device_list_changes( prev_token, token ) self.store.device_lists_in_rooms_have_changed(all_room_ids, token) - # If we're sending federation we need to update the device lists - # outbound pokes stream change cache with updated hosts. - if self.send_handler and any(row.hosts_calculated for row in rows): - hosts = await self.store.get_destinations_for_device(token) - self.store.device_lists_outbound_pokes_have_changed(hosts, token) - self.store.process_replication_rows(stream_name, instance_name, token, rows) # NOTE: this must be called after process_replication_rows to ensure any # cache invalidations are first handled before any stream ID advances. @@ -439,11 +433,12 @@ class FederationSenderHandler: # The entities are either user IDs (starting with '@') whose devices # have changed, or remote servers that we need to tell about # changes. 
- if any(row.hosts_calculated for row in rows): - hosts = await self.store.get_destinations_for_device(token) - await self.federation_sender.send_device_messages( - hosts, immediate=False - ) + hosts = { + row.entity + for row in rows + if not row.entity.startswith("@") and not row.is_signature + } + await self.federation_sender.send_device_messages(hosts, immediate=False) elif stream_name == ToDeviceStream.NAME: # The to_device stream includes stuff to be pushed to both local diff --git a/synapse/replication/tcp/streams/_base.py b/synapse/replication/tcp/streams/_base.py index d021904de7..661206c841 100644 --- a/synapse/replication/tcp/streams/_base.py +++ b/synapse/replication/tcp/streams/_base.py @@ -549,14 +549,10 @@ class DeviceListsStream(_StreamFromIdGen): @attr.s(slots=True, frozen=True, auto_attribs=True) class DeviceListsStreamRow: - user_id: str + entity: str # Indicates that a user has signed their own device with their user-signing key is_signature: bool - # Indicates if this is a notification that we've calculated the hosts we - # need to send the update to. 
- hosts_calculated: bool - NAME = "device_lists" ROW_TYPE = DeviceListsStreamRow @@ -598,13 +594,13 @@ class DeviceListsStream(_StreamFromIdGen): upper_limit_token = min(upper_limit_token, signatures_to_token) device_updates = [ - (stream_id, (entity, False, hosts)) - for stream_id, (entity, hosts) in device_updates + (stream_id, (entity, False)) + for stream_id, (entity,) in device_updates if stream_id <= upper_limit_token ] signatures_updates = [ - (stream_id, (entity, True, False)) + (stream_id, (entity, True)) for stream_id, (entity,) in signatures_updates if stream_id <= upper_limit_token ] diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 5eeca6165d..40187496e2 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -164,24 +164,22 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): prefilled_cache=user_signature_stream_prefill, ) - self._device_list_federation_stream_cache = None - if hs.should_send_federation(): - ( - device_list_federation_prefill, - device_list_federation_list_id, - ) = self.db_pool.get_cache_dict( - db_conn, - "device_lists_outbound_pokes", - entity_column="destination", - stream_column="stream_id", - max_value=device_list_max, - limit=10000, - ) - self._device_list_federation_stream_cache = StreamChangeCache( - "DeviceListFederationStreamChangeCache", - device_list_federation_list_id, - prefilled_cache=device_list_federation_prefill, - ) + ( + device_list_federation_prefill, + device_list_federation_list_id, + ) = self.db_pool.get_cache_dict( + db_conn, + "device_lists_outbound_pokes", + entity_column="destination", + stream_column="stream_id", + max_value=device_list_max, + limit=10000, + ) + self._device_list_federation_stream_cache = StreamChangeCache( + "DeviceListFederationStreamChangeCache", + device_list_federation_list_id, + prefilled_cache=device_list_federation_prefill, + ) if 
hs.config.worker.run_background_tasks: self._clock.looping_call( @@ -209,30 +207,23 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): ) -> None: for row in rows: if row.is_signature: - self._user_signature_stream_cache.entity_has_changed(row.user_id, token) + self._user_signature_stream_cache.entity_has_changed(row.entity, token) continue # The entities are either user IDs (starting with '@') whose devices # have changed, or remote servers that we need to tell about # changes. - if not row.hosts_calculated: - self._device_list_stream_cache.entity_has_changed(row.user_id, token) - self.get_cached_devices_for_user.invalidate((row.user_id,)) - self._get_cached_user_device.invalidate((row.user_id,)) - self.get_device_list_last_stream_id_for_remote.invalidate( - (row.user_id,) + if row.entity.startswith("@"): + self._device_list_stream_cache.entity_has_changed(row.entity, token) + self.get_cached_devices_for_user.invalidate((row.entity,)) + self._get_cached_user_device.invalidate((row.entity,)) + self.get_device_list_last_stream_id_for_remote.invalidate((row.entity,)) + + else: + self._device_list_federation_stream_cache.entity_has_changed( + row.entity, token ) - def device_lists_outbound_pokes_have_changed( - self, destinations: StrCollection, token: int - ) -> None: - assert self._device_list_federation_stream_cache is not None - - for destination in destinations: - self._device_list_federation_stream_cache.entity_has_changed( - destination, token - ) - def device_lists_in_rooms_have_changed( self, room_ids: StrCollection, token: int ) -> None: @@ -372,11 +363,6 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): EDU contents. 
""" now_stream_id = self.get_device_stream_token() - if from_stream_id == now_stream_id: - return now_stream_id, [] - - if self._device_list_federation_stream_cache is None: - raise Exception("Func can only be used on federation senders") has_changed = self._device_list_federation_stream_cache.has_entity_changed( destination, int(from_stream_id) @@ -1032,10 +1018,10 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): # This query Does The Right Thing where it'll correctly apply the # bounds to the inner queries. sql = """ - SELECT stream_id, user_id, hosts FROM ( - SELECT stream_id, user_id, false AS hosts FROM device_lists_stream + SELECT stream_id, entity FROM ( + SELECT stream_id, user_id AS entity FROM device_lists_stream UNION ALL - SELECT DISTINCT stream_id, user_id, true AS hosts FROM device_lists_outbound_pokes + SELECT stream_id, destination AS entity FROM device_lists_outbound_pokes ) AS e WHERE ? < stream_id AND stream_id <= ? ORDER BY stream_id ASC @@ -1591,14 +1577,6 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): get_device_list_changes_in_room_txn, ) - async def get_destinations_for_device(self, stream_id: int) -> StrCollection: - return await self.db_pool.simple_select_onecol( - table="device_lists_outbound_pokes", - keyvalues={"stream_id": stream_id}, - retcol="destination", - desc="get_destinations_for_device", - ) - class DeviceBackgroundUpdateStore(SQLBaseStore): def __init__( @@ -2134,13 +2112,12 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): stream_ids: List[int], context: Optional[Dict[str, str]], ) -> None: - if self._device_list_federation_stream_cache: - for host in hosts: - txn.call_after( - self._device_list_federation_stream_cache.entity_has_changed, - host, - stream_ids[-1], - ) + for host in hosts: + txn.call_after( + self._device_list_federation_stream_cache.entity_has_changed, + host, + stream_ids[-1], + ) now = self._clock.time_msec() stream_id_iterator = 
iter(stream_ids) diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index 9e6c9561ae..38d8785faa 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -123,9 +123,9 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker if stream_name == DeviceListsStream.NAME: for row in rows: assert isinstance(row, DeviceListsStream.DeviceListsStreamRow) - if not row.hosts_calculated: + if row.entity.startswith("@"): self._get_e2e_device_keys_for_federation_query_inner.invalidate( - (row.user_id,) + (row.entity,) ) super().process_replication_rows(stream_name, instance_name, token, rows) diff --git a/tests/storage/test_devices.py b/tests/storage/test_devices.py index ba01b038ab..7f975d04ff 100644 --- a/tests/storage/test_devices.py +++ b/tests/storage/test_devices.py @@ -36,14 +36,6 @@ class DeviceStoreTestCase(HomeserverTestCase): def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.store = hs.get_datastores().main - def default_config(self) -> JsonDict: - config = super().default_config() - - # We 'enable' federation otherwise `get_device_updates_by_remote` will - # throw an exception. - config["federation_sender_instances"] = ["master"] - return config - def add_device_change(self, user_id: str, device_ids: List[str], host: str) -> None: """Add a device list change for the given device to `device_lists_outbound_pokes` table. 
From 554a92601a4bf61f9076adfffb613a2c19871446 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 25 Jun 2024 10:34:34 +0100 Subject: [PATCH 24/84] Reintroduce "Reduce device lists replication traffic."" (#17361) Reintroduces https://github.com/element-hq/synapse/pull/17333 Turns out the reason for revert was down to two master instances running --- changelog.d/17333.misc | 1 + synapse/replication/tcp/client.py | 19 ++-- synapse/replication/tcp/streams/_base.py | 12 ++- synapse/storage/databases/main/devices.py | 93 ++++++++++++------- .../storage/databases/main/end_to_end_keys.py | 4 +- tests/storage/test_devices.py | 8 ++ 6 files changed, 89 insertions(+), 48 deletions(-) create mode 100644 changelog.d/17333.misc diff --git a/changelog.d/17333.misc b/changelog.d/17333.misc new file mode 100644 index 0000000000..d3ef0b3777 --- /dev/null +++ b/changelog.d/17333.misc @@ -0,0 +1 @@ +Handle device lists notifications for large accounts more efficiently in worker mode. diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 2d6d49eed7..3dddbb70b4 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -114,13 +114,19 @@ class ReplicationDataHandler: """ all_room_ids: Set[str] = set() if stream_name == DeviceListsStream.NAME: - if any(row.entity.startswith("@") and not row.is_signature for row in rows): + if any(not row.is_signature and not row.hosts_calculated for row in rows): prev_token = self.store.get_device_stream_token() all_room_ids = await self.store.get_all_device_list_changes( prev_token, token ) self.store.device_lists_in_rooms_have_changed(all_room_ids, token) + # If we're sending federation we need to update the device lists + # outbound pokes stream change cache with updated hosts.
+ if self.send_handler and any(row.hosts_calculated for row in rows): + hosts = await self.store.get_destinations_for_device(token) + self.store.device_lists_outbound_pokes_have_changed(hosts, token) + self.store.process_replication_rows(stream_name, instance_name, token, rows) # NOTE: this must be called after process_replication_rows to ensure any # cache invalidations are first handled before any stream ID advances. @@ -433,12 +439,11 @@ class FederationSenderHandler: # The entities are either user IDs (starting with '@') whose devices # have changed, or remote servers that we need to tell about # changes. - hosts = { - row.entity - for row in rows - if not row.entity.startswith("@") and not row.is_signature - } - await self.federation_sender.send_device_messages(hosts, immediate=False) + if any(row.hosts_calculated for row in rows): + hosts = await self.store.get_destinations_for_device(token) + await self.federation_sender.send_device_messages( + hosts, immediate=False + ) elif stream_name == ToDeviceStream.NAME: # The to_device stream includes stuff to be pushed to both local diff --git a/synapse/replication/tcp/streams/_base.py b/synapse/replication/tcp/streams/_base.py index 661206c841..d021904de7 100644 --- a/synapse/replication/tcp/streams/_base.py +++ b/synapse/replication/tcp/streams/_base.py @@ -549,10 +549,14 @@ class DeviceListsStream(_StreamFromIdGen): @attr.s(slots=True, frozen=True, auto_attribs=True) class DeviceListsStreamRow: - entity: str + user_id: str # Indicates that a user has signed their own device with their user-signing key is_signature: bool + # Indicates if this is a notification that we've calculated the hosts we + # need to send the update to. 
+ hosts_calculated: bool + NAME = "device_lists" ROW_TYPE = DeviceListsStreamRow @@ -594,13 +598,13 @@ class DeviceListsStream(_StreamFromIdGen): upper_limit_token = min(upper_limit_token, signatures_to_token) device_updates = [ - (stream_id, (entity, False)) - for stream_id, (entity,) in device_updates + (stream_id, (entity, False, hosts)) + for stream_id, (entity, hosts) in device_updates if stream_id <= upper_limit_token ] signatures_updates = [ - (stream_id, (entity, True)) + (stream_id, (entity, True, False)) for stream_id, (entity,) in signatures_updates if stream_id <= upper_limit_token ] diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 40187496e2..5eeca6165d 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -164,22 +164,24 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): prefilled_cache=user_signature_stream_prefill, ) - ( - device_list_federation_prefill, - device_list_federation_list_id, - ) = self.db_pool.get_cache_dict( - db_conn, - "device_lists_outbound_pokes", - entity_column="destination", - stream_column="stream_id", - max_value=device_list_max, - limit=10000, - ) - self._device_list_federation_stream_cache = StreamChangeCache( - "DeviceListFederationStreamChangeCache", - device_list_federation_list_id, - prefilled_cache=device_list_federation_prefill, - ) + self._device_list_federation_stream_cache = None + if hs.should_send_federation(): + ( + device_list_federation_prefill, + device_list_federation_list_id, + ) = self.db_pool.get_cache_dict( + db_conn, + "device_lists_outbound_pokes", + entity_column="destination", + stream_column="stream_id", + max_value=device_list_max, + limit=10000, + ) + self._device_list_federation_stream_cache = StreamChangeCache( + "DeviceListFederationStreamChangeCache", + device_list_federation_list_id, + prefilled_cache=device_list_federation_prefill, + ) if 
hs.config.worker.run_background_tasks: self._clock.looping_call( @@ -207,23 +209,30 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): ) -> None: for row in rows: if row.is_signature: - self._user_signature_stream_cache.entity_has_changed(row.entity, token) + self._user_signature_stream_cache.entity_has_changed(row.user_id, token) continue # The entities are either user IDs (starting with '@') whose devices # have changed, or remote servers that we need to tell about # changes. - if row.entity.startswith("@"): - self._device_list_stream_cache.entity_has_changed(row.entity, token) - self.get_cached_devices_for_user.invalidate((row.entity,)) - self._get_cached_user_device.invalidate((row.entity,)) - self.get_device_list_last_stream_id_for_remote.invalidate((row.entity,)) - - else: - self._device_list_federation_stream_cache.entity_has_changed( - row.entity, token + if not row.hosts_calculated: + self._device_list_stream_cache.entity_has_changed(row.user_id, token) + self.get_cached_devices_for_user.invalidate((row.user_id,)) + self._get_cached_user_device.invalidate((row.user_id,)) + self.get_device_list_last_stream_id_for_remote.invalidate( + (row.user_id,) ) + def device_lists_outbound_pokes_have_changed( + self, destinations: StrCollection, token: int + ) -> None: + assert self._device_list_federation_stream_cache is not None + + for destination in destinations: + self._device_list_federation_stream_cache.entity_has_changed( + destination, token + ) + def device_lists_in_rooms_have_changed( self, room_ids: StrCollection, token: int ) -> None: @@ -363,6 +372,11 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): EDU contents. 
""" now_stream_id = self.get_device_stream_token() + if from_stream_id == now_stream_id: + return now_stream_id, [] + + if self._device_list_federation_stream_cache is None: + raise Exception("Func can only be used on federation senders") has_changed = self._device_list_federation_stream_cache.has_entity_changed( destination, int(from_stream_id) @@ -1018,10 +1032,10 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): # This query Does The Right Thing where it'll correctly apply the # bounds to the inner queries. sql = """ - SELECT stream_id, entity FROM ( - SELECT stream_id, user_id AS entity FROM device_lists_stream + SELECT stream_id, user_id, hosts FROM ( + SELECT stream_id, user_id, false AS hosts FROM device_lists_stream UNION ALL - SELECT stream_id, destination AS entity FROM device_lists_outbound_pokes + SELECT DISTINCT stream_id, user_id, true AS hosts FROM device_lists_outbound_pokes ) AS e WHERE ? < stream_id AND stream_id <= ? ORDER BY stream_id ASC @@ -1577,6 +1591,14 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): get_device_list_changes_in_room_txn, ) + async def get_destinations_for_device(self, stream_id: int) -> StrCollection: + return await self.db_pool.simple_select_onecol( + table="device_lists_outbound_pokes", + keyvalues={"stream_id": stream_id}, + retcol="destination", + desc="get_destinations_for_device", + ) + class DeviceBackgroundUpdateStore(SQLBaseStore): def __init__( @@ -2112,12 +2134,13 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): stream_ids: List[int], context: Optional[Dict[str, str]], ) -> None: - for host in hosts: - txn.call_after( - self._device_list_federation_stream_cache.entity_has_changed, - host, - stream_ids[-1], - ) + if self._device_list_federation_stream_cache: + for host in hosts: + txn.call_after( + self._device_list_federation_stream_cache.entity_has_changed, + host, + stream_ids[-1], + ) now = self._clock.time_msec() stream_id_iterator = 
iter(stream_ids) diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index 38d8785faa..9e6c9561ae 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -123,9 +123,9 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker if stream_name == DeviceListsStream.NAME: for row in rows: assert isinstance(row, DeviceListsStream.DeviceListsStreamRow) - if row.entity.startswith("@"): + if not row.hosts_calculated: self._get_e2e_device_keys_for_federation_query_inner.invalidate( - (row.entity,) + (row.user_id,) ) super().process_replication_rows(stream_name, instance_name, token, rows) diff --git a/tests/storage/test_devices.py b/tests/storage/test_devices.py index 7f975d04ff..ba01b038ab 100644 --- a/tests/storage/test_devices.py +++ b/tests/storage/test_devices.py @@ -36,6 +36,14 @@ class DeviceStoreTestCase(HomeserverTestCase): def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.store = hs.get_datastores().main + def default_config(self) -> JsonDict: + config = super().default_config() + + # We 'enable' federation otherwise `get_device_updates_by_remote` will + # throw an exception. + config["federation_sender_instances"] = ["master"] + return config + def add_device_change(self, user_id: str, device_ids: List[str], host: str) -> None: """Add a device list change for the given device to `device_lists_outbound_pokes` table. From c89fea3fd1f47b43c4d500dd7d024b2f9b24d2ad Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 25 Jun 2024 11:17:39 +0100 Subject: [PATCH 25/84] Limit amount of replication we send (#17358) Fixes up #17333, where we failed to actually send less data (the `DISTINCT` didn't work due to `stream_id` being different). We fix this by making it so that every device list outbound poke for a given user ID has the same stream ID. 
We can't change the query to only return e.g. max stream ID as the receivers look up the destinations to send to by doing `SELECT WHERE stream_id = ?` --- changelog.d/17358.misc | 1 + synapse/storage/databases/main/devices.py | 15 +++++++-------- 2 files changed, 8 insertions(+), 8 deletions(-) create mode 100644 changelog.d/17358.misc diff --git a/changelog.d/17358.misc b/changelog.d/17358.misc new file mode 100644 index 0000000000..d3ef0b3777 --- /dev/null +++ b/changelog.d/17358.misc @@ -0,0 +1 @@ +Handle device lists notifications for large accounts more efficiently in worker mode. diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 5eeca6165d..59a035dd62 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -2131,7 +2131,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): user_id: str, device_id: str, hosts: Collection[str], - stream_ids: List[int], + stream_id: int, context: Optional[Dict[str, str]], ) -> None: if self._device_list_federation_stream_cache: @@ -2139,11 +2139,10 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): txn.call_after( self._device_list_federation_stream_cache.entity_has_changed, host, - stream_ids[-1], + stream_id, ) now = self._clock.time_msec() - stream_id_iterator = iter(stream_ids) encoded_context = json_encoder.encode(context) mark_sent = not self.hs.is_mine_id(user_id) @@ -2152,7 +2151,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): ( destination, self._instance_name, - next(stream_id_iterator), + stream_id, user_id, device_id, mark_sent, @@ -2337,22 +2336,22 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): return def add_device_list_outbound_pokes_txn( - txn: LoggingTransaction, stream_ids: List[int] + txn: LoggingTransaction, stream_id: int ) -> None: self._add_device_outbound_poke_to_stream_txn( txn, user_id=user_id, device_id=device_id, 
hosts=hosts, - stream_ids=stream_ids, + stream_id=stream_id, context=context, ) - async with self._device_list_id_gen.get_next_mult(len(hosts)) as stream_ids: + async with self._device_list_id_gen.get_next() as stream_id: return await self.db_pool.runInteraction( "add_device_list_outbound_pokes", add_device_list_outbound_pokes_txn, - stream_ids, + stream_id, ) async def add_remote_device_list_to_pending( From f79dbd0f61194929585d7010a3ec1b9ee208f033 Mon Sep 17 00:00:00 2001 From: douglaz Date: Tue, 25 Jun 2024 11:07:13 +0000 Subject: [PATCH 26/84] Fix refreshable_access_token_lifetime typo (#17357) Simple typo in the docs --- docs/usage/configuration/config_documentation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index ba9f21cdee..80a7bf9d21 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -2719,7 +2719,7 @@ Example configuration: session_lifetime: 24h ``` --- -### `refresh_access_token_lifetime` +### `refreshable_access_token_lifetime` Time that an access token remains valid for, if the session is using refresh tokens. 
From a023538822c8e241cdd3180c9cbbcb0f4eb84844 Mon Sep 17 00:00:00 2001 From: Shay Date: Tue, 25 Jun 2024 07:35:37 -0700 Subject: [PATCH 27/84] Re-introduce federation /download endpoint (#17350) --- changelog.d/17350.feature | 2 + .../federation/transport/server/__init__.py | 8 + synapse/federation/transport/server/_base.py | 24 +- .../federation/transport/server/federation.py | 41 +++ synapse/media/_base.py | 78 +++++- synapse/media/media_repository.py | 14 +- synapse/media/media_storage.py | 259 +++++++++++++++++- tests/federation/test_federation_media.py | 173 ++++++++++++ 8 files changed, 588 insertions(+), 11 deletions(-) create mode 100644 changelog.d/17350.feature create mode 100644 tests/federation/test_federation_media.py diff --git a/changelog.d/17350.feature b/changelog.d/17350.feature new file mode 100644 index 0000000000..709366f5b8 --- /dev/null +++ b/changelog.d/17350.feature @@ -0,0 +1,2 @@ +Support [MSC3916](https://github.com/matrix-org/matrix-spec-proposals/blob/rav/authentication-for-media/proposals/3916-authentication-for-media.md) +by adding a federation /download endpoint. 
\ No newline at end of file diff --git a/synapse/federation/transport/server/__init__.py b/synapse/federation/transport/server/__init__.py index bac569e977..edaf0196d6 100644 --- a/synapse/federation/transport/server/__init__.py +++ b/synapse/federation/transport/server/__init__.py @@ -33,6 +33,7 @@ from synapse.federation.transport.server.federation import ( FEDERATION_SERVLET_CLASSES, FederationAccountStatusServlet, FederationUnstableClientKeysClaimServlet, + FederationUnstableMediaDownloadServlet, ) from synapse.http.server import HttpServer, JsonResource from synapse.http.servlet import ( @@ -315,6 +316,13 @@ def register_servlets( ): continue + if servletclass == FederationUnstableMediaDownloadServlet: + if ( + not hs.config.server.enable_media_repo + or not hs.config.experimental.msc3916_authenticated_media_enabled + ): + continue + servletclass( hs=hs, authenticator=authenticator, diff --git a/synapse/federation/transport/server/_base.py b/synapse/federation/transport/server/_base.py index db0f5076a9..4e2717b565 100644 --- a/synapse/federation/transport/server/_base.py +++ b/synapse/federation/transport/server/_base.py @@ -360,13 +360,29 @@ class BaseFederationServlet: "request" ) return None + if ( + func.__self__.__class__.__name__ # type: ignore + == "FederationUnstableMediaDownloadServlet" + ): + response = await func( + origin, content, request, *args, **kwargs + ) + else: + response = await func( + origin, content, request.args, *args, **kwargs + ) + else: + if ( + func.__self__.__class__.__name__ # type: ignore + == "FederationUnstableMediaDownloadServlet" + ): + response = await func( + origin, content, request, *args, **kwargs + ) + else: response = await func( origin, content, request.args, *args, **kwargs ) - else: - response = await func( - origin, content, request.args, *args, **kwargs - ) finally: # if we used the origin's context as the parent, add a new span using # the servlet span as a parent, so that we have a link diff --git 
a/synapse/federation/transport/server/federation.py b/synapse/federation/transport/server/federation.py index a59734785f..67bb907050 100644 --- a/synapse/federation/transport/server/federation.py +++ b/synapse/federation/transport/server/federation.py @@ -44,10 +44,13 @@ from synapse.federation.transport.server._base import ( ) from synapse.http.servlet import ( parse_boolean_from_args, + parse_integer, parse_integer_from_args, parse_string_from_args, parse_strings_from_args, ) +from synapse.http.site import SynapseRequest +from synapse.media._base import DEFAULT_MAX_TIMEOUT_MS, MAXIMUM_ALLOWED_MAX_TIMEOUT_MS from synapse.types import JsonDict from synapse.util import SYNAPSE_VERSION from synapse.util.ratelimitutils import FederationRateLimiter @@ -787,6 +790,43 @@ class FederationAccountStatusServlet(BaseFederationServerServlet): return 200, {"account_statuses": statuses, "failures": failures} +class FederationUnstableMediaDownloadServlet(BaseFederationServerServlet): + """ + Implementation of new federation media `/download` endpoint outlined in MSC3916. Returns + a multipart/mixed response consisting of a JSON object and the requested media + item. This endpoint only returns local media. 
+ """ + + PATH = "/media/download/(?P[^/]*)" + PREFIX = FEDERATION_UNSTABLE_PREFIX + "/org.matrix.msc3916" + RATELIMIT = True + + def __init__( + self, + hs: "HomeServer", + ratelimiter: FederationRateLimiter, + authenticator: Authenticator, + server_name: str, + ): + super().__init__(hs, authenticator, ratelimiter, server_name) + self.media_repo = self.hs.get_media_repository() + + async def on_GET( + self, + origin: Optional[str], + content: Literal[None], + request: SynapseRequest, + media_id: str, + ) -> None: + max_timeout_ms = parse_integer( + request, "timeout_ms", default=DEFAULT_MAX_TIMEOUT_MS + ) + max_timeout_ms = min(max_timeout_ms, MAXIMUM_ALLOWED_MAX_TIMEOUT_MS) + await self.media_repo.get_local_media( + request, media_id, None, max_timeout_ms, federation=True + ) + + FEDERATION_SERVLET_CLASSES: Tuple[Type[BaseFederationServlet], ...] = ( FederationSendServlet, FederationEventServlet, @@ -818,4 +858,5 @@ FEDERATION_SERVLET_CLASSES: Tuple[Type[BaseFederationServlet], ...] = ( FederationV1SendKnockServlet, FederationMakeKnockServlet, FederationAccountStatusServlet, + FederationUnstableMediaDownloadServlet, ) diff --git a/synapse/media/_base.py b/synapse/media/_base.py index 3fbed6062f..7ad0b7c3cf 100644 --- a/synapse/media/_base.py +++ b/synapse/media/_base.py @@ -25,7 +25,16 @@ import os import urllib from abc import ABC, abstractmethod from types import TracebackType -from typing import Awaitable, Dict, Generator, List, Optional, Tuple, Type +from typing import ( + TYPE_CHECKING, + Awaitable, + Dict, + Generator, + List, + Optional, + Tuple, + Type, +) import attr @@ -37,8 +46,13 @@ from synapse.api.errors import Codes, cs_error from synapse.http.server import finish_request, respond_with_json from synapse.http.site import SynapseRequest from synapse.logging.context import make_deferred_yieldable +from synapse.util import Clock from synapse.util.stringutils import is_ascii +if TYPE_CHECKING: + from synapse.storage.databases.main.media_repository 
import LocalMedia + + logger = logging.getLogger(__name__) # list all text content types that will have the charset default to UTF-8 when @@ -260,6 +274,68 @@ def _can_encode_filename_as_token(x: str) -> bool: return True +async def respond_with_multipart_responder( + clock: Clock, + request: SynapseRequest, + responder: "Optional[Responder]", + media_info: "LocalMedia", +) -> None: + """ + Responds to requests originating from the federation media `/download` endpoint by + streaming a multipart/mixed response + + Args: + clock: + request: the federation request to respond to + responder: the responder which will send the response + media_info: metadata about the media item + """ + if not responder: + respond_404(request) + return + + # If we have a responder we *must* use it as a context manager. + with responder: + if request._disconnected: + logger.warning( + "Not sending response to request %s, already disconnected.", request + ) + return + + from synapse.media.media_storage import MultipartFileConsumer + + # note that currently the json_object is just {}, this will change when linked media + # is implemented + multipart_consumer = MultipartFileConsumer( + clock, request, media_info.media_type, {}, media_info.media_length + ) + + logger.debug("Responding to media request with responder %s", responder) + if media_info.media_length is not None: + content_length = multipart_consumer.content_length() + assert content_length is not None + request.setHeader(b"Content-Length", b"%d" % (content_length,)) + + request.setHeader( + b"Content-Type", + b"multipart/mixed; boundary=%s" % multipart_consumer.boundary, + ) + + try: + await responder.write_to_consumer(multipart_consumer) + except Exception as e: + # The majority of the time this will be due to the client having gone + # away. Unfortunately, Twisted simply throws a generic exception at us + # in that case. 
+ logger.warning("Failed to write to consumer: %s %s", type(e), e) + + # Unregister the producer, if it has one, so Twisted doesn't complain + if request.producer: + request.unregisterProducer() + + finish_request(request) + + async def respond_with_responder( request: SynapseRequest, responder: "Optional[Responder]", diff --git a/synapse/media/media_repository.py b/synapse/media/media_repository.py index 6ed56099ca..1436329fad 100644 --- a/synapse/media/media_repository.py +++ b/synapse/media/media_repository.py @@ -54,6 +54,7 @@ from synapse.media._base import ( ThumbnailInfo, get_filename_from_headers, respond_404, + respond_with_multipart_responder, respond_with_responder, ) from synapse.media.filepath import MediaFilePaths @@ -429,6 +430,7 @@ class MediaRepository: media_id: str, name: Optional[str], max_timeout_ms: int, + federation: bool = False, ) -> None: """Responds to requests for local media, if exists, or returns 404. @@ -440,6 +442,7 @@ class MediaRepository: the filename in the Content-Disposition header of the response. max_timeout_ms: the maximum number of milliseconds to wait for the media to be uploaded. 
+ federation: whether the local media being fetched is for a federation request Returns: Resolves once a response has successfully been written to request @@ -460,9 +463,14 @@ class MediaRepository: file_info = FileInfo(None, media_id, url_cache=bool(url_cache)) responder = await self.media_storage.fetch_media(file_info) - await respond_with_responder( - request, responder, media_type, media_length, upload_name - ) + if federation: + await respond_with_multipart_responder( + self.clock, request, responder, media_info + ) + else: + await respond_with_responder( + request, responder, media_type, media_length, upload_name + ) async def get_remote_media( self, diff --git a/synapse/media/media_storage.py b/synapse/media/media_storage.py index b3cd3fd8f4..1be2c9b5f5 100644 --- a/synapse/media/media_storage.py +++ b/synapse/media/media_storage.py @@ -19,9 +19,12 @@ # # import contextlib +import json import logging import os import shutil +from contextlib import closing +from io import BytesIO from types import TracebackType from typing import ( IO, @@ -30,24 +33,35 @@ from typing import ( AsyncIterator, BinaryIO, Callable, + List, Optional, Sequence, Tuple, Type, + Union, + cast, ) +from uuid import uuid4 import attr +from zope.interface import implementer +from twisted.internet import interfaces from twisted.internet.defer import Deferred from twisted.internet.interfaces import IConsumer from twisted.protocols.basic import FileSender from synapse.api.errors import NotFoundError -from synapse.logging.context import defer_to_thread, make_deferred_yieldable +from synapse.logging.context import ( + defer_to_thread, + make_deferred_yieldable, + run_in_background, +) from synapse.logging.opentracing import start_active_span, trace, trace_with_opname from synapse.util import Clock from synapse.util.file_consumer import BackgroundFileConsumer +from ..types import JsonDict from ._base import FileInfo, Responder from .filepath import MediaFilePaths @@ -57,6 +71,8 @@ if 
TYPE_CHECKING: logger = logging.getLogger(__name__) +CRLF = b"\r\n" + class MediaStorage: """Responsible for storing/fetching files from local sources. @@ -174,7 +190,7 @@ class MediaStorage: and configured storage providers. Args: - file_info + file_info: Metadata about the media file Returns: Returns a Responder if the file was found, otherwise None. @@ -316,7 +332,7 @@ class FileResponder(Responder): """Wraps an open file that can be sent to a request. Args: - open_file: A file like object to be streamed ot the client, + open_file: A file like object to be streamed to the client, is closed when finished streaming. """ @@ -370,3 +386,240 @@ class ReadableFileWrapper: # We yield to the reactor by sleeping for 0 seconds. await self.clock.sleep(0) + + +@implementer(interfaces.IConsumer) +@implementer(interfaces.IPushProducer) +class MultipartFileConsumer: + """Wraps a given consumer so that any data that gets written to it gets + converted to a multipart format. + """ + + def __init__( + self, + clock: Clock, + wrapped_consumer: interfaces.IConsumer, + file_content_type: str, + json_object: JsonDict, + content_length: Optional[int] = None, + ) -> None: + self.clock = clock + self.wrapped_consumer = wrapped_consumer + self.json_field = json_object + self.json_field_written = False + self.content_type_written = False + self.file_content_type = file_content_type + self.boundary = uuid4().hex.encode("ascii") + + # The producer that registered with us, and if it's a push or pull + # producer. + self.producer: Optional["interfaces.IProducer"] = None + self.streaming: Optional[bool] = None + + # Whether the wrapped consumer has asked us to pause. + self.paused = False + + self.length = content_length + + ### IConsumer APIs ### + + def registerProducer( + self, producer: "interfaces.IProducer", streaming: bool + ) -> None: + """ + Register to receive data from a producer. + + This sets self to be a consumer for a producer. 
When this object runs + out of data (as when a send(2) call on a socket succeeds in moving the + last data from a userspace buffer into a kernelspace buffer), it will + ask the producer to resumeProducing(). + + For L{IPullProducer} providers, C{resumeProducing} will be called once + each time data is required. + + For L{IPushProducer} providers, C{pauseProducing} will be called + whenever the write buffer fills up and C{resumeProducing} will only be + called when it empties. The consumer will only call C{resumeProducing} + to balance a previous C{pauseProducing} call; the producer is assumed + to start in an un-paused state. + + @param streaming: C{True} if C{producer} provides L{IPushProducer}, + C{False} if C{producer} provides L{IPullProducer}. + + @raise RuntimeError: If a producer is already registered. + """ + self.producer = producer + self.streaming = streaming + + self.wrapped_consumer.registerProducer(self, True) + + # kick off producing if `self.producer` is not a streaming producer + if not streaming: + self.resumeProducing() + + def unregisterProducer(self) -> None: + """ + Stop consuming data from a producer, without disconnecting. + """ + self.wrapped_consumer.write(CRLF + b"--" + self.boundary + b"--" + CRLF) + self.wrapped_consumer.unregisterProducer() + self.paused = True + + def write(self, data: bytes) -> None: + """ + The producer will write data by calling this method. + + The implementation must be non-blocking and perform whatever + buffering is necessary. If the producer has provided enough data + for now and it is a L{IPushProducer}, the consumer may call its + C{pauseProducing} method. 
+ """ + if not self.json_field_written: + self.wrapped_consumer.write(CRLF + b"--" + self.boundary + CRLF) + + content_type = Header(b"Content-Type", b"application/json") + self.wrapped_consumer.write(bytes(content_type) + CRLF) + + json_field = json.dumps(self.json_field) + json_bytes = json_field.encode("utf-8") + self.wrapped_consumer.write(CRLF + json_bytes) + self.wrapped_consumer.write(CRLF + b"--" + self.boundary + CRLF) + + self.json_field_written = True + + # if we haven't written the content type yet, do so + if not self.content_type_written: + type = self.file_content_type.encode("utf-8") + content_type = Header(b"Content-Type", type) + self.wrapped_consumer.write(bytes(content_type) + CRLF + CRLF) + self.content_type_written = True + + self.wrapped_consumer.write(data) + + ### IPushProducer APIs ### + + def stopProducing(self) -> None: + """ + Stop producing data. + + This tells a producer that its consumer has died, so it must stop + producing data for good. + """ + assert self.producer is not None + + self.paused = True + self.producer.stopProducing() + + def pauseProducing(self) -> None: + """ + Pause producing data. + + Tells a producer that it has produced too much data to process for + the time being, and to stop until C{resumeProducing()} is called. + """ + assert self.producer is not None + + self.paused = True + + if self.streaming: + cast("interfaces.IPushProducer", self.producer).pauseProducing() + else: + self.paused = True + + def resumeProducing(self) -> None: + """ + Resume producing data. + + This tells a producer to re-add itself to the main loop and produce + more data for its consumer. + """ + assert self.producer is not None + + if self.streaming: + cast("interfaces.IPushProducer", self.producer).resumeProducing() + else: + # If the producer is not a streaming producer we need to start + # repeatedly calling `resumeProducing` in a loop. 
+ run_in_background(self._resumeProducingRepeatedly) + + def content_length(self) -> Optional[int]: + """ + Calculate the content length of the multipart response + in bytes. + """ + if not self.length: + return None + # calculate length of json field and content-type header + json_field = json.dumps(self.json_field) + json_bytes = json_field.encode("utf-8") + json_length = len(json_bytes) + + type = self.file_content_type.encode("utf-8") + content_type = Header(b"Content-Type", type) + type_length = len(bytes(content_type)) + + # 154 is the length of the elements that aren't variable, ie + # CRLFs and boundary strings, etc + self.length += json_length + type_length + 154 + + return self.length + + ### Internal APIs. ### + + async def _resumeProducingRepeatedly(self) -> None: + assert self.producer is not None + assert not self.streaming + + producer = cast("interfaces.IPullProducer", self.producer) + + self.paused = False + while not self.paused: + producer.resumeProducing() + await self.clock.sleep(0) + + +class Header: + """ + `Header` This class is a tiny wrapper that produces + request headers. We can't use standard python header + class because it encodes unicode fields using =? bla bla ?= + encoding, which is correct, but no one in HTTP world expects + that, everyone wants utf-8 raw bytes. 
(stolen from treq.multipart) + + """ + + def __init__( + self, + name: bytes, + value: Any, + params: Optional[List[Tuple[Any, Any]]] = None, + ): + self.name = name + self.value = value + self.params = params or [] + + def add_param(self, name: Any, value: Any) -> None: + self.params.append((name, value)) + + def __bytes__(self) -> bytes: + with closing(BytesIO()) as h: + h.write(self.name + b": " + escape(self.value).encode("us-ascii")) + if self.params: + for name, val in self.params: + h.write(b"; ") + h.write(escape(name).encode("us-ascii")) + h.write(b"=") + h.write(b'"' + escape(val).encode("utf-8") + b'"') + h.seek(0) + return h.read() + + +def escape(value: Union[str, bytes]) -> str: + """ + This function prevents header values from corrupting the request, + a newline in the file name parameter makes form-data request unreadable + for a majority of parsers. (stolen from treq.multipart) + """ + if isinstance(value, bytes): + value = value.decode("utf-8") + return value.replace("\r", "").replace("\n", "").replace('"', '\\"') diff --git a/tests/federation/test_federation_media.py b/tests/federation/test_federation_media.py new file mode 100644 index 0000000000..2c396adbe3 --- /dev/null +++ b/tests/federation/test_federation_media.py @@ -0,0 +1,173 @@ +# +# This file is licensed under the Affero General Public License (AGPL) version 3. +# +# Copyright (C) 2024 New Vector, Ltd +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# See the GNU Affero General Public License for more details: +# . +# +# Originally licensed under the Apache License, Version 2.0: +# . 
+# +# [This file includes modifications made by New Vector Limited] +# +# +import io +import os +import shutil +import tempfile + +from twisted.test.proto_helpers import MemoryReactor + +from synapse.media.filepath import MediaFilePaths +from synapse.media.media_storage import MediaStorage +from synapse.media.storage_provider import ( + FileStorageProviderBackend, + StorageProviderWrapper, +) +from synapse.server import HomeServer +from synapse.types import UserID +from synapse.util import Clock + +from tests import unittest +from tests.test_utils import SMALL_PNG +from tests.unittest import override_config + + +class FederationUnstableMediaDownloadsTest(unittest.FederatingHomeserverTestCase): + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + super().prepare(reactor, clock, hs) + self.test_dir = tempfile.mkdtemp(prefix="synapse-tests-") + self.addCleanup(shutil.rmtree, self.test_dir) + self.primary_base_path = os.path.join(self.test_dir, "primary") + self.secondary_base_path = os.path.join(self.test_dir, "secondary") + + hs.config.media.media_store_path = self.primary_base_path + + storage_providers = [ + StorageProviderWrapper( + FileStorageProviderBackend(hs, self.secondary_base_path), + store_local=True, + store_remote=False, + store_synchronous=True, + ) + ] + + self.filepaths = MediaFilePaths(self.primary_base_path) + self.media_storage = MediaStorage( + hs, self.primary_base_path, self.filepaths, storage_providers + ) + self.media_repo = hs.get_media_repository() + + @override_config( + {"experimental_features": {"msc3916_authenticated_media_enabled": True}} + ) + def test_file_download(self) -> None: + content = io.BytesIO(b"file_to_stream") + content_uri = self.get_success( + self.media_repo.create_content( + "text/plain", + "test_upload", + content, + 46, + UserID.from_string("@user_id:whatever.org"), + ) + ) + # test with a text file + channel = self.make_signed_federation_request( + "GET", + 
f"/_matrix/federation/unstable/org.matrix.msc3916/media/download/{content_uri.media_id}", + ) + self.pump() + self.assertEqual(200, channel.code) + + content_type = channel.headers.getRawHeaders("content-type") + assert content_type is not None + assert "multipart/mixed" in content_type[0] + assert "boundary" in content_type[0] + + # extract boundary + boundary = content_type[0].split("boundary=")[1] + # split on boundary and check that json field and expected value exist + stripped = channel.text_body.split("\r\n" + "--" + boundary) + # TODO: the json object expected will change once MSC3911 is implemented, currently + # {} is returned for all requests as a placeholder (per MSC3196) + found_json = any( + "\r\nContent-Type: application/json\r\n\r\n{}" in field + for field in stripped + ) + self.assertTrue(found_json) + + # check that the text file and expected value exist + found_file = any( + "\r\nContent-Type: text/plain\r\n\r\nfile_to_stream" in field + for field in stripped + ) + self.assertTrue(found_file) + + content = io.BytesIO(SMALL_PNG) + content_uri = self.get_success( + self.media_repo.create_content( + "image/png", + "test_png_upload", + content, + 67, + UserID.from_string("@user_id:whatever.org"), + ) + ) + # test with an image file + channel = self.make_signed_federation_request( + "GET", + f"/_matrix/federation/unstable/org.matrix.msc3916/media/download/{content_uri.media_id}", + ) + self.pump() + self.assertEqual(200, channel.code) + + content_type = channel.headers.getRawHeaders("content-type") + assert content_type is not None + assert "multipart/mixed" in content_type[0] + assert "boundary" in content_type[0] + + # extract boundary + boundary = content_type[0].split("boundary=")[1] + # split on boundary and check that json field and expected value exist + body = channel.result.get("body") + assert body is not None + stripped_bytes = body.split(b"\r\n" + b"--" + boundary.encode("utf-8")) + found_json = any( + b"\r\nContent-Type: 
application/json\r\n\r\n{}" in field + for field in stripped_bytes + ) + self.assertTrue(found_json) + + # check that the png file exists and matches what was uploaded + found_file = any(SMALL_PNG in field for field in stripped_bytes) + self.assertTrue(found_file) + + @override_config( + {"experimental_features": {"msc3916_authenticated_media_enabled": False}} + ) + def test_disable_config(self) -> None: + content = io.BytesIO(b"file_to_stream") + content_uri = self.get_success( + self.media_repo.create_content( + "text/plain", + "test_upload", + content, + 46, + UserID.from_string("@user_id:whatever.org"), + ) + ) + channel = self.make_signed_federation_request( + "GET", + f"/_matrix/federation/unstable/org.matrix.msc3916/media/download/{content_uri.media_id}", + ) + self.pump() + self.assertEqual(404, channel.code) + self.assertEqual(channel.json_body.get("errcode"), "M_UNRECOGNIZED") From 9cf0ef9c70c0f2b93f4056d6273f130e7a75a201 Mon Sep 17 00:00:00 2001 From: Denis Kasak Date: Tue, 25 Jun 2024 16:58:30 +0200 Subject: [PATCH 28/84] Fix outdated Security Disclosure Policy references (#17341) --- .github/ISSUE_TEMPLATE.md | 2 +- .github/ISSUE_TEMPLATE/BUG_REPORT.yml | 2 +- changelog.d/17341.doc | 1 + docs/welcome_and_overview.md | 6 +++--- 4 files changed, 6 insertions(+), 5 deletions(-) create mode 100644 changelog.d/17341.doc diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 1632170c9d..d3114882d7 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -2,4 +2,4 @@ (using a matrix.org account if necessary). We do not use GitHub issues for support. 
-**If you want to report a security issue** please see https://matrix.org/security-disclosure-policy/ +**If you want to report a security issue** please see https://element.io/security/security-disclosure-policy diff --git a/.github/ISSUE_TEMPLATE/BUG_REPORT.yml b/.github/ISSUE_TEMPLATE/BUG_REPORT.yml index 77a04109dd..ebd36a9398 100644 --- a/.github/ISSUE_TEMPLATE/BUG_REPORT.yml +++ b/.github/ISSUE_TEMPLATE/BUG_REPORT.yml @@ -7,7 +7,7 @@ body: **THIS IS NOT A SUPPORT CHANNEL!** **IF YOU HAVE SUPPORT QUESTIONS ABOUT RUNNING OR CONFIGURING YOUR OWN HOME SERVER**, please ask in **[#synapse:matrix.org](https://matrix.to/#/#synapse:matrix.org)** (using a matrix.org account if necessary). - If you want to report a security issue, please see https://matrix.org/security-disclosure-policy/ + If you want to report a security issue, please see https://element.io/security/security-disclosure-policy This is a bug report form. By following the instructions below and completing the sections with your information, you will help the us to get all the necessary data to fix your issue. diff --git a/changelog.d/17341.doc b/changelog.d/17341.doc new file mode 100644 index 0000000000..353c8adbe8 --- /dev/null +++ b/changelog.d/17341.doc @@ -0,0 +1 @@ +Fix stale references to the Foundation's Security Disclosure Policy. diff --git a/docs/welcome_and_overview.md b/docs/welcome_and_overview.md index ae5d0f5d90..b88fed7e44 100644 --- a/docs/welcome_and_overview.md +++ b/docs/welcome_and_overview.md @@ -62,6 +62,6 @@ following documentation: ## Reporting a security vulnerability -If you've found a security issue in Synapse or any other Matrix.org Foundation -project, please report it to us in accordance with our [Security Disclosure -Policy](https://www.matrix.org/security-disclosure-policy/). Thank you! 
+If you've found a security issue in Synapse or any other Element project, +please report it to us in accordance with our [Security Disclosure +Policy](https://element.io/security/security-disclosure-policy). Thank you! From ef7fbdfebd009d70dbf3e2dddfea8d6edb8ea94c Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Tue, 25 Jun 2024 16:20:59 +0100 Subject: [PATCH 29/84] Fixes to the table of contents in the README (#17329) --- README.rst | 8 ++++---- changelog.d/17329.doc | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) create mode 100644 changelog.d/17329.doc diff --git a/README.rst b/README.rst index db9b79a237..145315a7fe 100644 --- a/README.rst +++ b/README.rst @@ -1,9 +1,9 @@ .. image:: https://github.com/element-hq/product/assets/87339233/7abf477a-5277-47f3-be44-ea44917d8ed7 :height: 60px -=========================================================================================================== -Element Synapse - Matrix homeserver implementation |support| |development| |documentation| |license| |pypi| |python| -=========================================================================================================== +**Element Synapse - Matrix homeserver implementation** + +|support| |development| |documentation| |license| |pypi| |python| Synapse is an open source `Matrix `_ homeserver implementation, written and maintained by `Element `_. @@ -14,7 +14,7 @@ license. There is no support provided from Element unless you have a subscription. Subscription alternative ------------------------- +======================== Alternatively, for those that need an enterprise-ready solution, Element Server Suite (ESS) is `available as a subscription `_. diff --git a/changelog.d/17329.doc b/changelog.d/17329.doc new file mode 100644 index 0000000000..2486256dad --- /dev/null +++ b/changelog.d/17329.doc @@ -0,0 +1 @@ +Update header in the README to visually fix the the auto-generated table of contents. 
\ No newline at end of file From 7aea406c22066f061cf537ed25d0dbb00a107308 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 11:18:27 -0500 Subject: [PATCH 30/84] Just stripped_state for invite rooms --- synapse/handlers/sliding_sync.py | 27 ++-- synapse/rest/client/sync.py | 57 ++++++--- synapse/types/handlers/__init__.py | 15 ++- tests/rest/client/test_sync.py | 192 ++++++++++++++++++++++------- 4 files changed, 210 insertions(+), 81 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index d5390e8945..991d32356e 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -755,14 +755,23 @@ class SlidingSyncHandler: """ # Assemble the list of timeline events - timeline_events: List[EventBase] = [] - limited = False - # We want to start off using the `to_token` (vs `from_token`) because we look - # backwards from the `to_token` up to the `timeline_limit` and we might not - # reach the `from_token` before we hit the limit. We will update the room stream - # position once we've fetched the events to point to the earliest event fetched. - prev_batch_token = to_token - if room_sync_config.timeline_limit > 0: + timeline_events: Optional[List[EventBase]] = None + limited: Optional[bool] = None + prev_batch_token: Optional[StreamToken] = None + num_live: Optional[int] = None + if ( + room_sync_config.timeline_limit > 0 + # No timeline for invite/knock rooms (just `stripped_state`) + and rooms_for_user_membership_at_to_token.membership + not in (Membership.INVITE, Membership.KNOCK) + ): + limited = False + # We want to start off using the `to_token` (vs `from_token`) because we look + # backwards from the `to_token` up to the `timeline_limit` and we might not + # reach the `from_token` before we hit the limit. We will update the room stream + # position once we've fetched the events to point to the earliest event fetched. 
+ prev_batch_token = to_token + newly_joined = False if ( # We can only determine new-ness if we have a `from_token` to define our range @@ -903,7 +912,7 @@ class SlidingSyncHandler: # If the timeline is `limited=True`, the client does not have all events # necessary to calculate aggregations themselves. bundled_aggregations = None - if limited: + if limited and timeline_events is not None: bundled_aggregations = ( await self.relations_handler.get_bundled_aggregations( timeline_events, user.to_string() diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index b60af6356a..1d955a2e89 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -973,31 +973,13 @@ class SlidingSyncRestServlet(RestServlet): requester=requester, ) - serialized_rooms = {} + serialized_rooms: Dict[str, JsonDict] = {} for room_id, room_result in rooms.items(): - serialized_timeline = await self.event_serializer.serialize_events( - room_result.timeline_events, - time_now, - config=serialize_options, - bundle_aggregations=room_result.bundled_aggregations, - ) - - serialized_required_state = await self.event_serializer.serialize_events( - room_result.required_state, - time_now, - config=serialize_options, - ) - serialized_rooms[room_id] = { - "required_state": serialized_required_state, - "timeline": serialized_timeline, - "prev_batch": await room_result.prev_batch.to_string(self.store), - "limited": room_result.limited, "joined_count": room_result.joined_count, "invited_count": room_result.invited_count, "notification_count": room_result.notification_count, "highlight_count": room_result.highlight_count, - "num_live": room_result.num_live, } if room_result.name: @@ -1014,12 +996,47 @@ class SlidingSyncRestServlet(RestServlet): if room_result.initial: serialized_rooms[room_id]["initial"] = room_result.initial + # This will omitted for invite/knock rooms with `stripped_state` + if room_result.required_state is not None: + serialized_required_state = ( + await 
self.event_serializer.serialize_events( + room_result.required_state, + time_now, + config=serialize_options, + ) + ) + serialized_rooms[room_id]["required_state"] = serialized_required_state + + # This will omitted for invite/knock rooms with `stripped_state` + if room_result.timeline_events is not None: + serialized_timeline = await self.event_serializer.serialize_events( + room_result.timeline_events, + time_now, + config=serialize_options, + bundle_aggregations=room_result.bundled_aggregations, + ) + serialized_rooms[room_id]["timeline"] = serialized_timeline + + # This will omitted for invite/knock rooms with `stripped_state` + if room_result.limited is not None: + serialized_rooms[room_id]["limited"] = room_result.limited + + # This will omitted for invite/knock rooms with `stripped_state` + if room_result.prev_batch is not None: + serialized_rooms[room_id]["prev_batch"] = ( + await room_result.prev_batch.to_string(self.store) + ) + + # This will omitted for invite/knock rooms with `stripped_state` + if room_result.num_live is not None: + serialized_rooms[room_id]["num_live"] = room_result.num_live + # Field should be absent on non-DM rooms if room_result.is_dm: serialized_rooms[room_id]["is_dm"] = room_result.is_dm # Stripped state only applies to invite/knock rooms - if room_result.stripped_state: + if room_result.stripped_state is not None: # TODO: `knocked_state` but that isn't specced yet. 
# # TODO: Instead of adding `knocked_state`, it would be good to rename diff --git a/synapse/types/handlers/__init__.py b/synapse/types/handlers/__init__.py index 8e097d8b48..d50d02bfc6 100644 --- a/synapse/types/handlers/__init__.py +++ b/synapse/types/handlers/__init__.py @@ -197,18 +197,23 @@ class SlidingSyncResult: avatar: Optional[str] heroes: Optional[List[EventBase]] initial: bool - required_state: List[EventBase] - timeline_events: List[EventBase] + # Only optional because it won't be included for invite/knock rooms with `stripped_state` + required_state: Optional[List[EventBase]] + # Only optional because it won't be included for invite/knock rooms with `stripped_state` + timeline_events: Optional[List[EventBase]] bundled_aggregations: Optional[Dict[str, "BundledAggregations"]] is_dm: bool stripped_state: Optional[List[JsonDict]] - prev_batch: StreamToken - limited: bool + # Only optional because it won't be included for invite/knock rooms with `stripped_state` + prev_batch: Optional[StreamToken] + # Only optional because it won't be included for invite/knock rooms with `stripped_state` + limited: Optional[bool] joined_count: int invited_count: int notification_count: int highlight_count: int - num_live: int + # Only optional because it won't be included for invite/knock rooms with `stripped_state` + num_live: Optional[int] @attr.s(slots=True, frozen=True, auto_attribs=True) class SlidingWindowList: diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index a55804c96c..ad6b29b412 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1881,27 +1881,134 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): ) self.assertEqual(channel.code, 200, channel.json_body) - # Should not see anything (except maybe the invite event) because we haven't - # joined yet (history visibility is `shared`) (`filter_events_for_client(...)` - # is doing the work here) - self.assertEqual( - 
channel.json_body["rooms"][room_id1]["timeline"], - [], - channel.json_body["rooms"][room_id1]["timeline"], - ) - # No "live" events in an initial sync (no `from_token` to define the "live" - # range) and no events returned in the timeline anyway so nothing could be - # "live". - self.assertEqual( - channel.json_body["rooms"][room_id1]["num_live"], - 0, + # `timeline` is omitted for `invite` rooms with `stripped_state` + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("timeline"), channel.json_body["rooms"][room_id1], ) - # Even though we don't get any timeline events because they are filtered out, - # there is still more to paginate + # `num_live` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("num_live"), + channel.json_body["rooms"][room_id1], + ) + # `limited` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("limited"), + channel.json_body["rooms"][room_id1], + ) + # `prev_batch` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("prev_batch"), + channel.json_body["rooms"][room_id1], + ) + # We should have some `stripped_state` so the potential joiner can identify the + # room (we don't care about the order). 
+ self.assertCountEqual( + channel.json_body["rooms"][room_id1]["invite_state"], + [ + { + "content": {"creator": user2_id, "room_version": "10"}, + "sender": user2_id, + "state_key": "", + "type": "m.room.create", + }, + { + "content": {"join_rule": "public"}, + "sender": user2_id, + "state_key": "", + "type": "m.room.join_rules", + }, + { + "content": {"displayname": user2.localpart, "membership": "join"}, + "sender": user2_id, + "state_key": user2_id, + "type": "m.room.member", + }, + { + "content": {"displayname": user1.localpart, "membership": "invite"}, + "sender": user2_id, + "state_key": user1_id, + "type": "m.room.member", + }, + ], + channel.json_body["rooms"][room_id1]["invite_state"], + ) + + def test_rooms_invite_shared_history_incremental_sync(self) -> None: + """ + Test that `rooms` we are invited to have some stripped `invite_state` + + This is an `invite` room so we should only have `stripped_state` (no timeline) + but we also shouldn't see any timeline events because the history visiblity is + `shared` and we haven't joined the room yet. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user1 = UserID.from_string(user1_id) + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + user2 = UserID.from_string(user2_id) + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + # Ensure we're testing with a room with `shared` history visibility which means + # history visible until you actually join the room. 
+ history_visibility_response = self.helper.get_state( + room_id1, EventTypes.RoomHistoryVisibility, tok=user2_tok + ) self.assertEqual( - channel.json_body["rooms"][room_id1]["limited"], - True, + history_visibility_response.get("history_visibility"), + HistoryVisibility.SHARED, + ) + + self.helper.send(room_id1, "activity before invite1", tok=user2_tok) + self.helper.send(room_id1, "activity before invite2", tok=user2_tok) + self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + self.helper.send(room_id1, "activity after invite3", tok=user2_tok) + self.helper.send(room_id1, "activity after invite4", tok=user2_tok) + + from_token = self.event_sources.get_current_token() + + self.helper.send(room_id1, "activity after token5", tok=user2_tok) + self.helper.send(room_id1, "activity after toekn6", tok=user2_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint + + f"?pos={self.get_success( + from_token.to_string(self.store) + )}", + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 3, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # `timeline` is omitted for `invite` rooms with `stripped_state` + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("timeline"), + channel.json_body["rooms"][room_id1], + ) + # `num_live` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("num_live"), + channel.json_body["rooms"][room_id1], + ) + # `limited` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("limited"), + channel.json_body["rooms"][room_id1], + ) + # `prev_batch` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("prev_batch"), 
channel.json_body["rooms"][room_id1], ) # We should have some `stripped_state` so the potential joiner can identify the @@ -1977,12 +2084,10 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): ) self.helper.send(room_id1, "activity before1", tok=user2_tok) - event_response2 = self.helper.send(room_id1, "activity before2", tok=user2_tok) - use1_invite_response = self.helper.invite( - room_id1, src=user2_id, targ=user1_id, tok=user2_tok - ) - event_response3 = self.helper.send(room_id1, "activity after3", tok=user2_tok) - event_response4 = self.helper.send(room_id1, "activity after4", tok=user2_tok) + self.helper.send(room_id1, "activity before2", tok=user2_tok) + self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + self.helper.send(room_id1, "activity after3", tok=user2_tok) + self.helper.send(room_id1, "activity after4", tok=user2_tok) # Make the Sliding Sync request channel = self.make_request( @@ -2002,31 +2107,24 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): ) self.assertEqual(channel.code, 200, channel.json_body) - # Should see the last 4 events in the room - self.assertEqual( - [ - event["event_id"] - for event in channel.json_body["rooms"][room_id1]["timeline"] - ], - [ - event_response2["event_id"], - use1_invite_response["event_id"], - event_response3["event_id"], - event_response4["event_id"], - ], - channel.json_body["rooms"][room_id1]["timeline"], - ) - # No "live" events in an initial sync (no `from_token` to define the "live" - # range) - self.assertEqual( - channel.json_body["rooms"][room_id1]["num_live"], - 0, + # `timeline` is omitted for `invite` rooms with `stripped_state` + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("timeline"), channel.json_body["rooms"][room_id1], ) - # There is still more to paginate - self.assertEqual( - channel.json_body["rooms"][room_id1]["limited"], - True, + # `num_live` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + 
self.assertIsNone( + channel.json_body["rooms"][room_id1].get("num_live"), + channel.json_body["rooms"][room_id1], + ) + # `limited` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("limited"), + channel.json_body["rooms"][room_id1], + ) + # `prev_batch` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("prev_batch"), channel.json_body["rooms"][room_id1], ) # We should have some `stripped_state` so the potential joiner can identify the From e3e431fab4ba821b62558ebdffb5bbad2fcc6da3 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 12:35:48 -0500 Subject: [PATCH 31/84] Finish up stripped_state for invite rooms See https://github.com/element-hq/synapse/pull/17320#discussion_r1646581077 --- synapse/handlers/sliding_sync.py | 27 ++--- synapse/types/handlers/__init__.py | 1 + tests/rest/client/test_sync.py | 156 +++++++++++++++++++++++++++-- 3 files changed, 162 insertions(+), 22 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 991d32356e..e781080470 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -26,6 +26,7 @@ from immutabledict import immutabledict from synapse.api.constants import AccountDataTypes, Direction, EventTypes, Membership from synapse.events import EventBase from synapse.events.utils import strip_event +from synapse.handlers.relations import BundledAggregations from synapse.storage.roommember import RoomsForUser from synapse.types import ( JsonDict, @@ -756,6 +757,7 @@ class SlidingSyncHandler: # Assemble the list of timeline events timeline_events: Optional[List[EventBase]] = None + bundled_aggregations: Optional[Dict[str, BundledAggregations]] = None limited: Optional[bool] = None prev_batch_token: Optional[StreamToken] = None num_live: Optional[int] = None @@ -848,7 +850,9 @@ 
class SlidingSyncHandler: filter_send_to_client=True, ) # TODO: Filter out `EventTypes.CallInvite` in public rooms, - # see https://github.com/element-hq/synapse/pull/16908#discussion_r1651598029 + # see https://github.com/element-hq/synapse/issues/17359 + + # TODO: Handle timeline gaps (`get_timeline_gaps()`) # Determine how many "live" events we have (events within the given token range). # @@ -878,6 +882,15 @@ class SlidingSyncHandler: # this more with a binary search (bisect). break + # If the timeline is `limited=True`, the client does not have all events + # necessary to calculate aggregations themselves. + if limited: + bundled_aggregations = ( + await self.relations_handler.get_bundled_aggregations( + timeline_events, user.to_string() + ) + ) + # Update the `prev_batch_token` to point to the position that allows us to # keep paginating backwards from the oldest event we return in the timeline. prev_batch_token = prev_batch_token.copy_and_replace( @@ -907,18 +920,6 @@ class SlidingSyncHandler: stripped_state.append(strip_event(invite_or_knock_event)) - # TODO: Handle timeline gaps (`get_timeline_gaps()`) - - # If the timeline is `limited=True`, the client does not have all events - # necessary to calculate aggregations themselves. 
- bundled_aggregations = None - if limited and timeline_events is not None: - bundled_aggregations = ( - await self.relations_handler.get_bundled_aggregations( - timeline_events, user.to_string() - ) - ) - return SlidingSyncResult.RoomResult( # TODO: Dummy value name=None, diff --git a/synapse/types/handlers/__init__.py b/synapse/types/handlers/__init__.py index d50d02bfc6..3cd3c8fb0f 100644 --- a/synapse/types/handlers/__init__.py +++ b/synapse/types/handlers/__init__.py @@ -203,6 +203,7 @@ class SlidingSyncResult: timeline_events: Optional[List[EventBase]] bundled_aggregations: Optional[Dict[str, "BundledAggregations"]] is_dm: bool + # Optional because it's only relevant to invite/knock rooms stripped_state: Optional[List[JsonDict]] # Only optional because it won't be included for invite/knock rooms with `stripped_state` prev_batch: Optional[StreamToken] diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index ad6b29b412..ba7cae8645 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1836,9 +1836,12 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): def test_rooms_invite_shared_history_initial_sync(self) -> None: """ - Test that `rooms` we are invited to have some stripped `invite_state` and that - we can't see any timeline events because the history visiblity is `shared` and - we haven't joined the room yet. + Test that `rooms` we are invited to have some stripped `invite_state` during an + initial sync. + + This is an `invite` room so we should only have `stripped_state` (no `timeline`) + but we also shouldn't see any timeline events because the history visiblity is + `shared` and we haven't joined the room yet. 
""" user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -1936,9 +1939,10 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): def test_rooms_invite_shared_history_incremental_sync(self) -> None: """ - Test that `rooms` we are invited to have some stripped `invite_state` - - This is an `invite` room so we should only have `stripped_state` (no timeline) + Test that `rooms` we are invited to have some stripped `invite_state` during an + incremental sync. + + This is an `invite` room so we should only have `stripped_state` (no `timeline`) but we also shouldn't see any timeline events because the history visiblity is `shared` and we haven't joined the room yet. """ @@ -2046,9 +2050,14 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): def test_rooms_invite_world_readable_history_initial_sync(self) -> None: """ - Test that `rooms` we are invited to have some stripped `invite_state` and that - we can't see any timeline events because the history visiblity is `shared` and - we haven't joined the room yet. + Test that `rooms` we are invited to have some stripped `invite_state` during an + initial sync. + + This is an `invite` room so we should only have `stripped_state` (no `timeline`) + but depending on the semantics we decide, we could potentially see some + historical events before/after the `from_token` because the history is + `world_readable`. Same situation for events after the `from_token` if the + history visibility was set to `invited`. """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -2160,6 +2169,135 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): channel.json_body["rooms"][room_id1]["invite_state"], ) + def test_rooms_invite_world_readable_history_incremental_sync(self) -> None: + """ + Test that `rooms` we are invited to have some stripped `invite_state` during an + incremental sync. 
+ + This is an `invite` room so we should only have `stripped_state` (no `timeline`) + but depending on the semantics we decide, we could potentially see some + historical events before/after the `from_token` because the history is + `world_readable`. Same situation for events after the `from_token` if the + history visibility was set to `invited`. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user1 = UserID.from_string(user1_id) + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + user2 = UserID.from_string(user2_id) + + room_id1 = self.helper.create_room_as( + user2_id, + tok=user2_tok, + extra_content={ + "preset": "public_chat", + "initial_state": [ + { + "content": { + "history_visibility": HistoryVisibility.WORLD_READABLE + }, + "state_key": "", + "type": EventTypes.RoomHistoryVisibility, + } + ], + }, + ) + # Ensure we're testing with a room with `world_readable` history visibility + # which means events are visible to anyone even without membership. 
+ history_visibility_response = self.helper.get_state( + room_id1, EventTypes.RoomHistoryVisibility, tok=user2_tok + ) + self.assertEqual( + history_visibility_response.get("history_visibility"), + HistoryVisibility.WORLD_READABLE, + ) + + self.helper.send(room_id1, "activity before invite1", tok=user2_tok) + self.helper.send(room_id1, "activity before invite2", tok=user2_tok) + self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + self.helper.send(room_id1, "activity after invite3", tok=user2_tok) + self.helper.send(room_id1, "activity after invite4", tok=user2_tok) + + from_token = self.event_sources.get_current_token() + + self.helper.send(room_id1, "activity after token5", tok=user2_tok) + self.helper.send(room_id1, "activity after toekn6", tok=user2_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint + + f"?pos={self.get_success( + from_token.to_string(self.store) + )}", + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + # Large enough to see the latest events and before the invite + "timeline_limit": 4, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # `timeline` is omitted for `invite` rooms with `stripped_state` + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("timeline"), + channel.json_body["rooms"][room_id1], + ) + # `num_live` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("num_live"), + channel.json_body["rooms"][room_id1], + ) + # `limited` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("limited"), + channel.json_body["rooms"][room_id1], + ) + # `prev_batch` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + 
channel.json_body["rooms"][room_id1].get("prev_batch"), + channel.json_body["rooms"][room_id1], + ) + # We should have some `stripped_state` so the potential joiner can identify the + # room (we don't care about the order). + self.assertCountEqual( + channel.json_body["rooms"][room_id1]["invite_state"], + [ + { + "content": {"creator": user2_id, "room_version": "10"}, + "sender": user2_id, + "state_key": "", + "type": "m.room.create", + }, + { + "content": {"join_rule": "public"}, + "sender": user2_id, + "state_key": "", + "type": "m.room.join_rules", + }, + { + "content": {"displayname": user2.localpart, "membership": "join"}, + "sender": user2_id, + "state_key": user2_id, + "type": "m.room.member", + }, + { + "content": {"displayname": user1.localpart, "membership": "invite"}, + "sender": user2_id, + "state_key": user1_id, + "type": "m.room.member", + }, + ], + channel.json_body["rooms"][room_id1]["invite_state"], + ) + def test_rooms_ban_initial_sync(self) -> None: """ Test that `rooms` we are banned from in an intial sync only allows us to see From 303d834b78a7c93e390da3f426754cafff07c20f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 12:38:09 -0500 Subject: [PATCH 32/84] Add tracking discussion for not optional in the future --- synapse/handlers/sliding_sync.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index e781080470..0538fddf84 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -756,6 +756,11 @@ class SlidingSyncHandler: """ # Assemble the list of timeline events + # + # It would be nice to make the `rooms` response more uniform regardless of + # membership. Currently, we have to make all of these optional because + # `invite`/`knock` rooms only have `stripped_state`. 
See + # https://github.com/matrix-org/matrix-spec-proposals/pull/3575#discussion_r1653045932 timeline_events: Optional[List[EventBase]] = None bundled_aggregations: Optional[Dict[str, BundledAggregations]] = None limited: Optional[bool] = None From 4c2213144258cef2b2ac7960f290649a076d1927 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 15:27:43 -0500 Subject: [PATCH 33/84] Start testing for the correct room membership (failing) --- tests/handlers/test_sliding_sync.py | 477 +++++++++++++++++++++++++--- 1 file changed, 432 insertions(+), 45 deletions(-) diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index 0358239c7f..df262400e4 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -63,6 +63,7 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): self.sliding_sync_handler = self.hs.get_sliding_sync_handler() self.store = self.hs.get_datastores().main self.event_sources = hs.get_event_sources() + self.storage_controllers = hs.get_storage_controllers() def test_no_rooms(self) -> None: """ @@ -90,10 +91,13 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") before_room_token = self.event_sources.get_current_token() - room_id = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) + room_id = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response = self.helper.join(room_id, user1_id, tok=user1_tok) after_room_token = self.event_sources.get_current_token() @@ -106,6 +110,12 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): ) self.assertEqual(room_id_results.keys(), {room_id}) + # It should be pointing to the join event (latest membership event in the + # from/to range) + self.assertEqual( + room_id_results[room_id].event_id, + 
join_response["event_id"], + ) def test_get_already_joined_room(self) -> None: """ @@ -113,8 +123,11 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") - room_id = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) + room_id = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response = self.helper.join(room_id, user1_id, tok=user1_tok) after_room_token = self.event_sources.get_current_token() @@ -127,6 +140,12 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): ) self.assertEqual(room_id_results.keys(), {room_id}) + # It should be pointing to the join event (latest membership event in the + # from/to range) + self.assertEqual( + room_id_results[room_id].event_id, + join_response["event_id"], + ) def test_get_invited_banned_knocked_room(self) -> None: """ @@ -142,14 +161,18 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # Setup the invited room (user2 invites user1 to the room) invited_room_id = self.helper.create_room_as(user2_id, tok=user2_tok) - self.helper.invite(invited_room_id, targ=user1_id, tok=user2_tok) + invite_response = self.helper.invite( + invited_room_id, targ=user1_id, tok=user2_tok + ) # Setup the ban room (user2 bans user1 from the room) ban_room_id = self.helper.create_room_as( user2_id, tok=user2_tok, is_public=True ) self.helper.join(ban_room_id, user1_id, tok=user1_tok) - self.helper.ban(ban_room_id, src=user2_id, targ=user1_id, tok=user2_tok) + ban_response = self.helper.ban( + ban_room_id, src=user2_id, targ=user1_id, tok=user2_tok + ) # Setup the knock room (user1 knocks on the room) knock_room_id = self.helper.create_room_as( @@ -162,13 +185,19 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): tok=user2_tok, ) # User1 knocks on the room - channel = self.make_request( + knock_channel = 
self.make_request( "POST", "/_matrix/client/r0/knock/%s" % (knock_room_id,), b"{}", user1_tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(knock_channel.code, 200, knock_channel.result) + knock_room_membership_state_event = self.get_success( + self.storage_controllers.state.get_current_state_event( + knock_room_id, EventTypes.Member, user1_id + ) + ) + assert knock_room_membership_state_event is not None after_room_token = self.event_sources.get_current_token() @@ -189,6 +218,20 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): knock_room_id, }, ) + # It should be pointing to the the respective membership event (latest + # membership event in the from/to range) + self.assertEqual( + room_id_results[invited_room_id].event_id, + invite_response["event_id"], + ) + self.assertEqual( + room_id_results[ban_room_id].event_id, + ban_response["event_id"], + ) + self.assertEqual( + room_id_results[knock_room_id].event_id, + knock_room_membership_state_event.event_id, + ) def test_get_kicked_room(self) -> None: """ @@ -206,7 +249,7 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): ) self.helper.join(kick_room_id, user1_id, tok=user1_tok) # Kick user1 from the room - self.helper.change_membership( + kick_response = self.helper.change_membership( room=kick_room_id, src=user2_id, targ=user1_id, @@ -229,6 +272,11 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # The kicked room should show up self.assertEqual(room_id_results.keys(), {kick_room_id}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[kick_room_id].event_id, + kick_response["event_id"], + ) def test_forgotten_rooms(self) -> None: """ @@ -329,7 +377,7 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # Leave during the from_token/to_token range (newly_left) room_id2 = self.helper.create_room_as(user1_id, tok=user1_tok) - self.helper.leave(room_id2, user1_id, tok=user1_tok) + 
leave_response = self.helper.leave(room_id2, user1_id, tok=user1_tok) after_room2_token = self.event_sources.get_current_token() @@ -343,6 +391,11 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # Only the newly_left room should show up self.assertEqual(room_id_results.keys(), {room_id2}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id2].event_id, + leave_response["event_id"], + ) def test_no_joins_after_to_token(self) -> None: """ @@ -351,16 +404,19 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") before_room1_token = self.event_sources.get_current_token() - room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok) + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) after_room1_token = self.event_sources.get_current_token() - # Room join after after our `to_token` shouldn't show up - room_id2 = self.helper.create_room_as(user1_id, tok=user1_tok) - _ = room_id2 + # Room join after our `to_token` shouldn't show up + room_id2 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.join(room_id2, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -371,6 +427,11 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): ) self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response1["event_id"], + ) def test_join_during_range_and_left_room_after_to_token(self) -> None: """ @@ -380,15 +441,18 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): """ user1_id = 
self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") before_room1_token = self.event_sources.get_current_token() - room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok) + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) after_room1_token = self.event_sources.get_current_token() # Leave the room after we already have our tokens - self.helper.leave(room_id1, user1_id, tok=user1_tok) + leave_response = self.helper.leave(room_id1, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -401,6 +465,18 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # We should still see the room because we were joined during the # from_token/to_token time period. self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response": join_response["event_id"], + "leave_response": leave_response["event_id"], + } + ), + ) def test_join_before_range_and_left_room_after_to_token(self) -> None: """ @@ -410,13 +486,16 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") - room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok) + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) after_room1_token = self.event_sources.get_current_token() # Leave the room after we already have our tokens - 
self.helper.leave(room_id1, user1_id, tok=user1_tok) + leave_response = self.helper.leave(room_id1, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -428,6 +507,18 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # We should still see the room because we were joined before the `from_token` self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response": join_response["event_id"], + "leave_response": leave_response["event_id"], + } + ), + ) def test_kicked_before_range_and_left_after_to_token(self) -> None: """ @@ -444,9 +535,9 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): kick_room_id = self.helper.create_room_as( user2_id, tok=user2_tok, is_public=True ) - self.helper.join(kick_room_id, user1_id, tok=user1_tok) + join_response1 = self.helper.join(kick_room_id, user1_id, tok=user1_tok) # Kick user1 from the room - self.helper.change_membership( + kick_response = self.helper.change_membership( room=kick_room_id, src=user2_id, targ=user1_id, @@ -463,8 +554,8 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # # We have to join before we can leave (leave -> leave isn't a valid transition # or at least it doesn't work in Synapse, 403 forbidden) - self.helper.join(kick_room_id, user1_id, tok=user1_tok) - self.helper.leave(kick_room_id, user1_id, tok=user1_tok) + join_response2 = self.helper.join(kick_room_id, user1_id, tok=user1_tok) + leave_response = self.helper.leave(kick_room_id, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -476,6 +567,20 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # We shouldn't see the room because it was forgotten 
self.assertEqual(room_id_results.keys(), {kick_room_id}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[kick_room_id].event_id, + kick_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response1": join_response1["event_id"], + "kick_response": kick_response["event_id"], + "join_response2": join_response2["event_id"], + "leave_response": leave_response["event_id"], + } + ), + ) def test_newly_left_during_range_and_join_leave_after_to_token(self) -> None: """ @@ -494,14 +599,14 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # leave and can still re-join. room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) # Join and leave the room during the from/to range - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response1 = self.helper.leave(room_id1, user1_id, tok=user1_tok) after_room1_token = self.event_sources.get_current_token() # Join and leave the room after we already have our tokens - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response2 = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response2 = self.helper.leave(room_id1, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -513,6 +618,20 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # Room should still show up because it's newly_left during the from/to range self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + leave_response1["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + 
"join_response1": join_response1["event_id"], + "leave_response1": leave_response1["event_id"], + "join_response2": join_response2["event_id"], + "leave_response2": leave_response2["event_id"], + } + ), + ) def test_newly_left_during_range_and_join_after_to_token(self) -> None: """ @@ -531,13 +650,13 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # leave and can still re-join. room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) # Join and leave the room during the from/to range - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response1 = self.helper.leave(room_id1, user1_id, tok=user1_tok) after_room1_token = self.event_sources.get_current_token() # Join the room after we already have our tokens - self.helper.join(room_id1, user1_id, tok=user1_tok) + join_response2 = self.helper.join(room_id1, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -549,11 +668,24 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # Room should still show up because it's newly_left during the from/to range self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + leave_response1["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response1": join_response1["event_id"], + "leave_response1": leave_response1["event_id"], + "join_response2": join_response2["event_id"], + } + ), + ) def test_no_from_token(self) -> None: """ Test that if we don't provide a `from_token`, we get all the rooms that we we're - joined to up to the `to_token`. + joined up to the `to_token`. Providing `from_token` only really has the effect that it adds `newly_left` rooms to the response. 
@@ -569,7 +701,7 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): room_id2 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) # Join room1 - self.helper.join(room_id1, user1_id, tok=user1_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) # Join and leave the room2 before the `to_token` self.helper.join(room_id2, user1_id, tok=user1_tok) @@ -590,6 +722,11 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # Only rooms we were joined to before the `to_token` should show up self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response1["event_id"], + ) def test_from_token_ahead_of_to_token(self) -> None: """ @@ -609,7 +746,7 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): room_id4 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) # Join room1 before `before_room_token` - self.helper.join(room_id1, user1_id, tok=user1_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) # Join and leave the room2 before `before_room_token` self.helper.join(room_id2, user1_id, tok=user1_tok) @@ -651,6 +788,11 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # There won't be any newly_left rooms because the `from_token` is ahead of the # `to_token` and that range will give no membership changes to check. self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response1["event_id"], + ) def test_leave_before_range_and_join_leave_after_to_token(self) -> None: """ @@ -741,16 +883,16 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # leave and can still re-join. 
room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) # Join, leave, join back to the room before the from/to range - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) - self.helper.join(room_id1, user1_id, tok=user1_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response1 = self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response2 = self.helper.join(room_id1, user1_id, tok=user1_tok) after_room1_token = self.event_sources.get_current_token() # Leave and Join the room multiple times after we already have our tokens - self.helper.leave(room_id1, user1_id, tok=user1_tok) - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) + leave_response2 = self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response3 = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response3 = self.helper.leave(room_id1, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -762,6 +904,22 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # Room should show up because it was newly_left and joined during the from/to range self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response2["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response1": join_response1["event_id"], + "leave_response1": leave_response1["event_id"], + "join_response2": join_response2["event_id"], + "leave_response2": leave_response2["event_id"], + "join_response3": join_response3["event_id"], + "leave_response3": leave_response3["event_id"], + } + ), + ) def test_join_leave_multiple_times_before_range_and_after_to_token( self, @@ -781,16 +939,16 @@ class 
GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # leave and can still re-join. room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) # Join, leave, join back to the room before the from/to range - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) - self.helper.join(room_id1, user1_id, tok=user1_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response1 = self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response2 = self.helper.join(room_id1, user1_id, tok=user1_tok) after_room1_token = self.event_sources.get_current_token() # Leave and Join the room multiple times after we already have our tokens - self.helper.leave(room_id1, user1_id, tok=user1_tok) - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) + leave_response2 = self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response3 = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response3 = self.helper.leave(room_id1, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -802,6 +960,22 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # Room should show up because we were joined before the from/to range self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response2["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response1": join_response1["event_id"], + "leave_response1": leave_response1["event_id"], + "join_response2": join_response2["event_id"], + "leave_response2": leave_response2["event_id"], + "join_response3": join_response3["event_id"], + "leave_response3": leave_response3["event_id"], + } + ), + ) def 
test_invite_before_range_and_join_leave_after_to_token( self, @@ -821,13 +995,15 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) # Invited to the room before the token - self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + invite_response = self.helper.invite( + room_id1, src=user2_id, targ=user1_id, tok=user2_tok + ) after_room1_token = self.event_sources.get_current_token() # Join and leave the room after we already have our tokens - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_respsonse = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response = self.helper.leave(room_id1, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -839,6 +1015,217 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # Room should show up because we were invited before the from/to range self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + invite_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "invite_response": invite_response["event_id"], + "join_respsonse": join_respsonse["event_id"], + "leave_response": leave_response["event_id"], + } + ), + ) + + def test_display_name_changes( + self, + ) -> None: + """ + Test that we point to the correct membership event within the from/to range even + if there are multiple `join` membership events in a row indicating + `displayname`/`avatar_url` updates. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + # We create the room with user2 so the room isn't left with no members when we + # leave and can still re-join. + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) + join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + # Update the displayname during the token range + displayname_change_during_token_range_response = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname during token range", + }, + tok=user1_tok, + ) + + after_room1_token = self.event_sources.get_current_token() + + # Update the displayname after the token range + displayname_change_after_token_range_response = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname after token range", + }, + tok=user1_tok, + ) + + room_id_results = self.get_success( + self.sliding_sync_handler.get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=before_room1_token, + to_token=after_room1_token, + ) + ) + + # Room should show up because we were joined during the from/to range + self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + displayname_change_during_token_range_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response": join_response["event_id"], + "displayname_change_during_token_range_response": displayname_change_during_token_range_response[ + "event_id" + ], + 
"displayname_change_after_token_range_response": displayname_change_after_token_range_response[ + "event_id" + ], + } + ), + ) + + def test_display_name_changes_leave_after_token_range( + self, + ) -> None: + """ + Test that we point to the correct membership event within the from/to range even + if there are multiple `join` membership events in a row indicating + `displayname`/`avatar_url` updates and we leave after the `to_token`. + + See condition "1a)" comments in the `get_sync_room_ids_for_user()` method. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + # We create the room with user2 so the room isn't left with no members when we + # leave and can still re-join. + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) + join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + # Update the displayname during the token range + displayname_change_during_token_range_response = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname during token range", + }, + tok=user1_tok, + ) + + after_room1_token = self.event_sources.get_current_token() + + # Update the displayname after the token range + displayname_change_after_token_range_response = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname after token range", + }, + tok=user1_tok, + ) + + # Leave after the token + self.helper.leave(room_id1, user1_id, tok=user1_tok) + + room_id_results = self.get_success( + self.sliding_sync_handler.get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=before_room1_token, + to_token=after_room1_token, + ) + 
) + + # Room should show up because we were joined during the from/to range + self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + displayname_change_during_token_range_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response": join_response["event_id"], + "displayname_change_during_token_range_response": displayname_change_during_token_range_response[ + "event_id" + ], + "displayname_change_after_token_range_response": displayname_change_after_token_range_response[ + "event_id" + ], + } + ), + ) + + def test_display_name_changes_join_after_token_range( + self, + ) -> None: + """ + Test that multiple `join` membership events (after the `to_token`) in a row + indicating `displayname`/`avatar_url` updates doesn't affect the results (we + joined after the token range so it shouldn't show up) + + See condition "1b)" comments in the `get_sync_room_ids_for_user()` method. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + # We create the room with user2 so the room isn't left with no members when we + # leave and can still re-join. 
+ room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) + + after_room1_token = self.event_sources.get_current_token() + + self.helper.join(room_id1, user1_id, tok=user1_tok) + # Update the displayname after the token range + self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname after token range", + }, + tok=user1_tok, + ) + + room_id_results = self.get_success( + self.sliding_sync_handler.get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=before_room1_token, + to_token=after_room1_token, + ) + ) + + # Room shouldn't show up because we joined after the from/to range + self.assertEqual(room_id_results.keys(), set()) def test_multiple_rooms_are_not_confused( self, From 83d6f76606bb7d1eaba9d5e498efc9fa15d13957 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 15:27:49 -0500 Subject: [PATCH 34/84] Describe `current_state_delta_stream` better --- synapse/storage/schema/main/delta/42/current_state_delta.sql | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/storage/schema/main/delta/42/current_state_delta.sql b/synapse/storage/schema/main/delta/42/current_state_delta.sql index 876b61e6a5..3d2fd69480 100644 --- a/synapse/storage/schema/main/delta/42/current_state_delta.sql +++ b/synapse/storage/schema/main/delta/42/current_state_delta.sql @@ -32,7 +32,10 @@ * limitations under the License. */ - +-- Tracks what the server thinks is the current state of the room as time goes. It does +-- not track how state progresses from the beginning of the room. So for example, when +-- you remotely join a room, the first rows will just be the state when you joined and +-- progress from there. 
CREATE TABLE current_state_delta_stream ( stream_id BIGINT NOT NULL, room_id TEXT NOT NULL, From fbd92e1c9da2bc89a555f3fa609bba20a76e4440 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 20:16:50 -0500 Subject: [PATCH 35/84] Add `get_current_state_delta_membership_changes_for_user(...)` (using `current_state_delta_stream`) (still need to add newly_left rooms back) --- synapse/handlers/sliding_sync.py | 347 ++++++++++++++--------- synapse/storage/databases/main/stream.py | 151 +++++++++- tests/handlers/test_sliding_sync.py | 73 ++++- 3 files changed, 426 insertions(+), 145 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 0538fddf84..2e24b0c338 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -27,6 +27,7 @@ from synapse.api.constants import AccountDataTypes, Direction, EventTypes, Membe from synapse.events import EventBase from synapse.events.utils import strip_event from synapse.handlers.relations import BundledAggregations +from synapse.storage.databases.main.stream import CurrentStateDeltaMembership from synapse.storage.roommember import RoomsForUser from synapse.types import ( JsonDict, @@ -369,6 +370,9 @@ class SlidingSyncHandler: # Our working list of rooms that can show up in the sync response sync_room_id_set = { + # Note: The `room_for_user` we're assigning here will need to be fixed up + # (below) because they are potentially from the current snapshot time + # instead from the time of the `to_token`. room_for_user.room_id: room_for_user for room_for_user in room_for_user_list if filter_membership_for_sync( @@ -404,33 +408,10 @@ class SlidingSyncHandler: instance_map=immutabledict(instance_to_max_stream_ordering_map), ) - # Since we fetched the users room list at some point in time after the from/to - # tokens, we need to revert/rewind some membership changes to match the point in - # time of the `to_token`. 
In particular, we need to make these fixups: - # - # - 1a) Remove rooms that the user joined after the `to_token` - # - 1b) Add back rooms that the user left after the `to_token` - # - 2) Add back newly_left rooms (> `from_token` and <= `to_token`) - # - # Below, we're doing two separate lookups for membership changes. We could - # request everything for both fixups in one range, [`from_token.room_key`, - # `membership_snapshot_token`), but we want to avoid raw `stream_ordering` - # comparison without `instance_name` (which is flawed). We could refactor - # `event.internal_metadata` to include `instance_name` but it might turn out a - # little difficult and a bigger, broader Synapse change than we want to make. - - # 1) ----------------------------------------------------- - - # 1) Fetch membership changes that fall in the range from `to_token` up to - # `membership_snapshot_token` - # - # If our `to_token` is already the same or ahead of the latest room membership - # for the user, we don't need to do any "2)" fix-ups and can just straight-up - # use the room list from the snapshot as a base (nothing has changed) - membership_change_events_after_to_token = [] + current_state_delta_membership_changes_after_to_token = [] if not membership_snapshot_token.is_before_or_eq(to_token.room_key): - membership_change_events_after_to_token = ( - await self.store.get_membership_changes_for_user( + current_state_delta_membership_changes_after_to_token = ( + await self.store.get_current_state_delta_membership_changes_for_user( user_id, from_key=to_token.room_key, to_key=membership_snapshot_token, @@ -438,138 +419,224 @@ class SlidingSyncHandler: ) ) - # 1) Assemble a list of the last membership events in some given ranges. Someone - # could have left and joined multiple times during the given range but we only - # care about end-result so we grab the last one. 
- last_membership_change_by_room_id_after_to_token: Dict[str, EventBase] = {} - # We also need the first membership event after the `to_token` so we can step + # We need the first membership event after the `to_token` so we can step # backward to the previous membership that would apply to the from/to range. - first_membership_change_by_room_id_after_to_token: Dict[str, EventBase] = {} - for event in membership_change_events_after_to_token: - last_membership_change_by_room_id_after_to_token[event.room_id] = event + first_membership_change_by_room_id_after_to_token: Dict[ + str, CurrentStateDeltaMembership + ] = {} + for membership_change in current_state_delta_membership_changes_after_to_token: # Only set if we haven't already set it first_membership_change_by_room_id_after_to_token.setdefault( - event.room_id, event + membership_change.room_id, membership_change ) - # 1) Fixup + # Since we fetched a snapshot of the users room list at some point in time after + # the from/to tokens, we need to revert/rewind some membership changes to match + # the point in time of the `to_token`. + prev_event_ids_in_from_to_range = [] for ( - last_membership_change_after_to_token - ) in last_membership_change_by_room_id_after_to_token.values(): - room_id = last_membership_change_after_to_token.room_id - - # We want to find the first membership change after the `to_token` then step - # backward to know the membership in the from/to range. - first_membership_change_after_to_token = ( - first_membership_change_by_room_id_after_to_token.get(room_id) - ) - assert first_membership_change_after_to_token is not None, ( - "If there was a `last_membership_change_after_to_token` that we're iterating over, " - + "then there should be corresponding a first change. For example, even if there " - + "is only one event after the `to_token`, the first and last event will be same event. 
" - + "This is probably a mistake in assembling the `last_membership_change_by_room_id_after_to_token`" - + "/`first_membership_change_by_room_id_after_to_token` dicts above." - ) - # TODO: Instead of reading from `unsigned`, refactor this to use the - # `current_state_delta_stream` table in the future. Probably a new - # `get_membership_changes_for_user()` function that uses - # `current_state_delta_stream` with a join to `room_memberships`. This would - # help in state reset scenarios since `prev_content` is looking at the - # current branch vs the current room state. This is all just data given to - # the client so no real harm to data integrity, but we'd like to be nice to - # the client. Since the `current_state_delta_stream` table is new, it - # doesn't have all events in it. Since this is Sliding Sync, if we ever need - # to, we can signal the client to throw all of their state away by sending - # "operation: RESET". - prev_content = first_membership_change_after_to_token.unsigned.get( - "prev_content", {} - ) - prev_membership = prev_content.get("membership", None) - prev_sender = first_membership_change_after_to_token.unsigned.get( - "prev_sender", None + room_id, + first_membership_change_after_to_token, + ) in first_membership_change_by_room_id_after_to_token.items(): + # One of these should exist to be a valid row in `current_state_delta_stream` + assert ( + first_membership_change_after_to_token.event_id is not None + or first_membership_change_after_to_token.prev_event_id is not None ) - # Check if the previous membership (membership that applies to the from/to - # range) should be included in our `sync_room_id_set` - should_prev_membership_be_included = ( - prev_membership is not None - and prev_sender is not None - and filter_membership_for_sync( - membership=prev_membership, - user_id=user_id, - sender=prev_sender, + # If the membership change was added after the `to_token`, we need to remove + # it + if 
first_membership_change_after_to_token.prev_event_id is None: + sync_room_id_set.pop(room_id, None) + # From the first membership event after the `to_token`, we need to step + # backward to the previous membership that would apply to the from/to range. + else: + prev_event_ids_in_from_to_range.append( + first_membership_change_after_to_token.prev_event_id ) - ) - # Check if the last membership (membership that applies to our snapshot) was - # already included in our `sync_room_id_set` - was_last_membership_already_included = filter_membership_for_sync( - membership=last_membership_change_after_to_token.membership, + # Fetch the previous membership events that apply to the from/to range and fixup + # our working list. + prev_events_in_from_to_range = await self.store.get_events( + prev_event_ids_in_from_to_range + ) + for prev_event_in_from_to_range in prev_events_in_from_to_range.values(): + # Update if the membership should be included + if filter_membership_for_sync( + membership=prev_event_in_from_to_range.membership, user_id=user_id, - sender=last_membership_change_after_to_token.sender, - ) - - # 1a) Add back rooms that the user left after the `to_token` - # - # For example, if the last membership event after the `to_token` is a leave - # event, then the room was excluded from `sync_room_id_set` when we first - # crafted it above. We should add these rooms back as long as the user also - # was part of the room before the `to_token`. 
- if ( - not was_last_membership_already_included - and should_prev_membership_be_included + sender=prev_event_in_from_to_range.sender, ): - sync_room_id_set[room_id] = convert_event_to_rooms_for_user( - last_membership_change_after_to_token + sync_room_id_set[prev_event_in_from_to_range.room_id] = ( + convert_event_to_rooms_for_user(prev_event_in_from_to_range) ) - # 1b) Remove rooms that the user joined (hasn't left) after the `to_token` - # - # For example, if the last membership event after the `to_token` is a "join" - # event, then the room was included `sync_room_id_set` when we first crafted - # it above. We should remove these rooms as long as the user also wasn't - # part of the room before the `to_token`. - elif ( - was_last_membership_already_included - and not should_prev_membership_be_included - ): - del sync_room_id_set[room_id] + # Otherwise, remove it + else: + sync_room_id_set.pop(prev_event_in_from_to_range.room_id, None) - # 2) ----------------------------------------------------- - # We fix-up newly_left rooms after the first fixup because it may have removed - # some left rooms that we can figure out are newly_left in the following code + # TODO: Add back newly_left rooms - # 2) Fetch membership changes that fall in the range from `from_token` up to `to_token` - membership_change_events_in_from_to_range = [] - if from_token: - membership_change_events_in_from_to_range = ( - await self.store.get_membership_changes_for_user( - user_id, - from_key=from_token.room_key, - to_key=to_token.room_key, - excluded_rooms=self.rooms_to_exclude_globally, - ) - ) + # Since we fetched the users room list at some point in time after the from/to + # tokens, we need to revert/rewind some membership changes to match the point in + # time of the `to_token`. 
In particular, we need to make these fixups: + # + # - 1a) Remove rooms that the user joined after the `to_token` + # - 1b) Add back rooms that the user left after the `to_token` + # - 2) Add back newly_left rooms (> `from_token` and <= `to_token`) - # 2) Assemble a list of the last membership events in some given ranges. Someone - # could have left and joined multiple times during the given range but we only - # care about end-result so we grab the last one. - last_membership_change_by_room_id_in_from_to_range: Dict[str, EventBase] = {} - for event in membership_change_events_in_from_to_range: - last_membership_change_by_room_id_in_from_to_range[event.room_id] = event + # # 1) ----------------------------------------------------- - # 2) Fixup - for ( - last_membership_change_in_from_to_range - ) in last_membership_change_by_room_id_in_from_to_range.values(): - room_id = last_membership_change_in_from_to_range.room_id + # # 1) Fetch membership changes that fall in the range from `to_token` up to + # # `membership_snapshot_token` + # # + # # If our `to_token` is already the same or ahead of the latest room membership + # # for the user, we don't need to do any "2)" fix-ups and can just straight-up + # # use the room list from the snapshot as a base (nothing has changed) + # membership_change_events_after_to_token = [] + # if not membership_snapshot_token.is_before_or_eq(to_token.room_key): + # membership_change_events_after_to_token = ( + # await self.store.get_membership_changes_for_user( + # user_id, + # from_key=to_token.room_key, + # to_key=membership_snapshot_token, + # excluded_rooms=self.rooms_to_exclude_globally, + # ) + # ) - # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). 
We - # include newly_left rooms because the last event that the user should see - # is their own leave event - if last_membership_change_in_from_to_range.membership == Membership.LEAVE: - sync_room_id_set[room_id] = convert_event_to_rooms_for_user( - last_membership_change_in_from_to_range - ) + # # 1) Assemble a list of the last membership events in some given ranges. Someone + # # could have left and joined multiple times during the given range but we only + # # care about end-result so we grab the last one. + # last_membership_change_by_room_id_after_to_token: Dict[str, EventBase] = {} + # # We also need the first membership event after the `to_token` so we can step + # # backward to the previous membership that would apply to the from/to range. + # first_membership_change_by_room_id_after_to_token: Dict[str, EventBase] = {} + # for event in membership_change_events_after_to_token: + # last_membership_change_by_room_id_after_to_token[event.room_id] = event + # # Only set if we haven't already set it + # first_membership_change_by_room_id_after_to_token.setdefault( + # event.room_id, event + # ) + + # # 1) Fixup + # for ( + # last_membership_change_after_to_token + # ) in last_membership_change_by_room_id_after_to_token.values(): + # room_id = last_membership_change_after_to_token.room_id + + # # We want to find the first membership change after the `to_token` then step + # # backward to know the membership in the from/to range. + # first_membership_change_after_to_token = ( + # first_membership_change_by_room_id_after_to_token.get(room_id) + # ) + # assert first_membership_change_after_to_token is not None, ( + # "If there was a `last_membership_change_after_to_token` that we're iterating over, " + # + "then there should be corresponding a first change. For example, even if there " + # + "is only one event after the `to_token`, the first and last event will be same event. 
" + # + "This is probably a mistake in assembling the `last_membership_change_by_room_id_after_to_token`" + # + "/`first_membership_change_by_room_id_after_to_token` dicts above." + # ) + # # TODO: Instead of reading from `unsigned`, refactor this to use the + # # `current_state_delta_stream` table in the future. Probably a new + # # `get_membership_changes_for_user()` function that uses + # # `current_state_delta_stream` with a join to `room_memberships`. This would + # # help in state reset scenarios since `prev_content` is looking at the + # # current branch vs the current room state. This is all just data given to + # # the client so no real harm to data integrity, but we'd like to be nice to + # # the client. Since the `current_state_delta_stream` table is new, it + # # doesn't have all events in it. Since this is Sliding Sync, if we ever need + # # to, we can signal the client to throw all of their state away by sending + # # "operation: RESET". + # prev_content = first_membership_change_after_to_token.unsigned.get( + # "prev_content", {} + # ) + # prev_membership = prev_content.get("membership", None) + # prev_sender = first_membership_change_after_to_token.unsigned.get( + # "prev_sender", None + # ) + + # # Check if the previous membership (membership that applies to the from/to + # # range) should be included in our `sync_room_id_set` + # should_prev_membership_be_included = ( + # prev_membership is not None + # and prev_sender is not None + # and filter_membership_for_sync( + # membership=prev_membership, + # user_id=user_id, + # sender=prev_sender, + # ) + # ) + + # # Check if the last membership (membership that applies to our snapshot) was + # # already included in our `sync_room_id_set` + # was_last_membership_already_included = filter_membership_for_sync( + # membership=last_membership_change_after_to_token.membership, + # user_id=user_id, + # sender=last_membership_change_after_to_token.sender, + # ) + + # # 1a) Add back rooms that the user left 
after the `to_token` + # # + # # For example, if the last membership event after the `to_token` is a leave + # # event, then the room was excluded from `sync_room_id_set` when we first + # # crafted it above. We should add these rooms back as long as the user also + # # was part of the room before the `to_token`. + # if ( + # not was_last_membership_already_included + # and should_prev_membership_be_included + # ): + # # TODO: Assign the correct membership event at the `to_token` here + # # (currently we're setting it as the last event after the `to_token`) + # sync_room_id_set[room_id] = convert_event_to_rooms_for_user( + # last_membership_change_after_to_token + # ) + # # 1b) Remove rooms that the user joined (hasn't left) after the `to_token` + # # + # # For example, if the last membership event after the `to_token` is a "join" + # # event, then the room was included `sync_room_id_set` when we first crafted + # # it above. We should remove these rooms as long as the user also wasn't + # # part of the room before the `to_token`. + # elif ( + # was_last_membership_already_included + # and not should_prev_membership_be_included + # ): + # del sync_room_id_set[room_id] + + # # 2) ----------------------------------------------------- + # # We fix-up newly_left rooms after the first fixup because it may have removed + # # some left rooms that we can figure out are newly_left in the following code + + # # 2) Fetch membership changes that fall in the range from `from_token` up to `to_token` + # membership_change_events_in_from_to_range = [] + # if from_token: + # membership_change_events_in_from_to_range = ( + # await self.store.get_membership_changes_for_user( + # user_id, + # from_key=from_token.room_key, + # to_key=to_token.room_key, + # excluded_rooms=self.rooms_to_exclude_globally, + # ) + # ) + + # # 2) Assemble a list of the last membership events in some given ranges. 
Someone + # # could have left and joined multiple times during the given range but we only + # # care about end-result so we grab the last one. + # last_membership_change_by_room_id_in_from_to_range: Dict[str, EventBase] = {} + # for event in membership_change_events_in_from_to_range: + # last_membership_change_by_room_id_in_from_to_range[event.room_id] = event + + # # 2) Fixup + # for ( + # last_membership_change_in_from_to_range + # ) in last_membership_change_by_room_id_in_from_to_range.values(): + # room_id = last_membership_change_in_from_to_range.room_id + + # # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). We + # # include newly_left rooms because the last event that the user should see + # # is their own leave event + # if last_membership_change_in_from_to_range.membership == Membership.LEAVE: + # sync_room_id_set[room_id] = convert_event_to_rooms_for_user( + # last_membership_change_in_from_to_range + # ) return sync_room_id_set diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index c21e69ecda..f5de23080d 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -44,6 +44,7 @@ what sort order was used: import logging from typing import ( TYPE_CHECKING, + AbstractSet, Any, Collection, Dict, @@ -62,7 +63,7 @@ from typing_extensions import Literal from twisted.internet import defer -from synapse.api.constants import Direction +from synapse.api.constants import Direction, EventTypes from synapse.api.filtering import Filter from synapse.events import EventBase from synapse.logging.context import make_deferred_yieldable, run_in_background @@ -111,6 +112,24 @@ class _EventsAround: end: RoomStreamToken +@attr.s(slots=True, frozen=True, auto_attribs=True) +class CurrentStateDeltaMembership: + """ + Attributes: + event_id: The "current" membership event ID in this room. May be `None` if the + server is no longer in the room or a state reset happened. 
+ prev_event_id: The previous membership event in this room that was replaced by + the "current" one. May be `None` if there was no previous membership event. + room_id: The room ID of the membership event. + """ + + event_id: Optional[str] + prev_event_id: Optional[str] + room_id: str + # Could be useful but we're not using it yet. + # event_pos: PersistedEventPosition + + def generate_pagination_where_clause( direction: Direction, column_names: Tuple[str, str], @@ -390,6 +409,42 @@ def _filter_results( return True +def _filter_results_by_stream( + lower_token: Optional[RoomStreamToken], + upper_token: Optional[RoomStreamToken], + instance_name: str, + stream_ordering: int, +) -> bool: + """ + Note: This function only works with "live" tokens with `stream_ordering` only. + + Returns True if the event persisted by the given instance at the given + topological/stream_ordering falls between the two tokens (taking a None + token to mean unbounded). + + Used to filter results from fetching events in the DB against the given + tokens. This is necessary to handle the case where the tokens include + position maps, which we handle by fetching more than necessary from the DB + and then filtering (rather than attempting to construct a complicated SQL + query). + """ + if lower_token: + assert lower_token.topological is None + + # If these are live tokens we compare the stream ordering against the + # writers stream position. + if stream_ordering <= lower_token.get_stream_pos_for_instance(instance_name): + return False + + if upper_token: + assert upper_token.topological is None + + if upper_token.get_stream_pos_for_instance(instance_name) < stream_ordering: + return False + + return True + + def filter_to_clause(event_filter: Optional[Filter]) -> Tuple[str, List[str]]: # NB: This may create SQL clauses that don't optimise well (and we don't # have indices on all possible clauses). E.g. 
it may create @@ -731,6 +786,94 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): return ret, key + async def get_current_state_delta_membership_changes_for_user( + self, + user_id: str, + from_key: RoomStreamToken, + to_key: RoomStreamToken, + excluded_rooms: Optional[List[str]] = None, + ) -> List[CurrentStateDeltaMembership]: + """ + TODO + + Note: This function only works with "live" tokens with `stream_ordering` only. + + All such events whose stream ordering `s` lies in the range `from_key < s <= + to_key` are returned. Events are sorted by `stream_ordering` ascending. + """ + # Start by ruling out cases where a DB query is not necessary. + if from_key == to_key: + return [] + + if from_key: + has_changed = self._membership_stream_cache.has_entity_changed( + user_id, int(from_key.stream) + ) + if not has_changed: + return [] + + def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: + # To handle tokens with a non-empty instance_map we fetch more + # results than necessary and then filter down + min_from_id = from_key.stream + max_to_id = to_key.get_max_stream_pos() + + args: List[Any] = [EventTypes.Member, user_id, min_from_id, max_to_id] + + # TODO: It would be good to assert that the `to_token` is >= + # the first row in `current_state_delta_stream` for the rooms we're + # interested in. Otherwise, we will end up with empty results and not know + # it. + + # Note: There is no index for `(type, state_key)` in + # `current_state_delta_stream`. We also can't just add an index for + # `event_id` and join the `room_memberships` table by `event_id` because it + # may be `null` in `current_state_delta_stream` so nothing will match (it's + # `null` when the server is no longer in the room or a state reset happened + # and it was unset). + sql = """ + SELECT s.event_id, s.prev_event_id, s.room_id, s.instance_name, s.stream_id + FROM current_state_delta_stream AS s + WHERE s.type = ? AND s.state_key = ? + AND s.stream_id > ? 
AND s.stream_id <= ? + ORDER BY s.stream_id ASC + """ + + txn.execute(sql, args) + + return [ + CurrentStateDeltaMembership( + event_id=event_id, + prev_event_id=prev_event_id, + room_id=room_id, + # event_pos=PersistedEventPosition( + # instance_name=instance_name, + # stream=stream_ordering, + # ), + ) + for event_id, prev_event_id, room_id, instance_name, stream_ordering in txn + if _filter_results_by_stream( + from_key, + to_key, + instance_name, + stream_ordering, + ) + ] + + current_state_delta_membership_changes = await self.db_pool.runInteraction( + "get_current_state_delta_membership_changes_for_user", f + ) + + rooms_to_exclude: AbstractSet[str] = set() + if excluded_rooms is not None: + rooms_to_exclude = set(excluded_rooms) + + return [ + membership_change + for membership_change in current_state_delta_membership_changes + if membership_change.room_id not in rooms_to_exclude + ] + @cancellable async def get_membership_changes_for_user( self, @@ -766,10 +909,10 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ignore_room_clause = "" if excluded_rooms is not None and len(excluded_rooms) > 0: - ignore_room_clause = "AND e.room_id NOT IN (%s)" % ",".join( - "?" 
for _ in excluded_rooms + ignore_room_clause, ignore_room_args = make_in_list_sql_clause( + txn.database_engine, "e.room_id", excluded_rooms, negative=True ) - args = args + excluded_rooms + args += ignore_room_args sql = """ SELECT m.event_id, instance_name, topological_ordering, stream_ordering diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index df262400e4..694fd17a02 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -1029,7 +1029,7 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): ), ) - def test_display_name_changes( + def test_display_name_changes_in_token_range( self, ) -> None: """ @@ -1102,6 +1102,77 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): ), ) + def test_display_name_changes_before_and_after_token_range( + self, + ) -> None: + """ + Test that we point to the correct membership event even though there are no + membership events in the from/range but there are `displayname`/`avatar_url` + changes before/after the token range. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + # We create the room with user2 so the room isn't left with no members when we + # leave and can still re-join. 
+ room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) + join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + # Update the displayname before the token range + displayname_change_before_token_range_response = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname during token range", + }, + tok=user1_tok, + ) + + after_room1_token = self.event_sources.get_current_token() + + # Update the displayname after the token range + displayname_change_after_token_range_response = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname after token range", + }, + tok=user1_tok, + ) + + room_id_results = self.get_success( + self.sliding_sync_handler.get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=after_room1_token, + to_token=after_room1_token, + ) + ) + + # Room should show up because we were joined before the from/to range + self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + displayname_change_before_token_range_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response": join_response["event_id"], + "displayname_change_before_token_range_response": displayname_change_before_token_range_response[ + "event_id" + ], + "displayname_change_after_token_range_response": displayname_change_after_token_range_response[ + "event_id" + ], + } + ), + ) + def test_display_name_changes_leave_after_token_range( self, ) -> None: From 6c791a88b34b5646324a22584d5f84d99501ff34 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 21:07:19 -0500 Subject: [PATCH 36/84] WIP: Add back `newly_left` --- 
synapse/handlers/sliding_sync.py | 113 +++++++++++++++++------ synapse/storage/databases/main/stream.py | 18 +++- 2 files changed, 101 insertions(+), 30 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 2e24b0c338..5603fdeb38 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -375,11 +375,6 @@ class SlidingSyncHandler: # instead from the time of the `to_token`. room_for_user.room_id: room_for_user for room_for_user in room_for_user_list - if filter_membership_for_sync( - membership=room_for_user.membership, - user_id=user_id, - sender=room_for_user.sender, - ) } # Get the `RoomStreamToken` that represents the spot we queried up to when we got @@ -408,6 +403,23 @@ class SlidingSyncHandler: instance_map=immutabledict(instance_to_max_stream_ordering_map), ) + # Since we fetched the users room list at some point in time after the from/to + # tokens, we need to revert/rewind some membership changes to match the point in + # time of the `to_token`. 
In particular, we need to make these fixups: + # + # - 1a) Remove rooms that the user joined after the `to_token` + # - 1b) Add back rooms that the user left after the `to_token` + # - 1c) Update room membership events to the point in time of the `to_token` + # - 2) Add back newly_left rooms (> `from_token` and <= `to_token`) + + # 1) ----------------------------------------------------- + + # 1) Fetch membership changes that fall in the range from `to_token` up to + # `membership_snapshot_token` + # + # If our `to_token` is already the same or ahead of the latest room membership + # for the user, we don't need to do any "2)" fix-ups and can just straight-up + # use the room list from the snapshot as a base (nothing has changed) current_state_delta_membership_changes_after_to_token = [] if not membership_snapshot_token.is_before_or_eq(to_token.room_key): current_state_delta_membership_changes_after_to_token = ( @@ -419,8 +431,9 @@ class SlidingSyncHandler: ) ) - # We need the first membership event after the `to_token` so we can step - # backward to the previous membership that would apply to the from/to range. + # 1) Assemble a list of the first membership event after the `to_token` so we can + # step backward to the previous membership that would apply to the from/to + # range. first_membership_change_by_room_id_after_to_token: Dict[ str, CurrentStateDeltaMembership ] = {} @@ -430,6 +443,8 @@ class SlidingSyncHandler: membership_change.room_id, membership_change ) + # 1) Fixup part 1 + # # Since we fetched a snapshot of the users room list at some point in time after # the from/to tokens, we need to revert/rewind some membership changes to match # the point in time of the `to_token`. 
@@ -444,37 +459,81 @@ class SlidingSyncHandler: or first_membership_change_after_to_token.prev_event_id is not None ) - # If the membership change was added after the `to_token`, we need to remove - # it + # 1a) Remove rooms that the user joined after the `to_token` if first_membership_change_after_to_token.prev_event_id is None: sync_room_id_set.pop(room_id, None) - # From the first membership event after the `to_token`, we need to step - # backward to the previous membership that would apply to the from/to range. + # 1b) 1c) From the first membership event after the `to_token`, step backward to the + # previous membership that would apply to the from/to range. else: prev_event_ids_in_from_to_range.append( first_membership_change_after_to_token.prev_event_id ) - # Fetch the previous membership events that apply to the from/to range and fixup - # our working list. + # 1) Fixup part 2 + # + # 1b) 1c) Fetch the previous membership events that apply to the from/to range + # and fixup our working list. 
prev_events_in_from_to_range = await self.store.get_events( prev_event_ids_in_from_to_range ) for prev_event_in_from_to_range in prev_events_in_from_to_range.values(): - # Update if the membership should be included - if filter_membership_for_sync( - membership=prev_event_in_from_to_range.membership, - user_id=user_id, - sender=prev_event_in_from_to_range.sender, - ): - sync_room_id_set[prev_event_in_from_to_range.room_id] = ( - convert_event_to_rooms_for_user(prev_event_in_from_to_range) - ) - # Otherwise, remove it - else: - sync_room_id_set.pop(prev_event_in_from_to_range.room_id, None) + # 1b) 1c) Update the membership with what we found + sync_room_id_set[prev_event_in_from_to_range.room_id] = ( + convert_event_to_rooms_for_user(prev_event_in_from_to_range) + ) - # TODO: Add back newly_left rooms + filtered_sync_room_id_set = { + room_id: room_for_user + for room_id, room_for_user in sync_room_id_set.items() + if filter_membership_for_sync( + membership=room_for_user.membership, + user_id=user_id, + sender=room_for_user.sender, + ) + } + + # 2) ----------------------------------------------------- + # We fix-up newly_left rooms after the first fixup because it may have removed + # some left rooms that we can figure out are newly_left in the following code + + # 2) Fetch membership changes that fall in the range from `from_token` up to `to_token` + current_state_delta_membership_changes_in_from_to_range = [] + if from_token: + current_state_delta_membership_changes_in_from_to_range = ( + await self.store.get_current_state_delta_membership_changes_for_user( + user_id, + from_key=from_token.room_key, + to_key=to_token.room_key, + excluded_rooms=self.rooms_to_exclude_globally, + ) + ) + + # 2) Assemble a list of the last membership events in some given ranges. Someone + # could have left and joined multiple times during the given range but we only + # care about end-result so we grab the last one. 
+ last_membership_change_by_room_id_in_from_to_range: Dict[ + str, CurrentStateDeltaMembership + ] = {} + for ( + membership_change + ) in current_state_delta_membership_changes_in_from_to_range: + last_membership_change_by_room_id_in_from_to_range[ + membership_change.room_id + ] = membership_change + + # 2) Fixup + for ( + last_membership_change_in_from_to_range + ) in last_membership_change_by_room_id_in_from_to_range.values(): + room_id = last_membership_change_in_from_to_range.room_id + + # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). We + # include newly_left rooms because the last event that the user should see + # is their own leave event + if last_membership_change_in_from_to_range.membership == Membership.LEAVE: + filtered_sync_room_id_set[room_id] = convert_event_to_rooms_for_user( + last_membership_change_in_from_to_range + ) # Since we fetched the users room list at some point in time after the from/to # tokens, we need to revert/rewind some membership changes to match the point in @@ -638,7 +697,7 @@ class SlidingSyncHandler: # last_membership_change_in_from_to_range # ) - return sync_room_id_set + return filtered_sync_room_id_set async def filter_rooms( self, diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index f5de23080d..595245e70e 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -63,7 +63,7 @@ from typing_extensions import Literal from twisted.internet import defer -from synapse.api.constants import Direction, EventTypes +from synapse.api.constants import Direction, EventTypes, Membership from synapse.api.filtering import Filter from synapse.events import EventBase from synapse.logging.context import make_deferred_yieldable, run_in_background @@ -126,6 +126,7 @@ class CurrentStateDeltaMembership: event_id: Optional[str] prev_event_id: Optional[str] room_id: str + membership: str # Could be useful but we're not using it yet. 
# event_pos: PersistedEventPosition @@ -832,7 +833,13 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): # `null` when the server is no longer in the room or a state reset happened # and it was unset). sql = """ - SELECT s.event_id, s.prev_event_id, s.room_id, s.instance_name, s.stream_id + SELECT + s.event_id, + s.prev_event_id, + s.room_id, + s.instance_name, + s.stream_id, + m.membership FROM current_state_delta_stream AS s WHERE s.type = ? AND s.state_key = ? AND s.stream_id > ? AND s.stream_id <= ? @@ -846,12 +853,17 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): event_id=event_id, prev_event_id=prev_event_id, room_id=room_id, + # We can assume that the membership is `LEAVE` as a default. This + # will happen when `current_state_delta_stream.event_id` is null + # because it was unset due to a state reset or the server is no + # longer in the room (everyone on our local server left). + membership=membership if membership else Membership.LEAVE, # event_pos=PersistedEventPosition( # instance_name=instance_name, # stream=stream_ordering, # ), ) - for event_id, prev_event_id, room_id, instance_name, stream_ordering in txn + for event_id, prev_event_id, room_id, instance_name, stream_ordering, membership in txn if _filter_results_by_stream( from_key, to_key, From 27d74b023e1a5679b4fbe6a5b4f6efaada8ec3b0 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 22:20:27 -0500 Subject: [PATCH 37/84] Iterate --- synapse/handlers/sliding_sync.py | 33 ++++--- synapse/storage/databases/main/stream.py | 115 +++++++++-------------- 2 files changed, 68 insertions(+), 80 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 5603fdeb38..dbbbbc66bf 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -443,22 +443,16 @@ class SlidingSyncHandler: membership_change.room_id, membership_change ) - # 1) Fixup part 1 + # 1) Fixup # # Since we fetched a snapshot of the 
users room list at some point in time after # the from/to tokens, we need to revert/rewind some membership changes to match # the point in time of the `to_token`. - prev_event_ids_in_from_to_range = [] + prev_event_ids_in_from_to_range: List[str] = [] for ( room_id, first_membership_change_after_to_token, ) in first_membership_change_by_room_id_after_to_token.items(): - # One of these should exist to be a valid row in `current_state_delta_stream` - assert ( - first_membership_change_after_to_token.event_id is not None - or first_membership_change_after_to_token.prev_event_id is not None - ) - # 1a) Remove rooms that the user joined after the `to_token` if first_membership_change_after_to_token.prev_event_id is None: sync_room_id_set.pop(room_id, None) @@ -469,7 +463,7 @@ class SlidingSyncHandler: first_membership_change_after_to_token.prev_event_id ) - # 1) Fixup part 2 + # 1) Fixup (more) # # 1b) 1c) Fetch the previous membership events that apply to the from/to range # and fixup our working list. @@ -522,18 +516,33 @@ class SlidingSyncHandler: ] = membership_change # 2) Fixup + last_membership_event_ids_to_include_in_from_to_range: List[str] = [] for ( last_membership_change_in_from_to_range ) in last_membership_change_by_room_id_in_from_to_range.values(): room_id = last_membership_change_in_from_to_range.room_id + sync_room_id_set[room_id] + # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). 
We # include newly_left rooms because the last event that the user should see # is their own leave event if last_membership_change_in_from_to_range.membership == Membership.LEAVE: - filtered_sync_room_id_set[room_id] = convert_event_to_rooms_for_user( - last_membership_change_in_from_to_range - ) + # Save the look-up if we already have the `leave` event + if sync_room_id_set[room_id].event_id == last_membership_change_in_from_to_range.prev_event_id:: + filtered_sync_room_id_set[room_id] = sync_room_id_set[room_id] + else: + last_membership_event_ids_to_include_in_from_to_range.append(last_membership_change_in_from_to_range.event_id) + + # TODO + # last_membership_events_to_include_in_from_to_range = await self.store.get_events( + # last_membership_event_ids_to_include_in_from_to_range + # ) + # for prev_event_in_from_to_range in prev_events_in_from_to_range.values(): + # # 1b) 1c) Update the membership with what we found + # sync_room_id_set[prev_event_in_from_to_range.room_id] = ( + # convert_event_to_rooms_for_user(prev_event_in_from_to_range) + # ) # Since we fetched the users room list at some point in time after the from/to # tokens, we need to revert/rewind some membership changes to match the point in diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 595245e70e..ed571b0de7 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -63,7 +63,7 @@ from typing_extensions import Literal from twisted.internet import defer -from synapse.api.constants import Direction, EventTypes, Membership +from synapse.api.constants import Direction from synapse.api.filtering import Filter from synapse.events import EventBase from synapse.logging.context import make_deferred_yieldable, run_in_background @@ -116,14 +116,13 @@ class _EventsAround: class CurrentStateDeltaMembership: """ Attributes: - event_id: The "current" membership event ID in this room. 
May be `None` if the - server is no longer in the room or a state reset happened. + event_id: The "current" membership event ID in this room. prev_event_id: The previous membership event in this room that was replaced by the "current" one. May be `None` if there was no previous membership event. room_id: The room ID of the membership event. """ - event_id: Optional[str] + event_id: str prev_event_id: Optional[str] room_id: str membership: str @@ -410,42 +409,6 @@ def _filter_results( return True -def _filter_results_by_stream( - lower_token: Optional[RoomStreamToken], - upper_token: Optional[RoomStreamToken], - instance_name: str, - stream_ordering: int, -) -> bool: - """ - Note: This function only works with "live" tokens with `stream_ordering` only. - - Returns True if the event persisted by the given instance at the given - topological/stream_ordering falls between the two tokens (taking a None - token to mean unbounded). - - Used to filter results from fetching events in the DB against the given - tokens. This is necessary to handle the case where the tokens include - position maps, which we handle by fetching more than necessary from the DB - and then filtering (rather than attempting to construct a complicated SQL - query). - """ - if lower_token: - assert lower_token.topological is None - - # If these are live tokens we compare the stream ordering against the - # writers stream position. - if stream_ordering <= lower_token.get_stream_pos_for_instance(instance_name): - return False - - if upper_token: - assert upper_token.topological is None - - if upper_token.get_stream_pos_for_instance(instance_name) < stream_ordering: - return False - - return True - - def filter_to_clause(event_filter: Optional[Filter]) -> Tuple[str, List[str]]: # NB: This may create SQL clauses that don't optimise well (and we don't # have indices on all possible clauses). E.g. 
it may create @@ -819,58 +782,74 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): min_from_id = from_key.stream max_to_id = to_key.get_max_stream_pos() - args: List[Any] = [EventTypes.Member, user_id, min_from_id, max_to_id] + args: List[Any] = [user_id, min_from_id, max_to_id] # TODO: It would be good to assert that the `to_token` is >= # the first row in `current_state_delta_stream` for the rooms we're # interested in. Otherwise, we will end up with empty results and not know # it. - # Note: There is no index for `(type, state_key)` in - # `current_state_delta_stream`. We also can't just add an index for - # `event_id` and join the `room_memberships` table by `event_id` because it - # may be `null` in `current_state_delta_stream` so nothing will match (it's - # `null` when the server is no longer in the room or a state reset happened - # and it was unset). + # We have to look-up events by `stream_ordering` because + # `current_state_delta_stream.event_id` can be `null` if the server is no + # longer in the room or a state reset happened and it was unset. + # `stream_ordering` is unique across the Synapse instance so this should + # work fine. sql = """ SELECT - s.event_id, + e.event_id, s.prev_event_id, s.room_id, s.instance_name, s.stream_id, + e.topological_ordering, m.membership FROM current_state_delta_stream AS s - WHERE s.type = ? AND s.state_key = ? + INNER JOIN events AS e ON e.stream_ordering = s.stream_id + INNER JOIN room_memberships AS m ON m.event_id = e.event_id + WHERE m.user_id = ? AND s.stream_id > ? AND s.stream_id <= ? ORDER BY s.stream_id ASC """ txn.execute(sql, args) - return [ - CurrentStateDeltaMembership( - event_id=event_id, - prev_event_id=prev_event_id, - room_id=room_id, - # We can assume that the membership is `LEAVE` as a default. 
This - # will happen when `current_state_delta_stream.event_id` is null - # because it was unset due to a state reset or the server is no - # longer in the room (everyone on our local server left). - membership=membership if membership else Membership.LEAVE, - # event_pos=PersistedEventPosition( - # instance_name=instance_name, - # stream=stream_ordering, - # ), - ) - for event_id, prev_event_id, room_id, instance_name, stream_ordering, membership in txn - if _filter_results_by_stream( + membership_changes: List[CurrentStateDeltaMembership] = [] + for ( + event_id, + prev_event_id, + room_id, + instance_name, + stream_ordering, + topological_ordering, + membership, + ) in txn: + assert event_id is not None + # `prev_event_id` can be `None` + assert room_id is not None + assert instance_name is not None + assert stream_ordering is not None + assert topological_ordering is not None + assert membership is not None + + if _filter_results( from_key, to_key, instance_name, + topological_ordering, stream_ordering, - ) - ] + ): + membership_changes.append( + CurrentStateDeltaMembership( + event_id=event_id, + prev_event_id=prev_event_id, + room_id=room_id, + membership=membership, + # event_pos=PersistedEventPosition( + # instance_name=instance_name, + # stream=stream_ordering, + # ), + ) + ) current_state_delta_membership_changes = await self.db_pool.runInteraction( "get_current_state_delta_membership_changes_for_user", f From fb8fbd489cb920b6d29282e3b2912a311bade162 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 22:37:45 -0500 Subject: [PATCH 38/84] Just fetch full events for `get_current_state_delta_membership_changes_for_user(...)` Makes downstream logic simpler and although we may look-up some events we don't use, the lookup is all done in one go instead of fetching events from event_ids in a couple different places. 
--- synapse/handlers/sliding_sync.py | 203 +---------------------- synapse/storage/databases/main/stream.py | 54 +++++- 2 files changed, 51 insertions(+), 206 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index dbbbbc66bf..5d63099499 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -448,34 +448,20 @@ class SlidingSyncHandler: # Since we fetched a snapshot of the users room list at some point in time after # the from/to tokens, we need to revert/rewind some membership changes to match # the point in time of the `to_token`. - prev_event_ids_in_from_to_range: List[str] = [] for ( room_id, first_membership_change_after_to_token, ) in first_membership_change_by_room_id_after_to_token.items(): # 1a) Remove rooms that the user joined after the `to_token` - if first_membership_change_after_to_token.prev_event_id is None: + if first_membership_change_after_to_token.prev_event is None: sync_room_id_set.pop(room_id, None) # 1b) 1c) From the first membership event after the `to_token`, step backward to the # previous membership that would apply to the from/to range. else: - prev_event_ids_in_from_to_range.append( - first_membership_change_after_to_token.prev_event_id + sync_room_id_set[room_id] = convert_event_to_rooms_for_user( + first_membership_change_after_to_token.prev_event ) - # 1) Fixup (more) - # - # 1b) 1c) Fetch the previous membership events that apply to the from/to range - # and fixup our working list. 
- prev_events_in_from_to_range = await self.store.get_events( - prev_event_ids_in_from_to_range - ) - for prev_event_in_from_to_range in prev_events_in_from_to_range.values(): - # 1b) 1c) Update the membership with what we found - sync_room_id_set[prev_event_in_from_to_range.room_id] = ( - convert_event_to_rooms_for_user(prev_event_in_from_to_range) - ) - filtered_sync_room_id_set = { room_id: room_for_user for room_id, room_for_user in sync_room_id_set.items() @@ -516,195 +502,18 @@ class SlidingSyncHandler: ] = membership_change # 2) Fixup - last_membership_event_ids_to_include_in_from_to_range: List[str] = [] for ( last_membership_change_in_from_to_range ) in last_membership_change_by_room_id_in_from_to_range.values(): room_id = last_membership_change_in_from_to_range.room_id - sync_room_id_set[room_id] - # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). We # include newly_left rooms because the last event that the user should see # is their own leave event if last_membership_change_in_from_to_range.membership == Membership.LEAVE: - # Save the look-up if we already have the `leave` event - if sync_room_id_set[room_id].event_id == last_membership_change_in_from_to_range.prev_event_id:: - filtered_sync_room_id_set[room_id] = sync_room_id_set[room_id] - else: - last_membership_event_ids_to_include_in_from_to_range.append(last_membership_change_in_from_to_range.event_id) - - # TODO - # last_membership_events_to_include_in_from_to_range = await self.store.get_events( - # last_membership_event_ids_to_include_in_from_to_range - # ) - # for prev_event_in_from_to_range in prev_events_in_from_to_range.values(): - # # 1b) 1c) Update the membership with what we found - # sync_room_id_set[prev_event_in_from_to_range.room_id] = ( - # convert_event_to_rooms_for_user(prev_event_in_from_to_range) - # ) - - # Since we fetched the users room list at some point in time after the from/to - # tokens, we need to revert/rewind some membership changes to match the point 
in - # time of the `to_token`. In particular, we need to make these fixups: - # - # - 1a) Remove rooms that the user joined after the `to_token` - # - 1b) Add back rooms that the user left after the `to_token` - # - 2) Add back newly_left rooms (> `from_token` and <= `to_token`) - - # # 1) ----------------------------------------------------- - - # # 1) Fetch membership changes that fall in the range from `to_token` up to - # # `membership_snapshot_token` - # # - # # If our `to_token` is already the same or ahead of the latest room membership - # # for the user, we don't need to do any "2)" fix-ups and can just straight-up - # # use the room list from the snapshot as a base (nothing has changed) - # membership_change_events_after_to_token = [] - # if not membership_snapshot_token.is_before_or_eq(to_token.room_key): - # membership_change_events_after_to_token = ( - # await self.store.get_membership_changes_for_user( - # user_id, - # from_key=to_token.room_key, - # to_key=membership_snapshot_token, - # excluded_rooms=self.rooms_to_exclude_globally, - # ) - # ) - - # # 1) Assemble a list of the last membership events in some given ranges. Someone - # # could have left and joined multiple times during the given range but we only - # # care about end-result so we grab the last one. - # last_membership_change_by_room_id_after_to_token: Dict[str, EventBase] = {} - # # We also need the first membership event after the `to_token` so we can step - # # backward to the previous membership that would apply to the from/to range. 
- # first_membership_change_by_room_id_after_to_token: Dict[str, EventBase] = {} - # for event in membership_change_events_after_to_token: - # last_membership_change_by_room_id_after_to_token[event.room_id] = event - # # Only set if we haven't already set it - # first_membership_change_by_room_id_after_to_token.setdefault( - # event.room_id, event - # ) - - # # 1) Fixup - # for ( - # last_membership_change_after_to_token - # ) in last_membership_change_by_room_id_after_to_token.values(): - # room_id = last_membership_change_after_to_token.room_id - - # # We want to find the first membership change after the `to_token` then step - # # backward to know the membership in the from/to range. - # first_membership_change_after_to_token = ( - # first_membership_change_by_room_id_after_to_token.get(room_id) - # ) - # assert first_membership_change_after_to_token is not None, ( - # "If there was a `last_membership_change_after_to_token` that we're iterating over, " - # + "then there should be corresponding a first change. For example, even if there " - # + "is only one event after the `to_token`, the first and last event will be same event. " - # + "This is probably a mistake in assembling the `last_membership_change_by_room_id_after_to_token`" - # + "/`first_membership_change_by_room_id_after_to_token` dicts above." - # ) - # # TODO: Instead of reading from `unsigned`, refactor this to use the - # # `current_state_delta_stream` table in the future. Probably a new - # # `get_membership_changes_for_user()` function that uses - # # `current_state_delta_stream` with a join to `room_memberships`. This would - # # help in state reset scenarios since `prev_content` is looking at the - # # current branch vs the current room state. This is all just data given to - # # the client so no real harm to data integrity, but we'd like to be nice to - # # the client. Since the `current_state_delta_stream` table is new, it - # # doesn't have all events in it. 
Since this is Sliding Sync, if we ever need - # # to, we can signal the client to throw all of their state away by sending - # # "operation: RESET". - # prev_content = first_membership_change_after_to_token.unsigned.get( - # "prev_content", {} - # ) - # prev_membership = prev_content.get("membership", None) - # prev_sender = first_membership_change_after_to_token.unsigned.get( - # "prev_sender", None - # ) - - # # Check if the previous membership (membership that applies to the from/to - # # range) should be included in our `sync_room_id_set` - # should_prev_membership_be_included = ( - # prev_membership is not None - # and prev_sender is not None - # and filter_membership_for_sync( - # membership=prev_membership, - # user_id=user_id, - # sender=prev_sender, - # ) - # ) - - # # Check if the last membership (membership that applies to our snapshot) was - # # already included in our `sync_room_id_set` - # was_last_membership_already_included = filter_membership_for_sync( - # membership=last_membership_change_after_to_token.membership, - # user_id=user_id, - # sender=last_membership_change_after_to_token.sender, - # ) - - # # 1a) Add back rooms that the user left after the `to_token` - # # - # # For example, if the last membership event after the `to_token` is a leave - # # event, then the room was excluded from `sync_room_id_set` when we first - # # crafted it above. We should add these rooms back as long as the user also - # # was part of the room before the `to_token`. 
- # if ( - # not was_last_membership_already_included - # and should_prev_membership_be_included - # ): - # # TODO: Assign the correct membership event at the `to_token` here - # # (currently we're setting it as the last event after the `to_token`) - # sync_room_id_set[room_id] = convert_event_to_rooms_for_user( - # last_membership_change_after_to_token - # ) - # # 1b) Remove rooms that the user joined (hasn't left) after the `to_token` - # # - # # For example, if the last membership event after the `to_token` is a "join" - # # event, then the room was included `sync_room_id_set` when we first crafted - # # it above. We should remove these rooms as long as the user also wasn't - # # part of the room before the `to_token`. - # elif ( - # was_last_membership_already_included - # and not should_prev_membership_be_included - # ): - # del sync_room_id_set[room_id] - - # # 2) ----------------------------------------------------- - # # We fix-up newly_left rooms after the first fixup because it may have removed - # # some left rooms that we can figure out are newly_left in the following code - - # # 2) Fetch membership changes that fall in the range from `from_token` up to `to_token` - # membership_change_events_in_from_to_range = [] - # if from_token: - # membership_change_events_in_from_to_range = ( - # await self.store.get_membership_changes_for_user( - # user_id, - # from_key=from_token.room_key, - # to_key=to_token.room_key, - # excluded_rooms=self.rooms_to_exclude_globally, - # ) - # ) - - # # 2) Assemble a list of the last membership events in some given ranges. Someone - # # could have left and joined multiple times during the given range but we only - # # care about end-result so we grab the last one. 
- # last_membership_change_by_room_id_in_from_to_range: Dict[str, EventBase] = {} - # for event in membership_change_events_in_from_to_range: - # last_membership_change_by_room_id_in_from_to_range[event.room_id] = event - - # # 2) Fixup - # for ( - # last_membership_change_in_from_to_range - # ) in last_membership_change_by_room_id_in_from_to_range.values(): - # room_id = last_membership_change_in_from_to_range.room_id - - # # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). We - # # include newly_left rooms because the last event that the user should see - # # is their own leave event - # if last_membership_change_in_from_to_range.membership == Membership.LEAVE: - # sync_room_id_set[room_id] = convert_event_to_rooms_for_user( - # last_membership_change_in_from_to_range - # ) + filtered_sync_room_id_set[room_id] = convert_event_to_rooms_for_user( + last_membership_change_in_from_to_range.event + ) return filtered_sync_room_id_set diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index ed571b0de7..ce135ededc 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -113,21 +113,37 @@ class _EventsAround: @attr.s(slots=True, frozen=True, auto_attribs=True) -class CurrentStateDeltaMembership: +class _CurrentStateDeltaMembershipReturn: """ Attributes: event_id: The "current" membership event ID in this room. prev_event_id: The previous membership event in this room that was replaced by the "current" one. May be `None` if there was no previous membership event. room_id: The room ID of the membership event. + membership: The membership state of the user in the room. """ event_id: str prev_event_id: Optional[str] room_id: str membership: str - # Could be useful but we're not using it yet. 
- # event_pos: PersistedEventPosition + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class CurrentStateDeltaMembership: + """ + Attributes: + event: The "current" membership event in this room. + prev_event: The previous membership event in this room that was replaced by + the "current" one. May be `None` if there was no previous membership event. + room_id: The room ID of the membership event. + membership: The membership state of the user in the room. + """ + + event: EventBase + prev_event: Optional[EventBase] + room_id: str + membership: str def generate_pagination_where_clause( @@ -776,7 +792,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): if not has_changed: return [] - def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: + def f(txn: LoggingTransaction) -> List[_CurrentStateDeltaMembershipReturn]: # To handle tokens with a non-empty instance_map we fetch more # results than necessary and then filter down min_from_id = from_key.stream @@ -813,7 +829,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): txn.execute(sql, args) - membership_changes: List[CurrentStateDeltaMembership] = [] + membership_changes: List[_CurrentStateDeltaMembershipReturn] = [] for ( event_id, prev_event_id, @@ -839,7 +855,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): stream_ordering, ): membership_changes.append( - CurrentStateDeltaMembership( + _CurrentStateDeltaMembershipReturn( event_id=event_id, prev_event_id=prev_event_id, room_id=room_id, @@ -851,17 +867,37 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ) ) - current_state_delta_membership_changes = await self.db_pool.runInteraction( + return membership_changes + + membership_changes = await self.db_pool.runInteraction( "get_current_state_delta_membership_changes_for_user", f ) + # Fetch all events in one go + event_ids = [] + for m in membership_changes: + event_ids.append(m.event_id) + if m.prev_event_id is not None: + 
event_ids.append(m.prev_event_id) + + events = await self.get_events(event_ids, get_prev_content=False) + rooms_to_exclude: AbstractSet[str] = set() if excluded_rooms is not None: rooms_to_exclude = set(excluded_rooms) return [ - membership_change - for membership_change in current_state_delta_membership_changes + CurrentStateDeltaMembership( + event=events[membership_change.event_id], + prev_event=( + events[membership_change.prev_event_id] + if membership_change.prev_event_id + else None + ), + room_id=membership_change.room_id, + membership=membership_change.membership, + ) + for membership_change in membership_changes if membership_change.room_id not in rooms_to_exclude ] From d91aa0018ca082cc88a3b3bfb7e06d1becb74227 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 22:45:59 -0500 Subject: [PATCH 39/84] Remove extras --- synapse/handlers/sliding_sync.py | 15 +++++---- synapse/storage/databases/main/stream.py | 40 +++++++----------------- 2 files changed, 21 insertions(+), 34 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 5d63099499..fed663ac36 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -427,7 +427,7 @@ class SlidingSyncHandler: user_id, from_key=to_token.room_key, to_key=membership_snapshot_token, - excluded_rooms=self.rooms_to_exclude_globally, + excluded_room_ids=self.rooms_to_exclude_globally, ) ) @@ -440,7 +440,7 @@ class SlidingSyncHandler: for membership_change in current_state_delta_membership_changes_after_to_token: # Only set if we haven't already set it first_membership_change_by_room_id_after_to_token.setdefault( - membership_change.room_id, membership_change + membership_change.event.room_id, membership_change ) # 1) Fixup @@ -484,7 +484,7 @@ class SlidingSyncHandler: user_id, from_key=from_token.room_key, to_key=to_token.room_key, - excluded_rooms=self.rooms_to_exclude_globally, + excluded_room_ids=self.rooms_to_exclude_globally, ) ) 
@@ -498,19 +498,22 @@ class SlidingSyncHandler: membership_change ) in current_state_delta_membership_changes_in_from_to_range: last_membership_change_by_room_id_in_from_to_range[ - membership_change.room_id + membership_change.event.room_id ] = membership_change # 2) Fixup for ( last_membership_change_in_from_to_range ) in last_membership_change_by_room_id_in_from_to_range.values(): - room_id = last_membership_change_in_from_to_range.room_id + room_id = last_membership_change_in_from_to_range.event.room_id # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). We # include newly_left rooms because the last event that the user should see # is their own leave event - if last_membership_change_in_from_to_range.membership == Membership.LEAVE: + if ( + last_membership_change_in_from_to_range.event.membership + == Membership.LEAVE + ): filtered_sync_room_id_set[room_id] = convert_event_to_rooms_for_user( last_membership_change_in_from_to_range.event ) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index ce135ededc..efc0b88797 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -120,13 +120,11 @@ class _CurrentStateDeltaMembershipReturn: prev_event_id: The previous membership event in this room that was replaced by the "current" one. May be `None` if there was no previous membership event. room_id: The room ID of the membership event. - membership: The membership state of the user in the room. """ event_id: str prev_event_id: Optional[str] room_id: str - membership: str @attr.s(slots=True, frozen=True, auto_attribs=True) @@ -136,14 +134,10 @@ class CurrentStateDeltaMembership: event: The "current" membership event in this room. prev_event: The previous membership event in this room that was replaced by the "current" one. May be `None` if there was no previous membership event. - room_id: The room ID of the membership event. 
- membership: The membership state of the user in the room. """ event: EventBase prev_event: Optional[EventBase] - room_id: str - membership: str def generate_pagination_where_clause( @@ -771,7 +765,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): user_id: str, from_key: RoomStreamToken, to_key: RoomStreamToken, - excluded_rooms: Optional[List[str]] = None, + excluded_room_ids: Optional[List[str]] = None, ) -> List[CurrentStateDeltaMembership]: """ TODO @@ -817,8 +811,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): s.room_id, s.instance_name, s.stream_id, - e.topological_ordering, - m.membership + e.topological_ordering FROM current_state_delta_stream AS s INNER JOIN events AS e ON e.stream_ordering = s.stream_id INNER JOIN room_memberships AS m ON m.event_id = e.event_id @@ -837,7 +830,6 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): instance_name, stream_ordering, topological_ordering, - membership, ) in txn: assert event_id is not None # `prev_event_id` can be `None` @@ -845,7 +837,6 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): assert instance_name is not None assert stream_ordering is not None assert topological_ordering is not None - assert membership is not None if _filter_results( from_key, @@ -859,46 +850,39 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): event_id=event_id, prev_event_id=prev_event_id, room_id=room_id, - membership=membership, - # event_pos=PersistedEventPosition( - # instance_name=instance_name, - # stream=stream_ordering, - # ), ) ) return membership_changes - membership_changes = await self.db_pool.runInteraction( + raw_membership_changes = await self.db_pool.runInteraction( "get_current_state_delta_membership_changes_for_user", f ) # Fetch all events in one go event_ids = [] - for m in membership_changes: + for m in raw_membership_changes: event_ids.append(m.event_id) if m.prev_event_id is not None: event_ids.append(m.prev_event_id) events = await 
self.get_events(event_ids, get_prev_content=False) - rooms_to_exclude: AbstractSet[str] = set() - if excluded_rooms is not None: - rooms_to_exclude = set(excluded_rooms) + room_ids_to_exclude: AbstractSet[str] = set() + if excluded_room_ids is not None: + room_ids_to_exclude = set(excluded_room_ids) return [ CurrentStateDeltaMembership( - event=events[membership_change.event_id], + event=events[raw_membership_change.event_id], prev_event=( - events[membership_change.prev_event_id] - if membership_change.prev_event_id + events[raw_membership_change.prev_event_id] + if raw_membership_change.prev_event_id else None ), - room_id=membership_change.room_id, - membership=membership_change.membership, ) - for membership_change in membership_changes - if membership_change.room_id not in rooms_to_exclude + for raw_membership_change in raw_membership_changes + if raw_membership_change.room_id not in room_ids_to_exclude ] @cancellable From daa7e3691aa73f2d8a81de1823a0a44b54fe838f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 23:01:28 -0500 Subject: [PATCH 40/84] Add docstring --- synapse/storage/databases/main/stream.py | 32 ++++++++++++++++++++---- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index efc0b88797..730e55d135 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -768,12 +768,34 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): excluded_room_ids: Optional[List[str]] = None, ) -> List[CurrentStateDeltaMembership]: """ - TODO + Fetch membership events (and the previous event that was replaced by that one) + for a given user. - Note: This function only works with "live" tokens with `stream_ordering` only. + We're looking for membership changes in the token range (> `from_key` and <= + `to_key`). 
- All such events whose stream ordering `s` lies in the range `from_key < s <= - to_key` are returned. Events are sorted by `stream_ordering` ascending. + Please be mindful to only use this with `from_key` and `to_key` tokens that are + recent enough to be after when the first local user joined the room. Otherwise, + the results may be incomplete or too greedy. For example, if you use a token + range before the first local user joined the room, you will see 0 events since + `current_state_delta_stream` tracks what the server thinks is the current state + of the room as time goes. It does not track how state progresses from the + beginning of the room. So for example, when you remotely join a room, the first + rows will just be the state when you joined and progress from there. + + You can probably reasonably use this with `/sync` because the `to_key` passed in + will be the "current" now token and the range will cover when the user joined + the room. + + Args: + user_id: The user ID to fetch membership events for. + from_key: The point in the stream to sync from (fetching events > this point). + to_key: The token to fetch rooms up to (fetching events <= this point). + excluded_room_ids: Optional list of room IDs to exclude from the results. + + Returns: + All membership changes to the current state in the token range. Events are + sorted by `stream_ordering` ascending. """ # Start by ruling out cases where a DB query is not necessary. if from_key == to_key: @@ -794,7 +816,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): args: List[Any] = [user_id, min_from_id, max_to_id] - # TODO: It would be good to assert that the `to_token` is >= + # TODO: It would be good to assert that the `from_token`/`to_token` is >= # the first row in `current_state_delta_stream` for the rooms we're # interested in. Otherwise, we will end up with empty results and not know # it. 
From cccbd15e7ece55ec8aab2632fcb7099215b29c86 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 23:40:55 -0500 Subject: [PATCH 41/84] Refactor back to not pulling out full events --- synapse/handlers/sliding_sync.py | 159 +++++++++++++---------- synapse/storage/databases/main/stream.py | 75 +++++------ 2 files changed, 125 insertions(+), 109 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index fed663ac36..c1cfec5000 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -28,7 +28,6 @@ from synapse.events import EventBase from synapse.events.utils import strip_event from synapse.handlers.relations import BundledAggregations from synapse.storage.databases.main.stream import CurrentStateDeltaMembership -from synapse.storage.roommember import RoomsForUser from synapse.types import ( JsonDict, PersistedEventPosition, @@ -48,27 +47,6 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -def convert_event_to_rooms_for_user(event: EventBase) -> RoomsForUser: - """ - Quick helper to convert an event to a `RoomsForUser` object. 
- """ - # These fields should be present for all persisted events - assert event.internal_metadata.stream_ordering is not None - assert event.internal_metadata.instance_name is not None - - return RoomsForUser( - room_id=event.room_id, - sender=event.sender, - membership=event.membership, - event_id=event.event_id, - event_pos=PersistedEventPosition( - event.internal_metadata.instance_name, - event.internal_metadata.stream_ordering, - ), - room_version_id=event.room_version.identifier, - ) - - def filter_membership_for_sync(*, membership: str, user_id: str, sender: str) -> bool: """ Returns True if the membership event should be included in the sync response, @@ -108,6 +86,25 @@ class RoomSyncConfig: required_state: Set[Tuple[str, str]] +@attr.s(slots=True, frozen=True, auto_attribs=True) +class _RoomMembershipForUser: + """ + Attributes: + event_id: The event ID of the membership event + event_pos: The stream position of the membership event + membership: The membership state of the user in the room + sender: The person who sent the membership event + newly_joined: Whether the user newly joined the room during the given token + range + """ + + event_id: str + event_pos: PersistedEventPosition + membership: str + sender: str + newly_joined: bool + + class SlidingSyncHandler: def __init__(self, hs: "HomeServer"): self.clock = hs.get_clock() @@ -302,7 +299,7 @@ class SlidingSyncHandler: user=sync_config.user, room_id=room_id, room_sync_config=room_sync_config, - rooms_for_user_membership_at_to_token=sync_room_map[room_id], + rooms_membership_for_user_at_to_token=sync_room_map[room_id], from_token=from_token, to_token=to_token, ) @@ -321,7 +318,7 @@ class SlidingSyncHandler: user: UserID, to_token: StreamToken, from_token: Optional[StreamToken] = None, - ) -> Dict[str, RoomsForUser]: + ) -> Dict[str, _RoomMembershipForUser]: """ Fetch room IDs that should be listed for this user in the sync response (the full room list that will be filtered, sorted, and sliced). 
@@ -373,7 +370,13 @@ class SlidingSyncHandler: # Note: The `room_for_user` we're assigning here will need to be fixed up # (below) because they are potentially from the current snapshot time # instead from the time of the `to_token`. - room_for_user.room_id: room_for_user + room_for_user.room_id: _RoomMembershipForUser( + event_id=room_for_user.event_id, + event_pos=room_for_user.event_pos, + membership=room_for_user.membership, + sender=room_for_user.sender, + newly_joined=False, + ) for room_for_user in room_for_user_list } @@ -440,7 +443,7 @@ class SlidingSyncHandler: for membership_change in current_state_delta_membership_changes_after_to_token: # Only set if we haven't already set it first_membership_change_by_room_id_after_to_token.setdefault( - membership_change.event.room_id, membership_change + membership_change.room_id, membership_change ) # 1) Fixup @@ -448,27 +451,59 @@ class SlidingSyncHandler: # Since we fetched a snapshot of the users room list at some point in time after # the from/to tokens, we need to revert/rewind some membership changes to match # the point in time of the `to_token`. + prev_event_ids_in_from_to_range: List[str] = [] for ( room_id, first_membership_change_after_to_token, ) in first_membership_change_by_room_id_after_to_token.items(): # 1a) Remove rooms that the user joined after the `to_token` - if first_membership_change_after_to_token.prev_event is None: + if first_membership_change_after_to_token.prev_event_id is None: sync_room_id_set.pop(room_id, None) # 1b) 1c) From the first membership event after the `to_token`, step backward to the # previous membership that would apply to the from/to range. 
else: - sync_room_id_set[room_id] = convert_event_to_rooms_for_user( - first_membership_change_after_to_token.prev_event + prev_event_ids_in_from_to_range.append( + first_membership_change_after_to_token.prev_event_id ) + # 1) Fixup (more) + # + # 1b) 1c) Fetch the previous membership events that apply to the from/to range + # and fixup our working list. + prev_events_in_from_to_range = await self.store.get_events( + prev_event_ids_in_from_to_range + ) + for prev_event_in_from_to_range in prev_events_in_from_to_range.values(): + # These fields should be present for all persisted events + assert ( + prev_event_in_from_to_range.internal_metadata.instance_name is not None + ) + assert ( + prev_event_in_from_to_range.internal_metadata.stream_ordering + is not None + ) + + # 1b) 1c) Update the membership with what we found + sync_room_id_set[prev_event_in_from_to_range.room_id] = ( + _RoomMembershipForUser( + event_id=prev_event_in_from_to_range.event_id, + event_pos=PersistedEventPosition( + instance_name=prev_event_in_from_to_range.internal_metadata.instance_name, + stream=prev_event_in_from_to_range.internal_metadata.stream_ordering, + ), + membership=prev_event_in_from_to_range.membership, + sender=prev_event_in_from_to_range.sender, + newly_joined=False, + ) + ) + filtered_sync_room_id_set = { - room_id: room_for_user - for room_id, room_for_user in sync_room_id_set.items() + room_id: room_membership_for_user + for room_id, room_membership_for_user in sync_room_id_set.items() if filter_membership_for_sync( - membership=room_for_user.membership, + membership=room_membership_for_user.membership, user_id=user_id, - sender=room_for_user.sender, + sender=room_membership_for_user.sender, ) } @@ -498,35 +533,38 @@ class SlidingSyncHandler: membership_change ) in current_state_delta_membership_changes_in_from_to_range: last_membership_change_by_room_id_in_from_to_range[ - membership_change.event.room_id + membership_change.room_id ] = membership_change # 2) Fixup for ( 
last_membership_change_in_from_to_range ) in last_membership_change_by_room_id_in_from_to_range.values(): - room_id = last_membership_change_in_from_to_range.event.room_id + room_id = last_membership_change_in_from_to_range.room_id # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). We # include newly_left rooms because the last event that the user should see # is their own leave event - if ( - last_membership_change_in_from_to_range.event.membership - == Membership.LEAVE - ): - filtered_sync_room_id_set[room_id] = convert_event_to_rooms_for_user( - last_membership_change_in_from_to_range.event + if last_membership_change_in_from_to_range.membership == Membership.LEAVE: + filtered_sync_room_id_set[room_id] = _RoomMembershipForUser( + event_id=last_membership_change_in_from_to_range.event_id, + event_pos=last_membership_change_in_from_to_range.event_pos, + membership=last_membership_change_in_from_to_range.membership, + sender=last_membership_change_in_from_to_range.sender, + newly_joined=False, ) + # TODO: Figure out `newly_joined` + return filtered_sync_room_id_set async def filter_rooms( self, user: UserID, - sync_room_map: Dict[str, RoomsForUser], + sync_room_map: Dict[str, _RoomMembershipForUser], filters: SlidingSyncConfig.SlidingSyncList.Filters, to_token: StreamToken, - ) -> Dict[str, RoomsForUser]: + ) -> Dict[str, _RoomMembershipForUser]: """ Filter rooms based on the sync request. @@ -627,9 +665,9 @@ class SlidingSyncHandler: async def sort_rooms( self, - sync_room_map: Dict[str, RoomsForUser], + sync_room_map: Dict[str, _RoomMembershipForUser], to_token: StreamToken, - ) -> List[Tuple[str, RoomsForUser]]: + ) -> List[Tuple[str, _RoomMembershipForUser]]: """ Sort by `stream_ordering` of the last event that the user should see in the room. `stream_ordering` is unique so we get a stable sort. 
@@ -682,7 +720,7 @@ class SlidingSyncHandler: user: UserID, room_id: str, room_sync_config: RoomSyncConfig, - rooms_for_user_membership_at_to_token: RoomsForUser, + rooms_membership_for_user_at_to_token: _RoomMembershipForUser, from_token: Optional[StreamToken], to_token: StreamToken, ) -> SlidingSyncResult.RoomResult: @@ -696,7 +734,7 @@ class SlidingSyncHandler: room_id: The room ID to fetch data for room_sync_config: Config for what data we should fetch for a room in the sync response. - rooms_for_user_membership_at_to_token: Membership information for the user + rooms_membership_for_user_at_to_token: Membership information for the user in the room at the time of `to_token`. from_token: The point in the stream to sync from. to_token: The point in the stream to sync up to. @@ -716,7 +754,7 @@ class SlidingSyncHandler: if ( room_sync_config.timeline_limit > 0 # No timeline for invite/knock rooms (just `stripped_state`) - and rooms_for_user_membership_at_to_token.membership + and rooms_membership_for_user_at_to_token.membership not in (Membership.INVITE, Membership.KNOCK) ): limited = False @@ -726,27 +764,15 @@ class SlidingSyncHandler: # position once we've fetched the events to point to the earliest event fetched. 
prev_batch_token = to_token - newly_joined = False - if ( - # We can only determine new-ness if we have a `from_token` to define our range - from_token is not None - and rooms_for_user_membership_at_to_token.membership == Membership.JOIN - ): - newly_joined = ( - rooms_for_user_membership_at_to_token.event_pos.persisted_after( - from_token.room_key - ) - ) - # We're going to paginate backwards from the `to_token` from_bound = to_token.room_key # People shouldn't see past their leave/ban event - if rooms_for_user_membership_at_to_token.membership in ( + if rooms_membership_for_user_at_to_token.membership in ( Membership.LEAVE, Membership.BAN, ): from_bound = ( - rooms_for_user_membership_at_to_token.event_pos.to_room_stream_token() + rooms_membership_for_user_at_to_token.event_pos.to_room_stream_token() ) # Determine whether we should limit the timeline to the token range. @@ -760,7 +786,8 @@ class SlidingSyncHandler: # connection before to_bound = ( from_token.room_key - if from_token is not None and not newly_joined + if from_token is not None + and not rooms_membership_for_user_at_to_token.newly_joined else None ) @@ -797,7 +824,7 @@ class SlidingSyncHandler: self.storage_controllers, user.to_string(), timeline_events, - is_peeking=rooms_for_user_membership_at_to_token.membership + is_peeking=rooms_membership_for_user_at_to_token.membership != Membership.JOIN, filter_send_to_client=True, ) @@ -852,12 +879,12 @@ class SlidingSyncHandler: # Figure out any stripped state events for invite/knocks. This allows the # potential joiner to identify the room. 
stripped_state: List[JsonDict] = [] - if rooms_for_user_membership_at_to_token.membership in ( + if rooms_membership_for_user_at_to_token.membership in ( Membership.INVITE, Membership.KNOCK, ): invite_or_knock_event = await self.store.get_event( - rooms_for_user_membership_at_to_token.event_id + rooms_membership_for_user_at_to_token.event_id ) stripped_state = [] diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 730e55d135..c5e6537980 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -112,32 +112,25 @@ class _EventsAround: end: RoomStreamToken -@attr.s(slots=True, frozen=True, auto_attribs=True) -class _CurrentStateDeltaMembershipReturn: - """ - Attributes: - event_id: The "current" membership event ID in this room. - prev_event_id: The previous membership event in this room that was replaced by - the "current" one. May be `None` if there was no previous membership event. - room_id: The room ID of the membership event. - """ - - event_id: str - prev_event_id: Optional[str] - room_id: str - - @attr.s(slots=True, frozen=True, auto_attribs=True) class CurrentStateDeltaMembership: """ Attributes: - event: The "current" membership event in this room. - prev_event: The previous membership event in this room that was replaced by + event_id: The "current" membership event ID in this room. + event_pos: The position of the "current" membership event in the event stream. + prev_event_id: The previous membership event in this room that was replaced by the "current" one. May be `None` if there was no previous membership event. + room_id: The room ID of the membership event. 
+ membership: The membership state of the user in the room + sender: The person who sent the membership event """ - event: EventBase - prev_event: Optional[EventBase] + event_id: str + event_pos: PersistedEventPosition + prev_event_id: Optional[str] + room_id: str + membership: str + sender: str def generate_pagination_where_clause( @@ -808,7 +801,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): if not has_changed: return [] - def f(txn: LoggingTransaction) -> List[_CurrentStateDeltaMembershipReturn]: + def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: # To handle tokens with a non-empty instance_map we fetch more # results than necessary and then filter down min_from_id = from_key.stream @@ -833,7 +826,9 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): s.room_id, s.instance_name, s.stream_id, - e.topological_ordering + e.topological_ordering, + m.membership, + e.sender FROM current_state_delta_stream AS s INNER JOIN events AS e ON e.stream_ordering = s.stream_id INNER JOIN room_memberships AS m ON m.event_id = e.event_id @@ -844,7 +839,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): txn.execute(sql, args) - membership_changes: List[_CurrentStateDeltaMembershipReturn] = [] + membership_changes: List[CurrentStateDeltaMembership] = [] for ( event_id, prev_event_id, @@ -852,6 +847,8 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): instance_name, stream_ordering, topological_ordering, + membership, + sender, ) in txn: assert event_id is not None # `prev_event_id` can be `None` @@ -859,6 +856,8 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): assert instance_name is not None assert stream_ordering is not None assert topological_ordering is not None + assert membership is not None + assert sender is not None if _filter_results( from_key, @@ -868,43 +867,33 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): stream_ordering, ): membership_changes.append( - 
_CurrentStateDeltaMembershipReturn( + CurrentStateDeltaMembership( event_id=event_id, + event_pos=PersistedEventPosition( + instance_name=instance_name, + stream=stream_ordering, + ), prev_event_id=prev_event_id, room_id=room_id, + membership=membership, + sender=sender, ) ) return membership_changes - raw_membership_changes = await self.db_pool.runInteraction( + membership_changes = await self.db_pool.runInteraction( "get_current_state_delta_membership_changes_for_user", f ) - # Fetch all events in one go - event_ids = [] - for m in raw_membership_changes: - event_ids.append(m.event_id) - if m.prev_event_id is not None: - event_ids.append(m.prev_event_id) - - events = await self.get_events(event_ids, get_prev_content=False) - room_ids_to_exclude: AbstractSet[str] = set() if excluded_room_ids is not None: room_ids_to_exclude = set(excluded_room_ids) return [ - CurrentStateDeltaMembership( - event=events[raw_membership_change.event_id], - prev_event=( - events[raw_membership_change.prev_event_id] - if raw_membership_change.prev_event_id - else None - ), - ) - for raw_membership_change in raw_membership_changes - if raw_membership_change.room_id not in room_ids_to_exclude + membership_change + for membership_change in membership_changes + if membership_change.room_id not in room_ids_to_exclude ] @cancellable From 62c6a4e8609f5d563b85f576d0a4d5b764c1f9c2 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 01:10:00 -0500 Subject: [PATCH 42/84] Add `newly_joined` support to `get_sync_room_ids_for_user(...)` --- synapse/handlers/sliding_sync.py | 82 +++++++++- tests/handlers/test_sliding_sync.py | 224 +++++++++++++++++++++++++++- 2 files changed, 300 insertions(+), 6 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index c1cfec5000..97b04698b2 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -18,7 +18,8 @@ # # import logging -from typing import TYPE_CHECKING, Dict, List, 
Optional, Set, Tuple +from collections import defaultdict +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple import attr from immutabledict import immutabledict @@ -104,6 +105,9 @@ class _RoomMembershipForUser: sender: str newly_joined: bool + def copy_and_replace(self, **kwds: Any) -> "_RoomMembershipForUser": + return attr.evolve(self, **kwds) + class SlidingSyncHandler: def __init__(self, hs: "HomeServer"): @@ -414,6 +418,7 @@ class SlidingSyncHandler: # - 1b) Add back rooms that the user left after the `to_token` # - 1c) Update room membership events to the point in time of the `to_token` # - 2) Add back newly_left rooms (> `from_token` and <= `to_token`) + # - 3) Figure out which rooms are `newly_joined` # 1) ----------------------------------------------------- @@ -529,19 +534,49 @@ class SlidingSyncHandler: last_membership_change_by_room_id_in_from_to_range: Dict[ str, CurrentStateDeltaMembership ] = {} + # We also want to assemble a list of the first membership events during the token + # range so we can step backward to the previous membership that would apply to + # before the token range to see if we have `newly_joined` the room. 
+ first_membership_change_by_room_id_in_from_to_range: Dict[ + str, CurrentStateDeltaMembership + ] = {} + non_join_event_ids_by_room_id_in_from_to_range: Dict[str, List[str]] = ( + defaultdict(list) + ) for ( membership_change ) in current_state_delta_membership_changes_in_from_to_range: - last_membership_change_by_room_id_in_from_to_range[ - membership_change.room_id - ] = membership_change + room_id = membership_change.room_id + + last_membership_change_by_room_id_in_from_to_range[room_id] = ( + membership_change + ) + + # Only set if we haven't already set it + first_membership_change_by_room_id_in_from_to_range.setdefault( + room_id, membership_change + ) + + if membership_change.membership != Membership.JOIN: + non_join_event_ids_by_room_id_in_from_to_range[room_id].append( + membership_change.event_id + ) # 2) Fixup + # + # 3) We also want to assemble a list of possibly newly joined rooms. Someone + # could have left and joined multiple times during the given range but we only + # care about whether they are joined at the end of the token range so we are + # working with the last membership even in the token range. + possibly_newly_joined_room_ids = set() for ( last_membership_change_in_from_to_range ) in last_membership_change_by_room_id_in_from_to_range.values(): room_id = last_membership_change_in_from_to_range.room_id + if last_membership_change_in_from_to_range.membership == Membership.JOIN: + possibly_newly_joined_room_ids.add(room_id) + # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). 
We # include newly_left rooms because the last event that the user should see # is their own leave event @@ -554,7 +589,44 @@ class SlidingSyncHandler: newly_joined=False, ) - # TODO: Figure out `newly_joined` + # 3) Figure out `newly_joined` + prev_event_ids_before_token_range: List[str] = [] + for possibly_newly_joined_room_id in possibly_newly_joined_room_ids: + non_joins_for_room = non_join_event_ids_by_room_id_in_from_to_range[ + possibly_newly_joined_room_id + ] + if len(non_joins_for_room) > 0: + # We found a `newly_joined` room (we left and joined within the token range) + filtered_sync_room_id_set[room_id] = filtered_sync_room_id_set[ + room_id + ].copy_and_replace(newly_joined=True) + else: + prev_event_id = first_membership_change_by_room_id_in_from_to_range[ + room_id + ].prev_event_id + + if prev_event_id is None: + # We found a `newly_joined` room (we are joining the room for the + # first time within the token range) + filtered_sync_room_id_set[room_id] = filtered_sync_room_id_set[ + room_id + ].copy_and_replace(newly_joined=True) + else: + # Last resort, we need to step back to the previous membership event + # just before the token range to see if we're joined then or not. 
+ prev_event_ids_before_token_range.append(prev_event_id) + + # 3) more + prev_events_before_token_range = await self.store.get_events( + prev_event_ids_before_token_range + ) + for prev_event_before_token_range in prev_events_before_token_range.values(): + if prev_event_before_token_range.membership != Membership.JOIN: + # We found a `newly_joined` room (we left before the token range + # and joined within the token range) + filtered_sync_room_id_set[room_id] = filtered_sync_room_id_set[ + room_id + ].copy_and_replace(newly_joined=True) return filtered_sync_room_id_set diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index 694fd17a02..c25ca41098 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -116,6 +116,9 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): room_id_results[room_id].event_id, join_response["event_id"], ) + # We should be considered `newly_joined` because we joined during the token + # range + self.assertEqual(room_id_results[room_id].newly_joined, True) def test_get_already_joined_room(self) -> None: """ @@ -146,6 +149,8 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): room_id_results[room_id].event_id, join_response["event_id"], ) + # We should *NOT* be `newly_joined` because we joined before the token range + self.assertEqual(room_id_results[room_id].newly_joined, False) def test_get_invited_banned_knocked_room(self) -> None: """ @@ -232,6 +237,11 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): room_id_results[knock_room_id].event_id, knock_room_membership_state_event.event_id, ) + # We should *NOT* be `newly_joined` because we were not joined at the the time + # of the `to_token`. 
+ self.assertEqual(room_id_results[invited_room_id].newly_joined, False) + self.assertEqual(room_id_results[ban_room_id].newly_joined, False) + self.assertEqual(room_id_results[knock_room_id].newly_joined, False) def test_get_kicked_room(self) -> None: """ @@ -277,6 +287,9 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): room_id_results[kick_room_id].event_id, kick_response["event_id"], ) + # We should *NOT* be `newly_joined` because we were not joined at the the time + # of the `to_token`. + self.assertEqual(room_id_results[kick_room_id].newly_joined, False) def test_forgotten_rooms(self) -> None: """ @@ -396,6 +409,8 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): room_id_results[room_id2].event_id, leave_response["event_id"], ) + # We should *NOT* be `newly_joined` because we are instead `newly_left` + self.assertEqual(room_id_results[room_id2].newly_joined, False) def test_no_joins_after_to_token(self) -> None: """ @@ -432,6 +447,8 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): room_id_results[room_id1].event_id, join_response1["event_id"], ) + # We should be `newly_joined` because we joined during the token range + self.assertEqual(room_id_results[room_id1].newly_joined, True) def test_join_during_range_and_left_room_after_to_token(self) -> None: """ @@ -477,6 +494,8 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): } ), ) + # We should be `newly_joined` because we joined during the token range + self.assertEqual(room_id_results[room_id1].newly_joined, True) def test_join_before_range_and_left_room_after_to_token(self) -> None: """ @@ -519,6 +538,8 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): } ), ) + # We should *NOT* be `newly_joined` because we joined before the token range + self.assertEqual(room_id_results[room_id1].newly_joined, False) def test_kicked_before_range_and_left_after_to_token(self) -> None: """ @@ -581,6 +602,8 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): } ), ) + # 
We should *NOT* be `newly_joined` because we were kicked + self.assertEqual(room_id_results[kick_room_id].newly_joined, False) def test_newly_left_during_range_and_join_leave_after_to_token(self) -> None: """ @@ -632,6 +655,8 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): } ), ) + # We should *NOT* be `newly_joined` because we left during the token range + self.assertEqual(room_id_results[room_id1].newly_joined, False) def test_newly_left_during_range_and_join_after_to_token(self) -> None: """ @@ -681,6 +706,8 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): } ), ) + # We should *NOT* be `newly_joined` because we left during the token range + self.assertEqual(room_id_results[room_id1].newly_joined, False) def test_no_from_token(self) -> None: """ @@ -727,6 +754,9 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): room_id_results[room_id1].event_id, join_response1["event_id"], ) + # We should *NOT* be `newly_joined` because there is no `from_token` to + # define a "live" range to compare against + self.assertEqual(room_id_results[room_id1].newly_joined, False) def test_from_token_ahead_of_to_token(self) -> None: """ @@ -793,6 +823,8 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): room_id_results[room_id1].event_id, join_response1["event_id"], ) + # We should *NOT* be `newly_joined` because we joined `room1` before either of the tokens + self.assertEqual(room_id_results[room_id1].newly_joined, False) def test_leave_before_range_and_join_leave_after_to_token(self) -> None: """ @@ -920,6 +952,8 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): } ), ) + # We should be `newly_joined` because we joined during the token range + self.assertEqual(room_id_results[room_id1].newly_joined, True) def test_join_leave_multiple_times_before_range_and_after_to_token( self, @@ -976,6 +1010,8 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): } ), ) + # We should *NOT* be `newly_joined` because we joined before the token 
range + self.assertEqual(room_id_results[room_id1].newly_joined, False) def test_invite_before_range_and_join_leave_after_to_token( self, @@ -1028,8 +1064,11 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): } ), ) + # We should *NOT* be `newly_joined` because we were only invited before the + # token range + self.assertEqual(room_id_results[room_id1].newly_joined, False) - def test_display_name_changes_in_token_range( + def test_join_and_display_name_changes_in_token_range( self, ) -> None: """ @@ -1101,6 +1140,68 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): } ), ) + # We should be `newly_joined` because we joined during the token range + self.assertEqual(room_id_results[room_id1].newly_joined, True) + + def test_display_name_changes_in_token_range( + self, + ) -> None: + """ + Test that we point to the correct membership event within the from/to range even + if there is `displayname`/`avatar_url` updates. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + # We create the room with user2 so the room isn't left with no members when we + # leave and can still re-join. 
+ room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) + join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + + after_room1_token = self.event_sources.get_current_token() + + # Update the displayname during the token range + displayname_change_during_token_range_response = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname during token range", + }, + tok=user1_tok, + ) + + after_change1_token = self.event_sources.get_current_token() + + room_id_results = self.get_success( + self.sliding_sync_handler.get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=after_room1_token, + to_token=after_change1_token, + ) + ) + + # Room should show up because we were joined during the from/to range + self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + displayname_change_during_token_range_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response": join_response["event_id"], + "displayname_change_during_token_range_response": displayname_change_during_token_range_response[ + "event_id" + ], + } + ), + ) + # We should *NOT* be `newly_joined` because we joined before the token range + self.assertEqual(room_id_results[room_id1].newly_joined, False) def test_display_name_changes_before_and_after_token_range( self, @@ -1172,6 +1273,8 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): } ), ) + # We should *NOT* be `newly_joined` because we joined before the token range + self.assertEqual(room_id_results[room_id1].newly_joined, False) def test_display_name_changes_leave_after_token_range( self, @@ -1250,6 +1353,8 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): } ), ) + # We should be `newly_joined` because 
we joined during the token range + self.assertEqual(room_id_results[room_id1].newly_joined, True) def test_display_name_changes_join_after_token_range( self, @@ -1298,6 +1403,123 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # Room shouldn't show up because we joined after the from/to range self.assertEqual(room_id_results.keys(), set()) + def test_newly_joined_with_leave_join_in_token_range( + self, + ) -> None: + """ + Test that `newly_joined` TODO + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + # We create the room with user2 so the room isn't left with no members when we + # leave and can still re-join. + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) + self.helper.join(room_id1, user1_id, tok=user1_tok) + + after_room1_token = self.event_sources.get_current_token() + + # Leave and join back during the token range + self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response2 = self.helper.join(room_id1, user1_id, tok=user1_tok) + + after_more_changes_token = self.event_sources.get_current_token() + + room_id_results = self.get_success( + self.sliding_sync_handler.get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=after_room1_token, + to_token=after_more_changes_token, + ) + ) + + # Room should show up because we were joined during the from/to range + self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response2["event_id"], + ) + # We should be considered `newly_joined` because there is some non-join event in + # between our latest join event. 
+ self.assertEqual(room_id_results[room_id1].newly_joined, True) + + def test_newly_joined_only_joins_during_token_range( + self, + ) -> None: + """ + Test that a join and more joins caused by display name changes, all during the + token range, still count as `newly_joined`. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + # We create the room with user2 so the room isn't left with no members when we + # leave and can still re-join. + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) + # Join, leave, join back to the room before the from/to range + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + # Update the displayname during the token range (looks like another join) + displayname_change_during_token_range_response1 = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname during token range", + }, + tok=user1_tok, + ) + # Update the displayname during the token range (looks like another join) + displayname_change_during_token_range_response2 = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname during token range", + }, + tok=user1_tok, + ) + + after_room1_token = self.event_sources.get_current_token() + + room_id_results = self.get_success( + self.sliding_sync_handler.get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=before_room1_token, + to_token=after_room1_token, + ) + ) + + # Room should show up because it was newly_left and joined during the from/to range + self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event 
in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + displayname_change_during_token_range_response2["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response1": join_response1["event_id"], + "displayname_change_during_token_range_response1": displayname_change_during_token_range_response1[ + "event_id" + ], + "displayname_change_during_token_range_response2": displayname_change_during_token_range_response2[ + "event_id" + ], + } + ), + ) + # We should be `newly_joined` because we first joined during the token range + self.assertEqual(room_id_results[room_id1].newly_joined, True) + def test_multiple_rooms_are_not_confused( self, ) -> None: From 39259f66fa8ccd13818b8a5681b81fa020a8d4d2 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 01:16:46 -0500 Subject: [PATCH 43/84] Join both tables with stream_ordering --- synapse/storage/databases/main/stream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index c5e6537980..2646dfd9cb 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -831,7 +831,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): e.sender FROM current_state_delta_stream AS s INNER JOIN events AS e ON e.stream_ordering = s.stream_id - INNER JOIN room_memberships AS m ON m.event_id = e.event_id + INNER JOIN room_memberships AS m ON m.event_stream_ordering = s.stream_id WHERE m.user_id = ? AND s.stream_id > ? AND s.stream_id <= ? 
ORDER BY s.stream_id ASC From c60aca755b35f9e655b2f2c71367ba5806db64e5 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 02:02:34 -0500 Subject: [PATCH 44/84] Fix clause change --- synapse/storage/databases/main/stream.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 2646dfd9cb..562dc6eacf 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -934,6 +934,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ignore_room_clause, ignore_room_args = make_in_list_sql_clause( txn.database_engine, "e.room_id", excluded_rooms, negative=True ) + ignore_room_clause = f"AND {ignore_room_clause}" args += ignore_room_args sql = """ @@ -948,6 +949,8 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ignore_room_clause, ) + logger.info("get_membership_changes_for_user: %s", sql) + txn.execute(sql, args) rows = [ From 11db1befa2845f89d09be78e32d53b9b4b9bbad4 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 02:05:25 -0500 Subject: [PATCH 45/84] Remove debug log --- synapse/storage/databases/main/stream.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 562dc6eacf..f6be97698e 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -949,8 +949,6 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ignore_room_clause, ) - logger.info("get_membership_changes_for_user: %s", sql) - txn.execute(sql, args) rows = [ From 7395e1042072b3ab9f04898afa3989fda55a0978 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 02:19:31 -0500 Subject: [PATCH 46/84] Fix `builtins.SyntaxError: EOL while scanning string literal (test_sync.py, line 1885)` See 
https://github.com/element-hq/synapse/actions/runs/9675073109/job/26692003103?pr=17320#step:9:5552 Worked fine locally but failed in CI with Python 3.8 --- tests/rest/client/test_sync.py | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 338149f09a..bd1e7d521b 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1882,9 +1882,7 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): channel = self.make_request( "POST", self.sync_endpoint - + f"?pos={self.get_success( - from_token.to_string(self.store) - )}", + + f"?pos={self.get_success(from_token.to_string(self.store))}", { "lists": { "foo-list": { @@ -2074,9 +2072,7 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): channel = self.make_request( "POST", self.sync_endpoint - + f"?pos={self.get_success( - from_token.to_string(self.store) - )}", + + f"?pos={self.get_success(from_token.to_string(self.store))}", { "lists": { "foo-list": { @@ -2323,9 +2319,7 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): channel = self.make_request( "POST", self.sync_endpoint - + f"?pos={self.get_success( - from_token.to_string(self.store) - )}", + + f"?pos={self.get_success(from_token.to_string(self.store))}", { "lists": { "foo-list": { @@ -2493,9 +2487,7 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): channel = self.make_request( "POST", self.sync_endpoint - + f"?pos={self.get_success( - from_token.to_string(self.store) - )}", + + f"?pos={self.get_success(from_token.to_string(self.store))}", { "lists": { "foo-list": { @@ -2563,9 +2555,7 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): channel = self.make_request( "POST", self.sync_endpoint - + f"?pos={self.get_success( - from_token.to_string(self.store) - )}", + + f"?pos={self.get_success(from_token.to_string(self.store))}", { "lists": { "foo-list": { From 9f47513458e064be0b7579c42d672464a4200ffe Mon Sep 
17 00:00:00 2001 From: Till Faelligen <2353100+S7evinK@users.noreply.github.com> Date: Wed, 26 Jun 2024 14:09:13 +0200 Subject: [PATCH 47/84] 1.110.0rc1 --- CHANGES.md | 84 +++++++++++++++++++++++++++++++++++++++ changelog.d/17187.feature | 1 - changelog.d/17198.misc | 1 - changelog.d/17254.bugfix | 1 - changelog.d/17255.feature | 1 - changelog.d/17256.feature | 1 - changelog.d/17265.misc | 1 - changelog.d/17266.misc | 1 - changelog.d/17270.feature | 1 - changelog.d/17271.misc | 1 - changelog.d/17272.bugfix | 1 - changelog.d/17273.misc | 1 - changelog.d/17275.bugfix | 1 - changelog.d/17276.feature | 1 - changelog.d/17277.feature | 1 - changelog.d/17279.misc | 1 - changelog.d/17281.feature | 1 - changelog.d/17282.feature | 1 - changelog.d/17283.bugfix | 1 - changelog.d/17284.feature | 1 - changelog.d/17293.feature | 1 - changelog.d/17294.feature | 2 - changelog.d/17295.bugfix | 1 - changelog.d/17296.feature | 1 - changelog.d/17297.misc | 1 - changelog.d/17300.misc | 1 - changelog.d/17301.bugfix | 1 - changelog.d/17304.feature | 2 - changelog.d/17308.doc | 1 - changelog.d/17322.feature | 1 - changelog.d/17324.misc | 1 - changelog.d/17325.misc | 1 - changelog.d/17329.doc | 1 - changelog.d/17331.misc | 1 - changelog.d/17333.misc | 1 - changelog.d/17335.feature | 1 - changelog.d/17336.bugfix | 1 - changelog.d/17338.misc | 1 - changelog.d/17339.misc | 1 - changelog.d/17341.doc | 1 - changelog.d/17347.doc | 1 - changelog.d/17348.doc | 1 - changelog.d/17350.feature | 2 - changelog.d/17358.misc | 1 - debian/changelog | 5 ++- pyproject.toml | 2 +- 46 files changed, 88 insertions(+), 49 deletions(-) delete mode 100644 changelog.d/17187.feature delete mode 100644 changelog.d/17198.misc delete mode 100644 changelog.d/17254.bugfix delete mode 100644 changelog.d/17255.feature delete mode 100644 changelog.d/17256.feature delete mode 100644 changelog.d/17265.misc delete mode 100644 changelog.d/17266.misc delete mode 100644 changelog.d/17270.feature delete mode 100644 
changelog.d/17271.misc delete mode 100644 changelog.d/17272.bugfix delete mode 100644 changelog.d/17273.misc delete mode 100644 changelog.d/17275.bugfix delete mode 100644 changelog.d/17276.feature delete mode 100644 changelog.d/17277.feature delete mode 100644 changelog.d/17279.misc delete mode 100644 changelog.d/17281.feature delete mode 100644 changelog.d/17282.feature delete mode 100644 changelog.d/17283.bugfix delete mode 100644 changelog.d/17284.feature delete mode 100644 changelog.d/17293.feature delete mode 100644 changelog.d/17294.feature delete mode 100644 changelog.d/17295.bugfix delete mode 100644 changelog.d/17296.feature delete mode 100644 changelog.d/17297.misc delete mode 100644 changelog.d/17300.misc delete mode 100644 changelog.d/17301.bugfix delete mode 100644 changelog.d/17304.feature delete mode 100644 changelog.d/17308.doc delete mode 100644 changelog.d/17322.feature delete mode 100644 changelog.d/17324.misc delete mode 100644 changelog.d/17325.misc delete mode 100644 changelog.d/17329.doc delete mode 100644 changelog.d/17331.misc delete mode 100644 changelog.d/17333.misc delete mode 100644 changelog.d/17335.feature delete mode 100644 changelog.d/17336.bugfix delete mode 100644 changelog.d/17338.misc delete mode 100644 changelog.d/17339.misc delete mode 100644 changelog.d/17341.doc delete mode 100644 changelog.d/17347.doc delete mode 100644 changelog.d/17348.doc delete mode 100644 changelog.d/17350.feature delete mode 100644 changelog.d/17358.misc diff --git a/CHANGES.md b/CHANGES.md index 9060b84853..e2c8cc937c 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,87 @@ +# Synapse 1.110.0rc1 (2024-06-26) + +### Features + +- Add initial implementation of an experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint. 
([\#17187](https://github.com/element-hq/synapse/issues/17187)) +- Add support for [MSC3823](https://github.com/matrix-org/matrix-spec-proposals/pull/3823) - Account suspension. ([\#17255](https://github.com/element-hq/synapse/issues/17255)) +- Improve ratelimiting in Synapse (#17256). ([\#17256](https://github.com/element-hq/synapse/issues/17256)) +- Add support for the unstable [MSC4151](https://github.com/matrix-org/matrix-spec-proposals/pull/4151) report room API. ([\#17270](https://github.com/element-hq/synapse/issues/17270), [\#17296](https://github.com/element-hq/synapse/issues/17296)) +- Filter for public and empty rooms added to Admin-API [List Room API](https://element-hq.github.io/synapse/latest/admin_api/rooms.html#list-room-api). ([\#17276](https://github.com/element-hq/synapse/issues/17276)) +- Add `is_dm` filtering to experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint. ([\#17277](https://github.com/element-hq/synapse/issues/17277)) +- Add `is_encrypted` filtering to experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint. ([\#17281](https://github.com/element-hq/synapse/issues/17281)) +- Include user membership in events served to clients, per MSC4115. ([\#17282](https://github.com/element-hq/synapse/issues/17282)) +- Do not require user-interactive authentication for uploading cross-signing keys for the first time, per MSC3967. ([\#17284](https://github.com/element-hq/synapse/issues/17284)) +- Add `stream_ordering` sort to experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint. ([\#17293](https://github.com/element-hq/synapse/issues/17293)) +- `register_new_matrix_user` now supports a --password-file flag, which + is useful for scripting. 
([\#17294](https://github.com/element-hq/synapse/issues/17294)) +- `register_new_matrix_user` now supports a --exists-ok flag to allow registration of users that already exist in the database. + This is useful for scripts that bootstrap user accounts with initial passwords. ([\#17304](https://github.com/element-hq/synapse/issues/17304)) +- Add support for the `via` query parameter from MSC4156. ([\#17322](https://github.com/element-hq/synapse/issues/17322)) +- Add `is_invite` filtering to experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint. ([\#17335](https://github.com/element-hq/synapse/issues/17335)) +- Support [MSC3916](https://github.com/matrix-org/matrix-spec-proposals/blob/rav/authentication-for-media/proposals/3916-authentication-for-media.md) + by adding a federation /download endpoint. ([\#17350](https://github.com/element-hq/synapse/issues/17350)) + +### Bugfixes + +- Fix searching for users with their exact localpart whose ID includes a hyphen. ([\#17254](https://github.com/element-hq/synapse/issues/17254)) +- Fix wrong retention policy being used when filtering events. ([\#17272](https://github.com/element-hq/synapse/issues/17272)) +- Fix bug where OTKs were not always included in `/sync` response when using workers. ([\#17275](https://github.com/element-hq/synapse/issues/17275)) +- Fix a long-standing bug where an invalid 'from' parameter to [`/notifications`](https://spec.matrix.org/v1.10/client-server-api/#get_matrixclientv3notifications) would result in an Internal Server Error. ([\#17283](https://github.com/element-hq/synapse/issues/17283)) +- Fix edge case in `/sync` returning the wrong state when using sharded event persisters. ([\#17295](https://github.com/element-hq/synapse/issues/17295)) +- Add initial implementation of an experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint. 
([\#17301](https://github.com/element-hq/synapse/issues/17301)) +- Fix email notification subject when invited to a space. ([\#17336](https://github.com/element-hq/synapse/issues/17336)) + +### Improved Documentation + +- Add missing quotes for example for `exclude_rooms_from_sync`. ([\#17308](https://github.com/element-hq/synapse/issues/17308)) +- Update header in the README to visually fix the auto-generated table of contents. ([\#17329](https://github.com/element-hq/synapse/issues/17329)) +- Fix stale references to the Foundation's Security Disclosure Policy. ([\#17341](https://github.com/element-hq/synapse/issues/17341)) +- Add default values for `rc_invites.per_issuer` to docs. ([\#17347](https://github.com/element-hq/synapse/issues/17347)) +- Fix an error in the docs for `search_all_users` parameter under `user_directory`. ([\#17348](https://github.com/element-hq/synapse/issues/17348)) + +### Internal Changes + +- Remove unused `expire_access_token` option in the Synapse Docker config file. Contributed by @AaronDewes. ([\#17198](https://github.com/element-hq/synapse/issues/17198)) +- Use fully-qualified `PersistedEventPosition` when returning `RoomsForUser` to facilitate proper comparisons and `RoomStreamToken` generation. ([\#17265](https://github.com/element-hq/synapse/issues/17265)) +- Add debug logging for when room keys are uploaded, including whether they are replacing other room keys. ([\#17266](https://github.com/element-hq/synapse/issues/17266)) +- Handle OTK uploads off master. ([\#17271](https://github.com/element-hq/synapse/issues/17271)) +- Don't try and resync devices for remote users whose servers are marked as down. ([\#17273](https://github.com/element-hq/synapse/issues/17273)) +- Re-organize Pydantic models and types used in handlers. ([\#17279](https://github.com/element-hq/synapse/issues/17279)) +- Bump `mypy` from 1.8.0 to 1.9.0. 
([\#17297](https://github.com/element-hq/synapse/issues/17297)) +- Expose the worker instance that persisted the event on `event.internal_metadata.instance_name`. ([\#17300](https://github.com/element-hq/synapse/issues/17300)) +- Update the README with Element branding, improve headers and fix the #synapse:matrix.org support room link rendering. ([\#17324](https://github.com/element-hq/synapse/issues/17324)) +- This is a changelog so tests will run. ([\#17325](https://github.com/element-hq/synapse/issues/17325)) +- Change path of the experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync implementation to `/org.matrix.simplified_msc3575/sync` since our simplified API is slightly incompatible with what's in the current MSC. ([\#17331](https://github.com/element-hq/synapse/issues/17331)) +- Handle device lists notifications for large accounts more efficiently in worker mode. ([\#17333](https://github.com/element-hq/synapse/issues/17333), [\#17358](https://github.com/element-hq/synapse/issues/17358)) +- Do not block event sending/receiving while calculating large event auth chains. ([\#17338](https://github.com/element-hq/synapse/issues/17338)) +- Tidy up `parse_integer` docs and call sites to reflect the fact that they require non-negative integers by default, and bring `parse_integer_from_args` default in alignment. Contributed by Denis Kasak (@dkasak). ([\#17339](https://github.com/element-hq/synapse/issues/17339)) + + + +### Updates to locked dependencies + +* Bump authlib from 1.3.0 to 1.3.1. ([\#17343](https://github.com/element-hq/synapse/issues/17343)) +* Bump dawidd6/action-download-artifact from 3.1.4 to 5. ([\#17289](https://github.com/element-hq/synapse/issues/17289)) +* Bump dawidd6/action-download-artifact from 5 to 6. ([\#17313](https://github.com/element-hq/synapse/issues/17313)) +* Bump docker/build-push-action from 5 to 6. 
([\#17312](https://github.com/element-hq/synapse/issues/17312)) +* Bump jinja2 from 3.1.3 to 3.1.4. ([\#17287](https://github.com/element-hq/synapse/issues/17287)) +* Bump lazy_static from 1.4.0 to 1.5.0. ([\#17355](https://github.com/element-hq/synapse/issues/17355)) +* Bump msgpack from 1.0.7 to 1.0.8. ([\#17317](https://github.com/element-hq/synapse/issues/17317)) +* Bump netaddr from 1.2.1 to 1.3.0. ([\#17353](https://github.com/element-hq/synapse/issues/17353)) +* Bump packaging from 24.0 to 24.1. ([\#17352](https://github.com/element-hq/synapse/issues/17352)) +* Bump phonenumbers from 8.13.37 to 8.13.39. ([\#17315](https://github.com/element-hq/synapse/issues/17315)) +* Bump regex from 1.10.4 to 1.10.5. ([\#17290](https://github.com/element-hq/synapse/issues/17290)) +* Bump requests from 2.31.0 to 2.32.2. ([\#17345](https://github.com/element-hq/synapse/issues/17345)) +* Bump sentry-sdk from 2.1.1 to 2.3.1. ([\#17263](https://github.com/element-hq/synapse/issues/17263)) +* Bump sentry-sdk from 2.3.1 to 2.6.0. ([\#17351](https://github.com/element-hq/synapse/issues/17351)) +* Bump tornado from 6.4 to 6.4.1. ([\#17344](https://github.com/element-hq/synapse/issues/17344)) +* Bump types-jsonschema from 4.21.0.20240311 to 4.22.0.20240610. ([\#17288](https://github.com/element-hq/synapse/issues/17288)) +* Bump types-netaddr from 1.2.0.20240219 to 1.3.0.20240530. ([\#17314](https://github.com/element-hq/synapse/issues/17314)) +* Bump types-pillow from 10.2.0.20240423 to 10.2.0.20240520. ([\#17285](https://github.com/element-hq/synapse/issues/17285)) +* Bump types-pyyaml from 6.0.12.12 to 6.0.12.20240311. ([\#17316](https://github.com/element-hq/synapse/issues/17316)) +* Bump typing-extensions from 4.11.0 to 4.12.2. ([\#17354](https://github.com/element-hq/synapse/issues/17354)) +* Bump urllib3 from 2.0.7 to 2.2.2. 
([\#17346](https://github.com/element-hq/synapse/issues/17346)) + # Synapse 1.109.0 (2024-06-18) ### Internal Changes diff --git a/changelog.d/17187.feature b/changelog.d/17187.feature deleted file mode 100644 index 50383cb4a4..0000000000 --- a/changelog.d/17187.feature +++ /dev/null @@ -1 +0,0 @@ -Add initial implementation of an experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint. diff --git a/changelog.d/17198.misc b/changelog.d/17198.misc deleted file mode 100644 index 8973eb2bac..0000000000 --- a/changelog.d/17198.misc +++ /dev/null @@ -1 +0,0 @@ -Remove unused `expire_access_token` option in the Synapse Docker config file. Contributed by @AaronDewes. \ No newline at end of file diff --git a/changelog.d/17254.bugfix b/changelog.d/17254.bugfix deleted file mode 100644 index b0d61309e2..0000000000 --- a/changelog.d/17254.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix searching for users with their exact localpart whose ID includes a hyphen. diff --git a/changelog.d/17255.feature b/changelog.d/17255.feature deleted file mode 100644 index 4093de1146..0000000000 --- a/changelog.d/17255.feature +++ /dev/null @@ -1 +0,0 @@ -Add support for [MSC823](https://github.com/matrix-org/matrix-spec-proposals/pull/3823) - Account suspension. \ No newline at end of file diff --git a/changelog.d/17256.feature b/changelog.d/17256.feature deleted file mode 100644 index 6ec4cb7a31..0000000000 --- a/changelog.d/17256.feature +++ /dev/null @@ -1 +0,0 @@ - Improve ratelimiting in Synapse (#17256). \ No newline at end of file diff --git a/changelog.d/17265.misc b/changelog.d/17265.misc deleted file mode 100644 index e6d4d8b4ee..0000000000 --- a/changelog.d/17265.misc +++ /dev/null @@ -1 +0,0 @@ -Use fully-qualified `PersistedEventPosition` when returning `RoomsForUser` to facilitate proper comparisons and `RoomStreamToken` generation. 
diff --git a/changelog.d/17266.misc b/changelog.d/17266.misc deleted file mode 100644 index ce8c4ab086..0000000000 --- a/changelog.d/17266.misc +++ /dev/null @@ -1 +0,0 @@ -Add debug logging for when room keys are uploaded, including whether they are replacing other room keys. \ No newline at end of file diff --git a/changelog.d/17270.feature b/changelog.d/17270.feature deleted file mode 100644 index 4ea5e7be85..0000000000 --- a/changelog.d/17270.feature +++ /dev/null @@ -1 +0,0 @@ -Add support for the unstable [MSC4151](https://github.com/matrix-org/matrix-spec-proposals/pull/4151) report room API. diff --git a/changelog.d/17271.misc b/changelog.d/17271.misc deleted file mode 100644 index 915d717ad7..0000000000 --- a/changelog.d/17271.misc +++ /dev/null @@ -1 +0,0 @@ -Handle OTK uploads off master. diff --git a/changelog.d/17272.bugfix b/changelog.d/17272.bugfix deleted file mode 100644 index 83e7ca426a..0000000000 --- a/changelog.d/17272.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix wrong retention policy being used when filtering events. diff --git a/changelog.d/17273.misc b/changelog.d/17273.misc deleted file mode 100644 index 2c1c6bc0d5..0000000000 --- a/changelog.d/17273.misc +++ /dev/null @@ -1 +0,0 @@ -Don't try and resync devices for remote users whose servers are marked as down. diff --git a/changelog.d/17275.bugfix b/changelog.d/17275.bugfix deleted file mode 100644 index eb522bb997..0000000000 --- a/changelog.d/17275.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix bug where OTKs were not always included in `/sync` response when using workers. diff --git a/changelog.d/17276.feature b/changelog.d/17276.feature deleted file mode 100644 index a1edfae0aa..0000000000 --- a/changelog.d/17276.feature +++ /dev/null @@ -1 +0,0 @@ -Filter for public and empty rooms added to Admin-API [List Room API](https://element-hq.github.io/synapse/latest/admin_api/rooms.html#list-room-api). 
diff --git a/changelog.d/17277.feature b/changelog.d/17277.feature deleted file mode 100644 index 5c16342c11..0000000000 --- a/changelog.d/17277.feature +++ /dev/null @@ -1 +0,0 @@ -Add `is_dm` filtering to experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint. diff --git a/changelog.d/17279.misc b/changelog.d/17279.misc deleted file mode 100644 index 2090b11d7f..0000000000 --- a/changelog.d/17279.misc +++ /dev/null @@ -1 +0,0 @@ -Re-organize Pydantic models and types used in handlers. diff --git a/changelog.d/17281.feature b/changelog.d/17281.feature deleted file mode 100644 index fce512692c..0000000000 --- a/changelog.d/17281.feature +++ /dev/null @@ -1 +0,0 @@ -Add `is_encrypted` filtering to experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint. diff --git a/changelog.d/17282.feature b/changelog.d/17282.feature deleted file mode 100644 index 334709a3a7..0000000000 --- a/changelog.d/17282.feature +++ /dev/null @@ -1 +0,0 @@ -Include user membership in events served to clients, per MSC4115. \ No newline at end of file diff --git a/changelog.d/17283.bugfix b/changelog.d/17283.bugfix deleted file mode 100644 index 98c1f05cc2..0000000000 --- a/changelog.d/17283.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a long-standing bug where an invalid 'from' parameter to [`/notifications`](https://spec.matrix.org/v1.10/client-server-api/#get_matrixclientv3notifications) would result in an Internal Server Error. \ No newline at end of file diff --git a/changelog.d/17284.feature b/changelog.d/17284.feature deleted file mode 100644 index 015d925e7c..0000000000 --- a/changelog.d/17284.feature +++ /dev/null @@ -1 +0,0 @@ -Do not require user-interactive authentication for uploading cross-signing keys for the first time, per MSC3967. 
\ No newline at end of file diff --git a/changelog.d/17293.feature b/changelog.d/17293.feature deleted file mode 100644 index 60ca7721a0..0000000000 --- a/changelog.d/17293.feature +++ /dev/null @@ -1 +0,0 @@ -Add `stream_ordering` sort to experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint. diff --git a/changelog.d/17294.feature b/changelog.d/17294.feature deleted file mode 100644 index 33aac7b0bc..0000000000 --- a/changelog.d/17294.feature +++ /dev/null @@ -1,2 +0,0 @@ -`register_new_matrix_user` now supports a --password-file flag, which -is useful for scripting. diff --git a/changelog.d/17295.bugfix b/changelog.d/17295.bugfix deleted file mode 100644 index 4484253bb8..0000000000 --- a/changelog.d/17295.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix edge case in `/sync` returning the wrong the state when using sharded event persisters. diff --git a/changelog.d/17296.feature b/changelog.d/17296.feature deleted file mode 100644 index 4ea5e7be85..0000000000 --- a/changelog.d/17296.feature +++ /dev/null @@ -1 +0,0 @@ -Add support for the unstable [MSC4151](https://github.com/matrix-org/matrix-spec-proposals/pull/4151) report room API. diff --git a/changelog.d/17297.misc b/changelog.d/17297.misc deleted file mode 100644 index 7ec351d2c1..0000000000 --- a/changelog.d/17297.misc +++ /dev/null @@ -1 +0,0 @@ -Bump `mypy` from 1.8.0 to 1.9.0. \ No newline at end of file diff --git a/changelog.d/17300.misc b/changelog.d/17300.misc deleted file mode 100644 index cdc40bb2e5..0000000000 --- a/changelog.d/17300.misc +++ /dev/null @@ -1 +0,0 @@ -Expose the worker instance that persisted the event on `event.internal_metadata.instance_name`. 
diff --git a/changelog.d/17301.bugfix b/changelog.d/17301.bugfix deleted file mode 100644 index 50383cb4a4..0000000000 --- a/changelog.d/17301.bugfix +++ /dev/null @@ -1 +0,0 @@ -Add initial implementation of an experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint. diff --git a/changelog.d/17304.feature b/changelog.d/17304.feature deleted file mode 100644 index a969d8bf58..0000000000 --- a/changelog.d/17304.feature +++ /dev/null @@ -1,2 +0,0 @@ -`register_new_matrix_user` now supports a --exists-ok flag to allow registration of users that already exist in the database. -This is useful for scripts that bootstrap user accounts with initial passwords. diff --git a/changelog.d/17308.doc b/changelog.d/17308.doc deleted file mode 100644 index 7ae080a684..0000000000 --- a/changelog.d/17308.doc +++ /dev/null @@ -1 +0,0 @@ -Add missing quotes for example for `exclude_rooms_from_sync`. diff --git a/changelog.d/17322.feature b/changelog.d/17322.feature deleted file mode 100644 index 85386c2df7..0000000000 --- a/changelog.d/17322.feature +++ /dev/null @@ -1 +0,0 @@ -Add support for via query parameter from MSC415. diff --git a/changelog.d/17324.misc b/changelog.d/17324.misc deleted file mode 100644 index c0d7196ee0..0000000000 --- a/changelog.d/17324.misc +++ /dev/null @@ -1 +0,0 @@ -Update the README with Element branding, improve headers and fix the #synapse:matrix.org support room link rendering. \ No newline at end of file diff --git a/changelog.d/17325.misc b/changelog.d/17325.misc deleted file mode 100644 index 1a4ce7ceec..0000000000 --- a/changelog.d/17325.misc +++ /dev/null @@ -1 +0,0 @@ -This is a changelog so tests will run. \ No newline at end of file diff --git a/changelog.d/17329.doc b/changelog.d/17329.doc deleted file mode 100644 index 2486256dad..0000000000 --- a/changelog.d/17329.doc +++ /dev/null @@ -1 +0,0 @@ -Update header in the README to visually fix the the auto-generated table of contents. 
\ No newline at end of file diff --git a/changelog.d/17331.misc b/changelog.d/17331.misc deleted file mode 100644 index 79d3f33996..0000000000 --- a/changelog.d/17331.misc +++ /dev/null @@ -1 +0,0 @@ -Change path of the experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync implementation to `/org.matrix.simplified_msc3575/sync` since our simplified API is slightly incompatible with what's in the current MSC. diff --git a/changelog.d/17333.misc b/changelog.d/17333.misc deleted file mode 100644 index d3ef0b3777..0000000000 --- a/changelog.d/17333.misc +++ /dev/null @@ -1 +0,0 @@ -Handle device lists notifications for large accounts more efficiently in worker mode. diff --git a/changelog.d/17335.feature b/changelog.d/17335.feature deleted file mode 100644 index c6beed42ed..0000000000 --- a/changelog.d/17335.feature +++ /dev/null @@ -1 +0,0 @@ -Add `is_invite` filtering to experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint. diff --git a/changelog.d/17336.bugfix b/changelog.d/17336.bugfix deleted file mode 100644 index 618834302e..0000000000 --- a/changelog.d/17336.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix email notification subject when invited to a space. diff --git a/changelog.d/17338.misc b/changelog.d/17338.misc deleted file mode 100644 index 1a81bdef85..0000000000 --- a/changelog.d/17338.misc +++ /dev/null @@ -1 +0,0 @@ -Do not block event sending/receiving while calculating large event auth chains. diff --git a/changelog.d/17339.misc b/changelog.d/17339.misc deleted file mode 100644 index 1d7cb96c8b..0000000000 --- a/changelog.d/17339.misc +++ /dev/null @@ -1 +0,0 @@ -Tidy up `parse_integer` docs and call sites to reflect the fact that they require non-negative integers by default, and bring `parse_integer_from_args` default in alignment. Contributed by Denis Kasak (@dkasak). 
diff --git a/changelog.d/17341.doc b/changelog.d/17341.doc deleted file mode 100644 index 353c8adbe8..0000000000 --- a/changelog.d/17341.doc +++ /dev/null @@ -1 +0,0 @@ -Fix stale references to the Foundation's Security Disclosure Policy. diff --git a/changelog.d/17347.doc b/changelog.d/17347.doc deleted file mode 100644 index 6cd41be60f..0000000000 --- a/changelog.d/17347.doc +++ /dev/null @@ -1 +0,0 @@ -Add default values for `rc_invites.per_issuer` to docs. diff --git a/changelog.d/17348.doc b/changelog.d/17348.doc deleted file mode 100644 index 4ce42bbadb..0000000000 --- a/changelog.d/17348.doc +++ /dev/null @@ -1 +0,0 @@ -Fix an error in the docs for `search_all_users` parameter under `user_directory`. diff --git a/changelog.d/17350.feature b/changelog.d/17350.feature deleted file mode 100644 index 709366f5b8..0000000000 --- a/changelog.d/17350.feature +++ /dev/null @@ -1,2 +0,0 @@ -Support [MSC3916](https://github.com/matrix-org/matrix-spec-proposals/blob/rav/authentication-for-media/proposals/3916-authentication-for-media.md) -by adding a federation /download endpoint. \ No newline at end of file diff --git a/changelog.d/17358.misc b/changelog.d/17358.misc deleted file mode 100644 index d3ef0b3777..0000000000 --- a/changelog.d/17358.misc +++ /dev/null @@ -1 +0,0 @@ -Handle device lists notifications for large accounts more efficiently in worker mode. diff --git a/debian/changelog b/debian/changelog index 731eacf20f..ceef366a3f 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,8 +1,9 @@ -matrix-synapse-py3 (1.109.0+nmu1) UNRELEASED; urgency=medium +matrix-synapse-py3 (1.110.0~rc1) stable; urgency=medium * `register_new_matrix_user` now supports a --password-file and a --exists-ok flag. + * New Synapse release 1.110.0rc1. 
- -- Synapse Packaging team Tue, 18 Jun 2024 13:29:36 +0100 + -- Synapse Packaging team Wed, 26 Jun 2024 14:07:56 +0200 matrix-synapse-py3 (1.109.0) stable; urgency=medium diff --git a/pyproject.toml b/pyproject.toml index 1485016a5a..19998c1acf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -96,7 +96,7 @@ module-name = "synapse.synapse_rust" [tool.poetry] name = "matrix-synapse" -version = "1.109.0" +version = "1.110.0rc1" description = "Homeserver for the Matrix decentralised comms protocol" authors = ["Matrix.org Team and Contributors "] license = "AGPL-3.0-or-later" From 315b8d20324065f645b38fb0808f9f82109b3127 Mon Sep 17 00:00:00 2001 From: Till Faelligen <2353100+S7evinK@users.noreply.github.com> Date: Wed, 26 Jun 2024 14:39:57 +0200 Subject: [PATCH 48/84] Update changelog --- CHANGES.md | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index e2c8cc937c..3171ac32ff 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,22 +4,21 @@ - Add initial implementation of an experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint. ([\#17187](https://github.com/element-hq/synapse/issues/17187)) - Add support for [MSC823](https://github.com/matrix-org/matrix-spec-proposals/pull/3823) - Account suspension. ([\#17255](https://github.com/element-hq/synapse/issues/17255)) -- Improve ratelimiting in Synapse (#17256). ([\#17256](https://github.com/element-hq/synapse/issues/17256)) +- Improve ratelimiting in Synapse. ([\#17256](https://github.com/element-hq/synapse/issues/17256)) - Add support for the unstable [MSC4151](https://github.com/matrix-org/matrix-spec-proposals/pull/4151) report room API. 
([\#17270](https://github.com/element-hq/synapse/issues/17270), [\#17296](https://github.com/element-hq/synapse/issues/17296)) - Filter for public and empty rooms added to Admin-API [List Room API](https://element-hq.github.io/synapse/latest/admin_api/rooms.html#list-room-api). ([\#17276](https://github.com/element-hq/synapse/issues/17276)) - Add `is_dm` filtering to experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint. ([\#17277](https://github.com/element-hq/synapse/issues/17277)) - Add `is_encrypted` filtering to experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint. ([\#17281](https://github.com/element-hq/synapse/issues/17281)) -- Include user membership in events served to clients, per MSC4115. ([\#17282](https://github.com/element-hq/synapse/issues/17282)) -- Do not require user-interactive authentication for uploading cross-signing keys for the first time, per MSC3967. ([\#17284](https://github.com/element-hq/synapse/issues/17284)) +- Include user membership in events served to clients, per [MSC4115](https://github.com/matrix-org/matrix-spec-proposals/pull/4115). ([\#17282](https://github.com/element-hq/synapse/issues/17282)) +- Do not require user-interactive authentication for uploading cross-signing keys for the first time, per [MSC3967](https://github.com/matrix-org/matrix-spec-proposals/pull/3967). ([\#17284](https://github.com/element-hq/synapse/issues/17284)) - Add `stream_ordering` sort to experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint. ([\#17293](https://github.com/element-hq/synapse/issues/17293)) - `register_new_matrix_user` now supports a --password-file flag, which is useful for scripting. 
([\#17294](https://github.com/element-hq/synapse/issues/17294)) - `register_new_matrix_user` now supports a --exists-ok flag to allow registration of users that already exist in the database. This is useful for scripts that bootstrap user accounts with initial passwords. ([\#17304](https://github.com/element-hq/synapse/issues/17304)) -- Add support for via query parameter from MSC415. ([\#17322](https://github.com/element-hq/synapse/issues/17322)) +- Add support for via query parameter from [MSC4156](https://github.com/matrix-org/matrix-spec-proposals/pull/4156). ([\#17322](https://github.com/element-hq/synapse/issues/17322)) - Add `is_invite` filtering to experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint. ([\#17335](https://github.com/element-hq/synapse/issues/17335)) -- Support [MSC3916](https://github.com/matrix-org/matrix-spec-proposals/blob/rav/authentication-for-media/proposals/3916-authentication-for-media.md) - by adding a federation /download endpoint. ([\#17350](https://github.com/element-hq/synapse/issues/17350)) +- Support [MSC3916](https://github.com/matrix-org/matrix-spec-proposals/blob/rav/authentication-for-media/proposals/3916-authentication-for-media.md) by adding a federation /download endpoint. ([\#17350](https://github.com/element-hq/synapse/issues/17350)) ### Bugfixes @@ -47,10 +46,8 @@ - Handle OTK uploads off master. ([\#17271](https://github.com/element-hq/synapse/issues/17271)) - Don't try and resync devices for remote users whose servers are marked as down. ([\#17273](https://github.com/element-hq/synapse/issues/17273)) - Re-organize Pydantic models and types used in handlers. ([\#17279](https://github.com/element-hq/synapse/issues/17279)) -- Bump `mypy` from 1.8.0 to 1.9.0. ([\#17297](https://github.com/element-hq/synapse/issues/17297)) - Expose the worker instance that persisted the event on `event.internal_metadata.instance_name`. 
([\#17300](https://github.com/element-hq/synapse/issues/17300)) - Update the README with Element branding, improve headers and fix the #synapse:matrix.org support room link rendering. ([\#17324](https://github.com/element-hq/synapse/issues/17324)) -- This is a changelog so tests will run. ([\#17325](https://github.com/element-hq/synapse/issues/17325)) - Change path of the experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync implementation to `/org.matrix.simplified_msc3575/sync` since our simplified API is slightly incompatible with what's in the current MSC. ([\#17331](https://github.com/element-hq/synapse/issues/17331)) - Handle device lists notifications for large accounts more efficiently in worker mode. ([\#17333](https://github.com/element-hq/synapse/issues/17333), [\#17358](https://github.com/element-hq/synapse/issues/17358)) - Do not block event sending/receiving while calculating large event auth chains. ([\#17338](https://github.com/element-hq/synapse/issues/17338)) @@ -75,6 +72,7 @@ * Bump sentry-sdk from 2.1.1 to 2.3.1. ([\#17263](https://github.com/element-hq/synapse/issues/17263)) * Bump sentry-sdk from 2.3.1 to 2.6.0. ([\#17351](https://github.com/element-hq/synapse/issues/17351)) * Bump tornado from 6.4 to 6.4.1. ([\#17344](https://github.com/element-hq/synapse/issues/17344)) +* Bump mypy from 1.8.0 to 1.9.0. ([\#17297](https://github.com/element-hq/synapse/issues/17297)) * Bump types-jsonschema from 4.21.0.20240311 to 4.22.0.20240610. ([\#17288](https://github.com/element-hq/synapse/issues/17288)) * Bump types-netaddr from 1.2.0.20240219 to 1.3.0.20240530. ([\#17314](https://github.com/element-hq/synapse/issues/17314)) * Bump types-pillow from 10.2.0.20240423 to 10.2.0.20240520. 
([\#17285](https://github.com/element-hq/synapse/issues/17285)) From a8dcd686fb7d4744b3364f35f07c9b5ce2b895b9 Mon Sep 17 00:00:00 2001 From: Till Faelligen <2353100+S7evinK@users.noreply.github.com> Date: Wed, 26 Jun 2024 15:10:49 +0200 Subject: [PATCH 49/84] Fix typo --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 3171ac32ff..09722ca3db 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,7 +3,7 @@ ### Features - Add initial implementation of an experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint. ([\#17187](https://github.com/element-hq/synapse/issues/17187)) -- Add support for [MSC823](https://github.com/matrix-org/matrix-spec-proposals/pull/3823) - Account suspension. ([\#17255](https://github.com/element-hq/synapse/issues/17255)) +- Add experimental support for [MSC3823](https://github.com/matrix-org/matrix-spec-proposals/pull/3823) - Account suspension. ([\#17255](https://github.com/element-hq/synapse/issues/17255)) - Improve ratelimiting in Synapse. ([\#17256](https://github.com/element-hq/synapse/issues/17256)) - Add support for the unstable [MSC4151](https://github.com/matrix-org/matrix-spec-proposals/pull/4151) report room API. ([\#17270](https://github.com/element-hq/synapse/issues/17270), [\#17296](https://github.com/element-hq/synapse/issues/17296)) - Filter for public and empty rooms added to Admin-API [List Room API](https://element-hq.github.io/synapse/latest/admin_api/rooms.html#list-room-api). ([\#17276](https://github.com/element-hq/synapse/issues/17276)) From b924a8e1a9c3772abf1d19602cfc52591bc6bc62 Mon Sep 17 00:00:00 2001 From: Till <2353100+S7evinK@users.noreply.github.com> Date: Wed, 26 Jun 2024 18:01:39 +0200 Subject: [PATCH 50/84] Fix uploading packages to PyPi (#17363) As per https://github.com/sphinx-doc/sphinx/issues/3921#issuecomment-315581557, we need double underscores. 
Running `rst2html README.rst > /dev/null` found some more warnings. --- README.rst | 14 +++++++------- changelog.d/17363.misc | 1 + 2 files changed, 8 insertions(+), 7 deletions(-) create mode 100644 changelog.d/17363.misc diff --git a/README.rst b/README.rst index 145315a7fe..9ecb6b5816 100644 --- a/README.rst +++ b/README.rst @@ -5,9 +5,9 @@ |support| |development| |documentation| |license| |pypi| |python| -Synapse is an open source `Matrix `_ homeserver +Synapse is an open source `Matrix `__ homeserver implementation, written and maintained by `Element `_. -`Matrix `_ is the open standard for +`Matrix `__ is the open standard for secure and interoperable real time communications. You can directly run and manage the source code in this repository, available under an AGPL license. There is no support provided from Element unless you have a @@ -119,7 +119,7 @@ impact to other applications will be minimal. 🧪 Testing a new installation -============================ +============================= The easiest way to try out your new Synapse installation is by connecting to it from a web client. @@ -173,10 +173,10 @@ As when logging in, you will need to specify a "Custom server". Specify your desired ``localpart`` in the 'User name' box. 🎯 Troubleshooting and support -============================= +============================== 🚀 Professional support ----------------------- +----------------------- Enterprise quality support for Synapse including SLAs is available as part of an `Element Server Suite (ESS) ` subscription. @@ -185,7 +185,7 @@ If you are an existing ESS subscriber then you can raise a `support request `. 🤝 Community support -------------------- +-------------------- The `Admin FAQ `_ includes tips on dealing with some common problems. For more details, see @@ -202,7 +202,7 @@ issues for support requests, only for bug reports and feature requests. .. 
_docs: docs 🪪 Identity Servers -================== +=================== Identity servers have the job of mapping email addresses and other 3rd Party IDs (3PIDs) to Matrix user IDs, as well as verifying the ownership of 3PIDs diff --git a/changelog.d/17363.misc b/changelog.d/17363.misc new file mode 100644 index 0000000000..555e2225ba --- /dev/null +++ b/changelog.d/17363.misc @@ -0,0 +1 @@ +Fix uploading packages to PyPi. \ No newline at end of file From 75c19bf57ac4c7623cff70d672dc8ad82238bfb8 Mon Sep 17 00:00:00 2001 From: Till <2353100+S7evinK@users.noreply.github.com> Date: Wed, 26 Jun 2024 18:01:39 +0200 Subject: [PATCH 51/84] Fix uploading packages to PyPi (#17363) As per https://github.com/sphinx-doc/sphinx/issues/3921#issuecomment-315581557, we need double underscores. Running `rst2html README.rst > /dev/null` found some more warnings. --- README.rst | 14 +++++++------- changelog.d/17363.misc | 1 + 2 files changed, 8 insertions(+), 7 deletions(-) create mode 100644 changelog.d/17363.misc diff --git a/README.rst b/README.rst index 145315a7fe..9ecb6b5816 100644 --- a/README.rst +++ b/README.rst @@ -5,9 +5,9 @@ |support| |development| |documentation| |license| |pypi| |python| -Synapse is an open source `Matrix `_ homeserver +Synapse is an open source `Matrix `__ homeserver implementation, written and maintained by `Element `_. -`Matrix `_ is the open standard for +`Matrix `__ is the open standard for secure and interoperable real time communications. You can directly run and manage the source code in this repository, available under an AGPL license. There is no support provided from Element unless you have a @@ -119,7 +119,7 @@ impact to other applications will be minimal. 🧪 Testing a new installation -============================ +============================= The easiest way to try out your new Synapse installation is by connecting to it from a web client. @@ -173,10 +173,10 @@ As when logging in, you will need to specify a "Custom server". 
Specify your desired ``localpart`` in the 'User name' box. 🎯 Troubleshooting and support -============================= +============================== 🚀 Professional support ----------------------- +----------------------- Enterprise quality support for Synapse including SLAs is available as part of an `Element Server Suite (ESS) ` subscription. @@ -185,7 +185,7 @@ If you are an existing ESS subscriber then you can raise a `support request `. 🤝 Community support -------------------- +-------------------- The `Admin FAQ `_ includes tips on dealing with some common problems. For more details, see @@ -202,7 +202,7 @@ issues for support requests, only for bug reports and feature requests. .. _docs: docs 🪪 Identity Servers -================== +=================== Identity servers have the job of mapping email addresses and other 3rd Party IDs (3PIDs) to Matrix user IDs, as well as verifying the ownership of 3PIDs diff --git a/changelog.d/17363.misc b/changelog.d/17363.misc new file mode 100644 index 0000000000..555e2225ba --- /dev/null +++ b/changelog.d/17363.misc @@ -0,0 +1 @@ +Fix uploading packages to PyPi. \ No newline at end of file From f75da9cc53968cccf73b44834c9f0a619730ab09 Mon Sep 17 00:00:00 2001 From: Till Faelligen <2353100+S7evinK@users.noreply.github.com> Date: Wed, 26 Jun 2024 18:14:57 +0200 Subject: [PATCH 52/84] 1.110.0rc2 --- CHANGES.md | 9 +++++++++ changelog.d/17363.misc | 1 - debian/changelog | 6 ++++++ pyproject.toml | 2 +- 4 files changed, 16 insertions(+), 2 deletions(-) delete mode 100644 changelog.d/17363.misc diff --git a/CHANGES.md b/CHANGES.md index 09722ca3db..94f187b951 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,12 @@ +# Synapse 1.110.0rc2 (2024-06-26) + +### Internal Changes + +- Fix uploading packages to PyPi. 
([\#17363](https://github.com/element-hq/synapse/issues/17363)) + + + + # Synapse 1.110.0rc1 (2024-06-26) ### Features diff --git a/changelog.d/17363.misc b/changelog.d/17363.misc deleted file mode 100644 index 555e2225ba..0000000000 --- a/changelog.d/17363.misc +++ /dev/null @@ -1 +0,0 @@ -Fix uploading packages to PyPi. \ No newline at end of file diff --git a/debian/changelog b/debian/changelog index ceef366a3f..59aa841650 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +matrix-synapse-py3 (1.110.0~rc2) stable; urgency=medium + + * New Synapse release 1.110.0rc2. + + -- Synapse Packaging team Wed, 26 Jun 2024 18:14:48 +0200 + matrix-synapse-py3 (1.110.0~rc1) stable; urgency=medium * `register_new_matrix_user` now supports a --password-file and a --exists-ok flag. diff --git a/pyproject.toml b/pyproject.toml index 19998c1acf..bbf9c78420 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -96,7 +96,7 @@ module-name = "synapse.synapse_rust" [tool.poetry] name = "matrix-synapse" -version = "1.110.0rc1" +version = "1.110.0rc2" description = "Homeserver for the Matrix decentralised comms protocol" authors = ["Matrix.org Team and Contributors "] license = "AGPL-3.0-or-later" From 2bf39231ede3a9bcad65ad3f1321e788acfdcd15 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 18:40:36 -0500 Subject: [PATCH 53/84] Add some tests for `get_current_state_delta_membership_changes_for_user(...)` --- synapse/storage/databases/main/stream.py | 14 +- tests/storage/test_stream.py | 515 +++++++++++++++++++++++ 2 files changed, 523 insertions(+), 6 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index f6be97698e..e222f36bab 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -63,7 +63,7 @@ from typing_extensions import Literal from twisted.internet import defer -from synapse.api.constants import Direction +from synapse.api.constants 
import Direction, EventTypes from synapse.api.filtering import Filter from synapse.events import EventBase from synapse.logging.context import make_deferred_yieldable, run_in_background @@ -807,7 +807,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): min_from_id = from_key.stream max_to_id = to_key.get_max_stream_pos() - args: List[Any] = [user_id, min_from_id, max_to_id] + args: List[Any] = [min_from_id, max_to_id, user_id, EventTypes.Member] # TODO: It would be good to assert that the `from_token`/`to_token` is >= # the first row in `current_state_delta_stream` for the rooms we're @@ -824,16 +824,18 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): e.event_id, s.prev_event_id, s.room_id, - s.instance_name, - s.stream_id, + e.instance_name, + e.stream_ordering, e.topological_ordering, m.membership, e.sender FROM current_state_delta_stream AS s INNER JOIN events AS e ON e.stream_ordering = s.stream_id INNER JOIN room_memberships AS m ON m.event_stream_ordering = s.stream_id - WHERE m.user_id = ? - AND s.stream_id > ? AND s.stream_id <= ? + WHERE s.stream_id > ? AND s.stream_id <= ? + AND m.user_id = ? + AND s.state_key = m.user_id + AND s.type = ? 
ORDER BY s.stream_id ASC """ diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index fe1e873e15..64f123987a 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -28,9 +28,12 @@ from twisted.test.proto_helpers import MemoryReactor from synapse.api.constants import Direction, EventTypes, RelationTypes from synapse.api.filtering import Filter +from synapse.api.room_versions import RoomVersions +from synapse.events import make_event_from_dict from synapse.rest import admin from synapse.rest.client import login, room from synapse.server import HomeServer +from synapse.storage.databases.main.stream import CurrentStateDeltaMembership from synapse.types import JsonDict, PersistedEventPosition, RoomStreamToken from synapse.util import Clock @@ -543,3 +546,515 @@ class GetLastEventInRoomBeforeStreamOrderingTestCase(HomeserverTestCase): } ), ) + + +class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): + """ + Test `get_current_state_delta_membership_changes_for_user(...)` + """ + + servlets = [ + admin.register_servlets, + room.register_servlets, + login.register_servlets, + ] + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.store = hs.get_datastores().main + self.event_sources = hs.get_event_sources() + self.state_handler = self.hs.get_state_handler() + persistence = hs.get_storage_controllers().persistence + assert persistence is not None + self.persistence = persistence + + def test_returns_membership_events(self) -> None: + """ + A basic test that a membership event in the token range is returned for the user. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + join_pos = self.get_success( + self.store.get_position_for_event(join_response["event_id"]) + ) + + after_room1_token = self.event_sources.get_current_token() + + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_room1_token.room_key, + to_key=after_room1_token.room_key, + ) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=join_response["event_id"], + event_pos=join_pos, + prev_event_id=None, + room_id=room_id1, + membership="join", + sender=user1_id, + ) + ], + ) + + def test_server_left_after_us_room(self) -> None: + """ + Test that when probing over part of the DAG where the server left the room *after + us*, we still see the join and leave changes. + + This is to make sure we play nicely with this behavior: When the server leaves a + room, it will insert new rows with `event_id = null` into the + `current_state_delta_stream` table for all current state. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + room_id1 = self.helper.create_room_as( + user2_id, + tok=user2_tok, + extra_content={ + "power_level_content_override": { + "users": { + user2_id: 100, + # Allow user1 to send state in the room + user1_id: 100, + } + } + }, + ) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + join_pos1 = self.get_success( + self.store.get_position_for_event(join_response1["event_id"]) + ) + # Make sure random other non-member state that happens to have a state_key + # matching the user ID doesn't mess with things. + self.helper.send_state( + room_id1, + event_type="foobarbazdummy", + state_key=user1_id, + body={"foo": "bar"}, + tok=user1_tok, + ) + # User1 should leave the room first + leave_response1 = self.helper.leave(room_id1, user1_id, tok=user1_tok) + leave_pos1 = self.get_success( + self.store.get_position_for_event(leave_response1["event_id"]) + ) + + # User2 should also leave the room (everyone has left the room which means the + # server is no longer in the room). 
+ self.helper.leave(room_id1, user2_id, tok=user2_tok) + + after_room1_token = self.event_sources.get_current_token() + + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_room1_token.room_key, + to_key=after_room1_token.room_key, + ) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=join_response1["event_id"], + event_pos=join_pos1, + prev_event_id=None, + room_id=room_id1, + membership="join", + sender=user1_id, + ), + CurrentStateDeltaMembership( + event_id=leave_response1["event_id"], + event_pos=leave_pos1, + prev_event_id=join_response1["event_id"], + room_id=room_id1, + membership="leave", + sender=user1_id, + ), + ], + ) + + def test_server_left_room(self) -> None: + """ + Test that when probing over part of the DAG where we leave the room causing the + server to leave the room (because we were the last local user in the room), we + still see the join and leave changes. + + This is to make sure we play nicely with this behavior: When the server leaves a + room, it will insert new rows with `event_id = null` into the + `current_state_delta_stream` table for all current state. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + room_id1 = self.helper.create_room_as( + user2_id, + tok=user2_tok, + extra_content={ + "power_level_content_override": { + "users": { + user2_id: 100, + # Allow user1 to send state in the room + user1_id: 100, + } + } + }, + ) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + join_pos1 = self.get_success( + self.store.get_position_for_event(join_response1["event_id"]) + ) + # Make sure random other non-member state that happens to have a state_key + # matching the user ID doesn't mess with things. + self.helper.send_state( + room_id1, + event_type="foobarbazdummy", + state_key=user1_id, + body={"foo": "bar"}, + tok=user1_tok, + ) + + # User2 should leave the room first. + self.helper.leave(room_id1, user2_id, tok=user2_tok) + + # User1 (the person we're testing with) should also leave the room (everyone has + # left the room which means the server is no longer in the room). 
+ leave_response1 = self.helper.leave(room_id1, user1_id, tok=user1_tok) + leave_pos1 = self.get_success( + self.store.get_position_for_event(leave_response1["event_id"]) + ) + + after_room1_token = self.event_sources.get_current_token() + + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_room1_token.room_key, + to_key=after_room1_token.room_key, + ) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=join_response1["event_id"], + event_pos=join_pos1, + prev_event_id=None, + room_id=room_id1, + membership="join", + sender=user1_id, + ), + CurrentStateDeltaMembership( + event_id=leave_response1["event_id"], + event_pos=leave_pos1, + prev_event_id=join_response1["event_id"], + room_id=room_id1, + membership="leave", + sender=user1_id, + ), + ], + ) + + def test_membership_persisted_in_same_batch(self) -> None: + """ + Test batch of membership events being processed at once. This will result in all + of the memberships being stored in the `current_state_delta_stream` table with + the same `stream_ordering` even though the individual events have different + `stream_ordering`s. 
+ """ + user1_id = self.register_user("user1", "pass") + _user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + user3_id = self.register_user("user3", "pass") + _user3_tok = self.login(user3_id, "pass") + user4_id = self.register_user("user4", "pass") + _user4_tok = self.login(user4_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + # User2 is just the designated person to create the room (we do this across the + # tests to be consistent) + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + + # Persist the user1, user3, and user4 join events in the same batch so they all + # end up in the `current_state_delta_stream` table with the same + # stream_ordering. + join_event1 = make_event_from_dict( + { + "sender": user1_id, + "type": EventTypes.Member, + "state_key": user1_id, + "content": {"membership": "join"}, + "room_id": room_id1, + "depth": 0, + "origin_server_ts": 0, + "prev_events": [], + "auth_events": [], + }, + room_version=RoomVersions.V10, + ) + join_event_context1 = self.get_success( + self.state_handler.compute_event_context(join_event1) + ) + join_event3 = make_event_from_dict( + { + "sender": user3_id, + "type": EventTypes.Member, + "state_key": user3_id, + "content": {"membership": "join"}, + "room_id": room_id1, + "depth": 1, + "origin_server_ts": 1, + "prev_events": [], + "auth_events": [], + }, + room_version=RoomVersions.V10, + ) + join_event_context3 = self.get_success( + self.state_handler.compute_event_context(join_event3) + ) + join_event4 = make_event_from_dict( + { + "sender": user4_id, + "type": EventTypes.Member, + "state_key": user4_id, + "content": {"membership": "join"}, + "room_id": room_id1, + "depth": 2, + "origin_server_ts": 2, + "prev_events": [], + "auth_events": [], + }, + room_version=RoomVersions.V10, + ) + join_event_context4 = self.get_success( + self.state_handler.compute_event_context(join_event4) + ) 
+ self.get_success( + self.persistence.persist_events( + [ + (join_event1, join_event_context1), + (join_event3, join_event_context3), + (join_event4, join_event_context4), + ] + ) + ) + + after_room1_token = self.event_sources.get_current_token() + + # Let's get membership changes from user3's perspective because it was in the + # middle of the batch. This way, if rows in` current_state_delta_stream` are + # stored with the first or last event's `stream_ordering`, we will still catch + # bugs. + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user3_id, + from_key=before_room1_token.room_key, + to_key=after_room1_token.room_key, + ) + ) + + join_pos3 = self.get_success( + self.store.get_position_for_event(join_event3.event_id) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=join_event3.event_id, + event_pos=join_pos3, + prev_event_id=None, + room_id=room_id1, + membership="join", + sender=user1_id, + ), + ], + ) + + # TODO: Test remote join where the first rows will just be the state when you joined + + # TODO: Test state reset where the user gets removed from the room (when there is no + # corresponding leave event) + + def test_excluded_room_ids(self) -> None: + """ + Test that the `excluded_room_ids` option excludes changes from the specified rooms. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + join_pos1 = self.get_success( + self.store.get_position_for_event(join_response1["event_id"]) + ) + + room_id2 = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response2 = self.helper.join(room_id2, user1_id, tok=user1_tok) + join_pos2 = self.get_success( + self.store.get_position_for_event(join_response2["event_id"]) + ) + + after_room1_token = self.event_sources.get_current_token() + + # First test the the room is returned without the `excluded_room_ids` option + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_room1_token.room_key, + to_key=after_room1_token.room_key, + ) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=join_response1["event_id"], + event_pos=join_pos1, + prev_event_id=None, + room_id=room_id1, + membership="join", + sender=user1_id, + ), + CurrentStateDeltaMembership( + event_id=join_response2["event_id"], + event_pos=join_pos2, + prev_event_id=None, + room_id=room_id2, + membership="join", + sender=user1_id, + ), + ], + ) + + # The test that `excluded_room_ids` excludes room2 as expected + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_room1_token.room_key, + to_key=after_room1_token.room_key, + excluded_room_ids=[room_id2], + ) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( 
+ event_id=join_response1["event_id"], + event_pos=join_pos1, + prev_event_id=None, + room_id=room_id1, + membership="join", + sender=user1_id, + ) + ], + ) + + +# class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase(BaseMultiWorkerStreamTestCase): +# """ +# TODO +# """ + +# servlets = [ +# admin.register_servlets_for_client_rest_resource, +# room.register_servlets, +# login.register_servlets, +# ] + +# def default_config(self) -> dict: +# conf = super().default_config() +# conf["federation_custom_ca_list"] = [get_test_ca_cert_file()] +# return conf + +# def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: +# self.sliding_sync_handler = self.hs.get_sliding_sync_handler() +# self.store = self.hs.get_datastores().main +# self.event_sources = hs.get_event_sources() + + +# def test_sharded_event_persisters(self) -> None: +# """ +# TODO +# """ +# user1_id = self.register_user("user1", "pass") +# user1_tok = self.login(user1_id, "pass") +# user2_id = self.register_user("user2", "pass") +# user2_tok = self.login(user2_id, "pass") + +# remote_hs = self.make_worker_hs("synapse.app.generic_worker") + +# channel = make_request( +# self.reactor, +# self._hs_to_site[hs], +# "GET", +# f"/_matrix/media/r0/download/{target}/{media_id}", +# shorthand=False, +# access_token=self.access_token, +# await_result=False, +# ) + +# remote_hs + +# worker_store2 = worker_hs2.get_datastores().main +# assert isinstance(worker_store2._stream_id_gen, MultiWriterIdGenerator) +# actx = worker_store2._stream_id_gen.get_next() + +# self.assertEqual( +# room_id_results.keys(), +# { +# room_id1, +# # room_id2 shouldn't show up because we left before the from/to range +# # and the join event during the range happened while worker2 was stuck. +# # This means that from the perspective of the master, where the +# # `stuck_activity_token` is generated, the stream position for worker2 +# # wasn't advanced to the join yet. 
Looking at the `instance_map`, the +# # join technically comes after `stuck_activity_token``. +# # +# # room_id2, +# room_id3, +# }, +# ) From ec2d8dc1e3c602dadb4fac289bcd38b211f6b34d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 18:52:24 -0500 Subject: [PATCH 54/84] Create events using helper --- tests/storage/test_stream.py | 76 +++++++++++++----------------------- 1 file changed, 28 insertions(+), 48 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 64f123987a..39cb5a25c5 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -37,6 +37,7 @@ from synapse.storage.databases.main.stream import CurrentStateDeltaMembership from synapse.types import JsonDict, PersistedEventPosition, RoomStreamToken from synapse.util import Clock +from tests.test_utils.event_injection import create_event from tests.unittest import HomeserverTestCase logger = logging.getLogger(__name__) @@ -809,56 +810,35 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): # Persist the user1, user3, and user4 join events in the same batch so they all # end up in the `current_state_delta_stream` table with the same # stream_ordering. 
- join_event1 = make_event_from_dict( - { - "sender": user1_id, - "type": EventTypes.Member, - "state_key": user1_id, - "content": {"membership": "join"}, - "room_id": room_id1, - "depth": 0, - "origin_server_ts": 0, - "prev_events": [], - "auth_events": [], - }, - room_version=RoomVersions.V10, + join_event1, join_event_context1 = self.get_success( + create_event( + self.hs, + sender=user1_id, + type=EventTypes.Member, + state_key=user1_id, + content={"membership": "join"}, + room_id=room_id1, + ) ) - join_event_context1 = self.get_success( - self.state_handler.compute_event_context(join_event1) + join_event3, join_event_context3 = self.get_success( + create_event( + self.hs, + sender=user3_id, + type=EventTypes.Member, + state_key=user3_id, + content={"membership": "join"}, + room_id=room_id1, + ) ) - join_event3 = make_event_from_dict( - { - "sender": user3_id, - "type": EventTypes.Member, - "state_key": user3_id, - "content": {"membership": "join"}, - "room_id": room_id1, - "depth": 1, - "origin_server_ts": 1, - "prev_events": [], - "auth_events": [], - }, - room_version=RoomVersions.V10, - ) - join_event_context3 = self.get_success( - self.state_handler.compute_event_context(join_event3) - ) - join_event4 = make_event_from_dict( - { - "sender": user4_id, - "type": EventTypes.Member, - "state_key": user4_id, - "content": {"membership": "join"}, - "room_id": room_id1, - "depth": 2, - "origin_server_ts": 2, - "prev_events": [], - "auth_events": [], - }, - room_version=RoomVersions.V10, - ) - join_event_context4 = self.get_success( - self.state_handler.compute_event_context(join_event4) + join_event4, join_event_context4 = self.get_success( + create_event( + self.hs, + sender=user4_id, + type=EventTypes.Member, + state_key=user4_id, + content={"membership": "join"}, + room_id=room_id1, + ) ) self.get_success( self.persistence.persist_events( From 0b9a903ca12831e431b596daacf127e53ecbd050 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 19:35:18 
-0500 Subject: [PATCH 55/84] Add test that remotely joins room --- tests/storage/test_stream.py | 239 ++++++++++++++++++++++++++--------- 1 file changed, 178 insertions(+), 61 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 39cb5a25c5..3b825dbbbe 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -21,24 +21,32 @@ import logging from typing import List, Tuple +from unittest.mock import AsyncMock, patch from immutabledict import immutabledict from twisted.test.proto_helpers import MemoryReactor -from synapse.api.constants import Direction, EventTypes, RelationTypes +from synapse.api.constants import Direction, EventTypes, Membership, RelationTypes from synapse.api.filtering import Filter -from synapse.api.room_versions import RoomVersions -from synapse.events import make_event_from_dict +from synapse.crypto.event_signing import add_hashes_and_signatures +from synapse.events import FrozenEventV3 +from synapse.federation.federation_client import SendJoinResult from synapse.rest import admin from synapse.rest.client import login, room from synapse.server import HomeServer from synapse.storage.databases.main.stream import CurrentStateDeltaMembership -from synapse.types import JsonDict, PersistedEventPosition, RoomStreamToken +from synapse.types import ( + JsonDict, + PersistedEventPosition, + RoomStreamToken, + UserID, + create_requester, +) from synapse.util import Clock from tests.test_utils.event_injection import create_event -from tests.unittest import HomeserverTestCase +from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase logger = logging.getLogger(__name__) @@ -884,8 +892,6 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): ], ) - # TODO: Test remote join where the first rows will just be the state when you joined - # TODO: Test state reset where the user gets removed from the room (when there is no # corresponding leave event) @@ -974,67 
+980,178 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): ) -# class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase(BaseMultiWorkerStreamTestCase): -# """ -# TODO -# """ +class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase( + FederatingHomeserverTestCase +): + """ + Test `get_current_state_delta_membership_changes_for_user(...)` when joining remote federated rooms. + """ -# servlets = [ -# admin.register_servlets_for_client_rest_resource, -# room.register_servlets, -# login.register_servlets, -# ] + servlets = [ + admin.register_servlets_for_client_rest_resource, + room.register_servlets, + login.register_servlets, + ] -# def default_config(self) -> dict: -# conf = super().default_config() -# conf["federation_custom_ca_list"] = [get_test_ca_cert_file()] -# return conf + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.sliding_sync_handler = self.hs.get_sliding_sync_handler() + self.store = self.hs.get_datastores().main + self.event_sources = hs.get_event_sources() + self.room_member_handler = hs.get_room_member_handler() -# def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: -# self.sliding_sync_handler = self.hs.get_sliding_sync_handler() -# self.store = self.hs.get_datastores().main -# self.event_sources = hs.get_event_sources() + def test_remote_join(self) -> None: + """ + Test remote join where the first rows will just be the state when you joined + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + intially_unjoined_room_id = f"!example:{self.OTHER_SERVER_NAME}" -# def test_sharded_event_persisters(self) -> None: -# """ -# TODO -# """ -# user1_id = self.register_user("user1", "pass") -# user1_tok = self.login(user1_id, "pass") -# user2_id = self.register_user("user2", "pass") -# user2_tok = self.login(user2_id, "pass") + # Remotely join a room on another homeserver. 
+ # + # To do this we have to mock the responses from the remote homeserver. We also + # patch out a bunch of event checks on our end. + create_event_source = { + "auth_events": [], + "content": { + "creator": f"@creator:{self.OTHER_SERVER_NAME}", + "room_version": self.hs.config.server.default_room_version.identifier, + }, + "depth": 0, + "origin_server_ts": 0, + "prev_events": [], + "room_id": intially_unjoined_room_id, + "sender": f"@creator:{self.OTHER_SERVER_NAME}", + "state_key": "", + "type": EventTypes.Create, + } + self.add_hashes_and_signatures_from_other_server( + create_event_source, + self.hs.config.server.default_room_version, + ) + create_event = FrozenEventV3( + create_event_source, + self.hs.config.server.default_room_version, + {}, + None, + ) + creator_join_event_source = { + "auth_events": [create_event.event_id], + "content": { + "membership": "join", + }, + "depth": 1, + "origin_server_ts": 1, + "prev_events": [], + "room_id": intially_unjoined_room_id, + "sender": f"@creator:{self.OTHER_SERVER_NAME}", + "state_key": f"@creator:{self.OTHER_SERVER_NAME}", + "type": EventTypes.Member, + } + self.add_hashes_and_signatures_from_other_server( + creator_join_event_source, + self.hs.config.server.default_room_version, + ) + creator_join_event = FrozenEventV3( + creator_join_event_source, + self.hs.config.server.default_room_version, + {}, + None, + ) -# remote_hs = self.make_worker_hs("synapse.app.generic_worker") + # Our local user is going to remote join the room + join_event_source = { + "auth_events": [create_event.event_id], + "content": {"membership": "join"}, + "depth": 1, + "origin_server_ts": 100, + "prev_events": [creator_join_event.event_id], + "sender": user1_id, + "state_key": user1_id, + "room_id": intially_unjoined_room_id, + "type": EventTypes.Member, + } + add_hashes_and_signatures( + self.hs.config.server.default_room_version, + join_event_source, + self.hs.hostname, + self.hs.signing_key, + ) + join_event = FrozenEventV3( + 
join_event_source, + self.hs.config.server.default_room_version, + {}, + None, + ) -# channel = make_request( -# self.reactor, -# self._hs_to_site[hs], -# "GET", -# f"/_matrix/media/r0/download/{target}/{media_id}", -# shorthand=False, -# access_token=self.access_token, -# await_result=False, -# ) + mock_make_membership_event = AsyncMock( + return_value=( + self.OTHER_SERVER_NAME, + join_event, + self.hs.config.server.default_room_version, + ) + ) + mock_send_join = AsyncMock( + return_value=SendJoinResult( + join_event, + self.OTHER_SERVER_NAME, + state=[create_event, creator_join_event], + auth_chain=[create_event, creator_join_event], + partial_state=False, + servers_in_room=frozenset(), + ) + ) -# remote_hs + with patch.object( + self.room_member_handler.federation_handler.federation_client, + "make_membership_event", + mock_make_membership_event, + ), patch.object( + self.room_member_handler.federation_handler.federation_client, + "send_join", + mock_send_join, + ), patch( + "synapse.event_auth._is_membership_change_allowed", + return_value=None, + ), patch( + "synapse.handlers.federation_event.check_state_dependent_auth_rules", + return_value=None, + ): + self.get_success( + self.room_member_handler.update_membership( + requester=create_requester(user1_id), + target=UserID.from_string(user1_id), + room_id=intially_unjoined_room_id, + action=Membership.JOIN, + remote_room_hosts=[self.OTHER_SERVER_NAME], + ) + ) -# worker_store2 = worker_hs2.get_datastores().main -# assert isinstance(worker_store2._stream_id_gen, MultiWriterIdGenerator) -# actx = worker_store2._stream_id_gen.get_next() + events_db_dump = self.get_success( + self.store.db_pool.simple_select_list( + table="events", + keyvalues={}, + retcols=[ + "*", + ], + desc="debug dump events", + ) + ) -# self.assertEqual( -# room_id_results.keys(), -# { -# room_id1, -# # room_id2 shouldn't show up because we left before the from/to range -# # and the join event during the range happened while worker2 was 
stuck. -# # This means that from the perspective of the master, where the -# # `stuck_activity_token` is generated, the stream position for worker2 -# # wasn't advanced to the join yet. Looking at the `instance_map`, the -# # join technically comes after `stuck_activity_token``. -# # -# # room_id2, -# room_id3, -# }, -# ) + logger.info("events_db_dump: %s", events_db_dump) + + current_state_delta_stream_db_dump = self.get_success( + self.store.db_pool.simple_select_list( + table="current_state_delta_stream", + keyvalues={}, + retcols=[ + "*", + ], + desc="debug dump current_state_delta_stream", + ) + ) + + logger.info( + "current_state_delta_stream_db_dump: %s", current_state_delta_stream_db_dump + ) From 48d0acfbcda30f956d79cef873fa762f88530341 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 19:52:12 -0500 Subject: [PATCH 56/84] Actually test `get_current_state_delta_membership_changes_for_user(...)` in remote join test --- tests/storage/test_stream.py | 59 ++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 3b825dbbbe..dfca17db64 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -1001,10 +1001,13 @@ class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase( def test_remote_join(self) -> None: """ - Test remote join where the first rows will just be the state when you joined + Test remote join where the first rows in `current_state_delta_stream` will just + be the state when you joined the remote room. 
""" user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") + _user1_tok = self.login(user1_id, "pass") + + before_join_token = self.event_sources.get_current_token() intially_unjoined_room_id = f"!example:{self.OTHER_SERVER_NAME}" @@ -1128,30 +1131,40 @@ class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase( ) ) - events_db_dump = self.get_success( - self.store.db_pool.simple_select_list( - table="events", - keyvalues={}, - retcols=[ - "*", - ], - desc="debug dump events", + after_join_token = self.event_sources.get_current_token() + + # Get the membership changes for the user at this point, the + # `current_state_delta_stream` table should look like: + # + # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | + # |-----------|------------------------------|-----------------|------------------------------|----------|---------------|----------------| + # | 2 | '!example:other.example.com' | 'm.room.member' | '@user1:test' | '$xxx' | None | 'master' | + # | 2 | '!example:other.example.com' | 'm.room.create' | '' | '$xxx' | None | 'master' | + # | 2 | '!example:other.example.com' | 'm.room.member' | '@creator:other.example.com' | '$xxx' | None | 'master' | + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_join_token.room_key, + to_key=after_join_token.room_key, ) ) - logger.info("events_db_dump: %s", events_db_dump) - - current_state_delta_stream_db_dump = self.get_success( - self.store.db_pool.simple_select_list( - table="current_state_delta_stream", - keyvalues={}, - retcols=[ - "*", - ], - desc="debug dump current_state_delta_stream", - ) + join_pos = self.get_success( + self.store.get_position_for_event(join_event.event_id) ) - logger.info( - "current_state_delta_stream_db_dump: %s", current_state_delta_stream_db_dump + # Let the whole diff show on failure + self.maxDiff = None + 
self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=join_event.event_id, + event_pos=join_pos, + prev_event_id=None, + room_id=intially_unjoined_room_id, + membership="join", + sender=user1_id, + ), + ], ) From 2a944ffcef16744ade6b0172fcb98c7eeb281766 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 20:20:17 -0500 Subject: [PATCH 57/84] Add state of the db in each situation --- tests/storage/test_stream.py | 68 ++++++++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 2 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index dfca17db64..2ac88f18ea 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -673,6 +673,29 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): after_room1_token = self.event_sources.get_current_token() + # Get the membership changes for the user. + # + # At this point, the `current_state_delta_stream` table should look like the + # following. When the server leaves a room, it will insert new rows with + # `event_id = null` for all current state. 
+ # + # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | + # |-----------|----------|-----------------------------|----------------|----------|---------------|---------------| + # | 2 | !x:test | 'm.room.create' | '' | $xxx | None | 'master' | + # | 3 | !x:test | 'm.room.member' | '@user2:test' | $aaa | None | 'master' | + # | 4 | !x:test | 'm.room.history_visibility' | '' | $xxx | None | 'master' | + # | 4 | !x:test | 'm.room.join_rules' | '' | $xxx | None | 'master' | + # | 4 | !x:test | 'm.room.power_levels' | '' | $xxx | None | 'master' | + # | 7 | !x:test | 'm.room.member' | '@user1:test' | $ooo | None | 'master' | + # | 8 | !x:test | 'foobarbazdummy' | '@user1:test' | $xxx | None | 'master' | + # | 9 | !x:test | 'm.room.member' | '@user1:test' | $ppp | $ooo | 'master' | + # | 10 | !x:test | 'foobarbazdummy' | '@user1:test' | None | $xxx | 'master' | + # | 10 | !x:test | 'm.room.create' | '' | None | $xxx | 'master' | + # | 10 | !x:test | 'm.room.history_visibility' | '' | None | $xxx | 'master' | + # | 10 | !x:test | 'm.room.join_rules' | '' | None | $xxx | 'master' | + # | 10 | !x:test | 'm.room.member' | '@user1:test' | None | $ppp | 'master' | + # | 10 | !x:test | 'm.room.member' | '@user2:test' | None | $aaa | 'master' | + # | 10 | !x:test | 'm.room.power_levels' | | None | $xxx | 'master' | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( user1_id, @@ -761,6 +784,29 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): after_room1_token = self.event_sources.get_current_token() + # Get the membership changes for the user. + # + # At this point, the `current_state_delta_stream` table should look like the + # following. When the server leaves a room, it will insert new rows with + # `event_id = null` for all current state. 
+ # + # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | + # |-----------|-----------|-----------------------------|---------------|----------|---------------|---------------| + # | 2 | '!x:test' | 'm.room.create' | '' | '$xxx' | None | 'master' | + # | 3 | '!x:test' | 'm.room.member' | '@user2:test' | '$aaa' | None | 'master' | + # | 4 | '!x:test' | 'm.room.history_visibility' | '' | '$xxx' | None | 'master' | + # | 4 | '!x:test' | 'm.room.join_rules' | '' | '$xxx' | None | 'master' | + # | 4 | '!x:test' | 'm.room.power_levels' | '' | '$xxx' | None | 'master' | + # | 7 | '!x:test' | 'm.room.member' | '@user1:test' | '$ooo' | None | 'master' | + # | 8 | '!x:test' | 'foobarbazdummy' | '@user1:test' | '$xxx' | None | 'master' | + # | 9 | '!x:test' | 'm.room.member' | '@user2:test' | '$bbb' | '$aaa' | 'master' | + # | 10 | '!x:test' | 'foobarbazdummy' | '@user1:test' | None | '$xxx' | 'master' | + # | 10 | '!x:test' | 'm.room.create' | '' | None | '$xxx' | 'master' | + # | 10 | '!x:test' | 'm.room.history_visibility' | '' | None | '$xxx' | 'master' | + # | 10 | '!x:test' | 'm.room.join_rules' | '' | None | '$xxx' | 'master' | + # | 10 | '!x:test' | 'm.room.member' | '@user1:test' | None | '$ooo' | 'master' | + # | 10 | '!x:test' | 'm.room.member' | '@user2:test' | None | '$bbb' | 'master' | + # | 10 | '!x:test' | 'm.room.power_levels' | '' | None | '$xxx' | 'master' | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( user1_id, @@ -864,6 +910,21 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): # middle of the batch. This way, if rows in` current_state_delta_stream` are # stored with the first or last event's `stream_ordering`, we will still catch # bugs. 
+ # + # At this point, the `current_state_delta_stream` table should look like (notice + # those three memberships at the end with `stream_id=7` because we persisted + # them in the same batch): + # + # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | + # |-----------|-----------|----------------------------|------------------|----------|---------------|---------------| + # | 2 | '!x:test' | 'm.room.create' | '' | '$xxx' | None | 'master' | + # | 3 | '!x:test' | 'm.room.member' | '@user2:test' | '$xxx' | None | 'master' | + # | 4 | '!x:test' | 'm.room.history_visibility'| '' | '$xxx' | None | 'master' | + # | 4 | '!x:test' | 'm.room.join_rules' | '' | '$xxx' | None | 'master' | + # | 4 | '!x:test' | 'm.room.power_levels' | '' | '$xxx' | None | 'master' | + # | 7 | '!x:test' | 'm.room.member' | '@user3:test' | '$xxx' | None | 'master' | + # | 7 | '!x:test' | 'm.room.member' | '@user1:test' | '$xxx' | None | 'master' | + # | 7 | '!x:test' | 'm.room.member' | '@user4:test' | '$xxx' | None | 'master' | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( user3_id, @@ -1133,8 +1194,11 @@ class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase( after_join_token = self.event_sources.get_current_token() - # Get the membership changes for the user at this point, the - # `current_state_delta_stream` table should look like: + # Get the membership changes for the user. + # + # At this point, the `current_state_delta_stream` table should look like the + # following. 
Notice that all of the events are at the same `stream_id` because + # the current state starts out where we remotely joined: # # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | # |-----------|------------------------------|-----------------|------------------------------|----------|---------------|----------------| From 8df39d1baff8cac6aa446c8b71b3a64a8bf29a1e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 20:22:03 -0500 Subject: [PATCH 58/84] Remove redundant `instance_name` column --- tests/storage/test_stream.py | 98 ++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 2ac88f18ea..840f980344 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -679,23 +679,23 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): # following. When the server leaves a room, it will insert new rows with # `event_id = null` for all current state. 
# - # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | - # |-----------|----------|-----------------------------|----------------|----------|---------------|---------------| - # | 2 | !x:test | 'm.room.create' | '' | $xxx | None | 'master' | - # | 3 | !x:test | 'm.room.member' | '@user2:test' | $aaa | None | 'master' | - # | 4 | !x:test | 'm.room.history_visibility' | '' | $xxx | None | 'master' | - # | 4 | !x:test | 'm.room.join_rules' | '' | $xxx | None | 'master' | - # | 4 | !x:test | 'm.room.power_levels' | '' | $xxx | None | 'master' | - # | 7 | !x:test | 'm.room.member' | '@user1:test' | $ooo | None | 'master' | - # | 8 | !x:test | 'foobarbazdummy' | '@user1:test' | $xxx | None | 'master' | - # | 9 | !x:test | 'm.room.member' | '@user1:test' | $ppp | $ooo | 'master' | - # | 10 | !x:test | 'foobarbazdummy' | '@user1:test' | None | $xxx | 'master' | - # | 10 | !x:test | 'm.room.create' | '' | None | $xxx | 'master' | - # | 10 | !x:test | 'm.room.history_visibility' | '' | None | $xxx | 'master' | - # | 10 | !x:test | 'm.room.join_rules' | '' | None | $xxx | 'master' | - # | 10 | !x:test | 'm.room.member' | '@user1:test' | None | $ppp | 'master' | - # | 10 | !x:test | 'm.room.member' | '@user2:test' | None | $aaa | 'master' | - # | 10 | !x:test | 'm.room.power_levels' | | None | $xxx | 'master' | + # | stream_id | room_id | type | state_key | event_id | prev_event_id | + # |-----------|----------|-----------------------------|----------------|----------|---------------| + # | 2 | !x:test | 'm.room.create' | '' | $xxx | None | + # | 3 | !x:test | 'm.room.member' | '@user2:test' | $aaa | None | + # | 4 | !x:test | 'm.room.history_visibility' | '' | $xxx | None | + # | 4 | !x:test | 'm.room.join_rules' | '' | $xxx | None | + # | 4 | !x:test | 'm.room.power_levels' | '' | $xxx | None | + # | 7 | !x:test | 'm.room.member' | '@user1:test' | $ooo | None | + # | 8 | !x:test | 'foobarbazdummy' | '@user1:test' | $xxx | None | + # | 9 | 
!x:test | 'm.room.member' | '@user1:test' | $ppp | $ooo | + # | 10 | !x:test | 'foobarbazdummy' | '@user1:test' | None | $xxx | + # | 10 | !x:test | 'm.room.create' | '' | None | $xxx | + # | 10 | !x:test | 'm.room.history_visibility' | '' | None | $xxx | + # | 10 | !x:test | 'm.room.join_rules' | '' | None | $xxx | + # | 10 | !x:test | 'm.room.member' | '@user1:test' | None | $ppp | + # | 10 | !x:test | 'm.room.member' | '@user2:test' | None | $aaa | + # | 10 | !x:test | 'm.room.power_levels' | | None | $xxx | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( user1_id, @@ -790,23 +790,23 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): # following. When the server leaves a room, it will insert new rows with # `event_id = null` for all current state. # - # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | - # |-----------|-----------|-----------------------------|---------------|----------|---------------|---------------| - # | 2 | '!x:test' | 'm.room.create' | '' | '$xxx' | None | 'master' | - # | 3 | '!x:test' | 'm.room.member' | '@user2:test' | '$aaa' | None | 'master' | - # | 4 | '!x:test' | 'm.room.history_visibility' | '' | '$xxx' | None | 'master' | - # | 4 | '!x:test' | 'm.room.join_rules' | '' | '$xxx' | None | 'master' | - # | 4 | '!x:test' | 'm.room.power_levels' | '' | '$xxx' | None | 'master' | - # | 7 | '!x:test' | 'm.room.member' | '@user1:test' | '$ooo' | None | 'master' | - # | 8 | '!x:test' | 'foobarbazdummy' | '@user1:test' | '$xxx' | None | 'master' | - # | 9 | '!x:test' | 'm.room.member' | '@user2:test' | '$bbb' | '$aaa' | 'master' | - # | 10 | '!x:test' | 'foobarbazdummy' | '@user1:test' | None | '$xxx' | 'master' | - # | 10 | '!x:test' | 'm.room.create' | '' | None | '$xxx' | 'master' | - # | 10 | '!x:test' | 'm.room.history_visibility' | '' | None | '$xxx' | 'master' | - # | 10 | '!x:test' | 'm.room.join_rules' | '' | 
None | '$xxx' | 'master' | - # | 10 | '!x:test' | 'm.room.member' | '@user1:test' | None | '$ooo' | 'master' | - # | 10 | '!x:test' | 'm.room.member' | '@user2:test' | None | '$bbb' | 'master' | - # | 10 | '!x:test' | 'm.room.power_levels' | '' | None | '$xxx' | 'master' | + # | stream_id | room_id | type | state_key | event_id | prev_event_id | + # |-----------|-----------|-----------------------------|---------------|----------|---------------| + # | 2 | '!x:test' | 'm.room.create' | '' | '$xxx' | None | + # | 3 | '!x:test' | 'm.room.member' | '@user2:test' | '$aaa' | None | + # | 4 | '!x:test' | 'm.room.history_visibility' | '' | '$xxx' | None | + # | 4 | '!x:test' | 'm.room.join_rules' | '' | '$xxx' | None | + # | 4 | '!x:test' | 'm.room.power_levels' | '' | '$xxx' | None | + # | 7 | '!x:test' | 'm.room.member' | '@user1:test' | '$ooo' | None | + # | 8 | '!x:test' | 'foobarbazdummy' | '@user1:test' | '$xxx' | None | + # | 9 | '!x:test' | 'm.room.member' | '@user2:test' | '$bbb' | '$aaa' | + # | 10 | '!x:test' | 'foobarbazdummy' | '@user1:test' | None | '$xxx' | + # | 10 | '!x:test' | 'm.room.create' | '' | None | '$xxx' | + # | 10 | '!x:test' | 'm.room.history_visibility' | '' | None | '$xxx' | + # | 10 | '!x:test' | 'm.room.join_rules' | '' | None | '$xxx' | + # | 10 | '!x:test' | 'm.room.member' | '@user1:test' | None | '$ooo' | + # | 10 | '!x:test' | 'm.room.member' | '@user2:test' | None | '$bbb' | + # | 10 | '!x:test' | 'm.room.power_levels' | '' | None | '$xxx' | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( user1_id, @@ -915,16 +915,16 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): # those three memberships at the end with `stream_id=7` because we persisted # them in the same batch): # - # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | - # 
|-----------|-----------|----------------------------|------------------|----------|---------------|---------------| - # | 2 | '!x:test' | 'm.room.create' | '' | '$xxx' | None | 'master' | - # | 3 | '!x:test' | 'm.room.member' | '@user2:test' | '$xxx' | None | 'master' | - # | 4 | '!x:test' | 'm.room.history_visibility'| '' | '$xxx' | None | 'master' | - # | 4 | '!x:test' | 'm.room.join_rules' | '' | '$xxx' | None | 'master' | - # | 4 | '!x:test' | 'm.room.power_levels' | '' | '$xxx' | None | 'master' | - # | 7 | '!x:test' | 'm.room.member' | '@user3:test' | '$xxx' | None | 'master' | - # | 7 | '!x:test' | 'm.room.member' | '@user1:test' | '$xxx' | None | 'master' | - # | 7 | '!x:test' | 'm.room.member' | '@user4:test' | '$xxx' | None | 'master' | + # | stream_id | room_id | type | state_key | event_id | prev_event_id | + # |-----------|-----------|----------------------------|------------------|----------|---------------| + # | 2 | '!x:test' | 'm.room.create' | '' | '$xxx' | None | + # | 3 | '!x:test' | 'm.room.member' | '@user2:test' | '$xxx' | None | + # | 4 | '!x:test' | 'm.room.history_visibility'| '' | '$xxx' | None | + # | 4 | '!x:test' | 'm.room.join_rules' | '' | '$xxx' | None | + # | 4 | '!x:test' | 'm.room.power_levels' | '' | '$xxx' | None | + # | 7 | '!x:test' | 'm.room.member' | '@user3:test' | '$xxx' | None | + # | 7 | '!x:test' | 'm.room.member' | '@user1:test' | '$xxx' | None | + # | 7 | '!x:test' | 'm.room.member' | '@user4:test' | '$xxx' | None | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( user3_id, @@ -1200,11 +1200,11 @@ class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase( # following. 
Notice that all of the events are at the same `stream_id` because # the current state starts out where we remotely joined: # - # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | - # |-----------|------------------------------|-----------------|------------------------------|----------|---------------|----------------| - # | 2 | '!example:other.example.com' | 'm.room.member' | '@user1:test' | '$xxx' | None | 'master' | - # | 2 | '!example:other.example.com' | 'm.room.create' | '' | '$xxx' | None | 'master' | - # | 2 | '!example:other.example.com' | 'm.room.member' | '@creator:other.example.com' | '$xxx' | None | 'master' | + # | stream_id | room_id | type | state_key | event_id | prev_event_id | + # |-----------|------------------------------|-----------------|------------------------------|----------|---------------| + # | 2 | '!example:other.example.com' | 'm.room.member' | '@user1:test' | '$xxx' | None | + # | 2 | '!example:other.example.com' | 'm.room.create' | '' | '$xxx' | None | + # | 2 | '!example:other.example.com' | 'm.room.member' | '@creator:other.example.com' | '$xxx' | None | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( user1_id, From b7914e76769ea330cdfa99e18fd7695f8301b02b Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 21:01:54 -0500 Subject: [PATCH 59/84] Add skipped test for state resets --- tests/storage/test_stream.py | 92 ++++++++++++++++++++++++++++++++++-- 1 file changed, 88 insertions(+), 4 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 840f980344..04a0e24154 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -46,7 +46,7 @@ from synapse.types import ( from synapse.util import Clock from tests.test_utils.event_injection import create_event -from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase +from tests.unittest import 
FederatingHomeserverTestCase, HomeserverTestCase, skip_unless logger = logging.getLogger(__name__) @@ -839,6 +839,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): ], ) + @skip_unless(False, "We don't support this yet") def test_membership_persisted_in_same_batch(self) -> None: """ Test batch of membership events being processed at once. This will result in all @@ -948,13 +949,96 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): prev_event_id=None, room_id=room_id1, membership="join", - sender=user1_id, + sender=user3_id, ), ], ) - # TODO: Test state reset where the user gets removed from the room (when there is no - # corresponding leave event) + @skip_unless(False, "We don't support this yet") + def test_state_reset(self) -> None: + """ + Test a state reset scenario where the user gets removed from the room (when + there is no corresponding leave event) + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + join_pos1 = self.get_success( + self.store.get_position_for_event(join_response1["event_id"]) + ) + + before_reset_token = self.event_sources.get_current_token() + + # Send another state event which we will cause the reset at + dummy_state_response = self.helper.send_state( + room_id1, + event_type="foobarbaz", + state_key="", + body={"foo": "bar"}, + tok=user2_tok, + ) + dummy_state_pos = self.get_success( + self.store.get_position_for_event(dummy_state_response["event_id"]) + ) + + # Mock a state reset removing the membership for user1 in the current state + self.get_success( + self.store.db_pool.simple_delete( + table="current_state_events", + keyvalues={ + "room_id": room_id1, + "type": EventTypes.Member, + "state_key": 
user1_id, + }, + desc="state reset user in current_state_delta_stream", + ) + ) + self.get_success( + self.store.db_pool.simple_insert( + table="current_state_delta_stream", + values={ + "stream_id": dummy_state_pos.stream, + "room_id": room_id1, + "type": EventTypes.Member, + "state_key": user1_id, + "event_id": None, + # FIXME: I'm not sure if a state reset should have a prev_event_id + "prev_event_id": None, + "instance_name": dummy_state_pos.instance_name, + }, + desc="state reset user in current_state_delta_stream", + ) + ) + + after_reset_token = self.event_sources.get_current_token() + + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_reset_token.room_key, + to_key=after_reset_token.room_key, + ) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=TODO, + event_pos=TODO, + prev_event_id=None, + room_id=room_id1, + membership="leave", + sender=user1_id, + ), + ], + ) def test_excluded_room_ids(self) -> None: """ From 7eb1806ee3279f6581996b029f80251f8aaf3d69 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 21:06:05 -0500 Subject: [PATCH 60/84] Fix lints --- tests/storage/test_stream.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 04a0e24154..5b30d7106f 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -966,10 +966,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): user2_tok = self.login(user2_id, "pass") room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) - join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) - join_pos1 = self.get_success( - self.store.get_position_for_event(join_response1["event_id"]) - ) + self.helper.join(room_id1, user1_id, 
tok=user1_tok) before_reset_token = self.event_sources.get_current_token() @@ -1028,16 +1025,19 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): self.maxDiff = None self.assertEqual( membership_changes, - [ - CurrentStateDeltaMembership( - event_id=TODO, - event_pos=TODO, - prev_event_id=None, - room_id=room_id1, - membership="leave", - sender=user1_id, - ), - ], + # TODO: Uncomment the expected membership. We just have a `False` value + # here so the test expectation fails and you look here. + False, + # [ + # CurrentStateDeltaMembership( + # event_id=TODO, + # event_pos=TODO, + # prev_event_id=None, + # room_id=room_id1, + # membership="leave", + # sender=user1_id, + # ), + # ], ) def test_excluded_room_ids(self) -> None: From 935b98c474f030f92bdd28cd69fcf20f3d6045fd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 00:48:17 -0500 Subject: [PATCH 61/84] All `get_current_state_delta_membership_changes_for_user(...)` tests passing --- synapse/storage/databases/main/stream.py | 80 ++++++++++++++++-------- tests/storage/test_stream.py | 39 ++++++------ 2 files changed, 75 insertions(+), 44 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index e222f36bab..9ae1fe6c15 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -63,7 +63,7 @@ from typing_extensions import Literal from twisted.internet import defer -from synapse.api.constants import Direction, EventTypes +from synapse.api.constants import Direction, EventTypes, Membership from synapse.api.filtering import Filter from synapse.events import EventBase from synapse.logging.context import make_deferred_yieldable, run_in_background @@ -125,12 +125,12 @@ class CurrentStateDeltaMembership: sender: The person who sent the membership event """ - event_id: str + event_id: Optional[str] event_pos: PersistedEventPosition prev_event_id: Optional[str] room_id: str 
membership: str - sender: str + sender: Optional[str] def generate_pagination_where_clause( @@ -819,22 +819,32 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): # longer in the room or a state reset happened and it was unset. # `stream_ordering` is unique across the Synapse instance so this should # work fine. + # + # We `COALESCE` the `instance_name` and `stream_ordering` because we prefer + # the source of truth from the events table. This gives slightly more + # accurate results when available since `current_state_delta_stream` only + # tracks that the current state is at this stream position (not what stream + # position the state event was added) and batches events at the same + # `stream_id` in certain cases. + # + # TODO: We need to add indexes for `current_state_delta_stream.event_id` and + # `current_state_delta_stream.state_key`/`current_state_delta_stream.type` + # for this to be efficient. sql = """ SELECT e.event_id, s.prev_event_id, s.room_id, - e.instance_name, - e.stream_ordering, + COALESCE(e.instance_name, s.instance_name), + COALESCE(e.stream_ordering, s.stream_id), e.topological_ordering, m.membership, e.sender FROM current_state_delta_stream AS s - INNER JOIN events AS e ON e.stream_ordering = s.stream_id - INNER JOIN room_memberships AS m ON m.event_stream_ordering = s.stream_id + LEFT JOIN events AS e ON e.event_id = s.event_id + LEFT JOIN room_memberships AS m ON m.event_id = s.event_id WHERE s.stream_id > ? AND s.stream_id <= ? - AND m.user_id = ? - AND s.state_key = m.user_id + AND s.state_key = ? AND s.type = ? 
ORDER BY s.stream_id ASC """ @@ -842,6 +852,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): txn.execute(sql, args) membership_changes: List[CurrentStateDeltaMembership] = [] + membership_change_map: Dict[str, CurrentStateDeltaMembership] = {} for ( event_id, prev_event_id, @@ -852,36 +863,55 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): membership, sender, ) in txn: - assert event_id is not None - # `prev_event_id` can be `None` assert room_id is not None assert instance_name is not None assert stream_ordering is not None - assert topological_ordering is not None - assert membership is not None - assert sender is not None if _filter_results( from_key, to_key, instance_name, + # TODO: This isn't always filled now topological_ordering, stream_ordering, ): - membership_changes.append( - CurrentStateDeltaMembership( - event_id=event_id, - event_pos=PersistedEventPosition( - instance_name=instance_name, - stream=stream_ordering, - ), - prev_event_id=prev_event_id, - room_id=room_id, - membership=membership, - sender=sender, + # When the server leaves a room, it will insert new rows with + # `event_id = null` for all current state. This means we might + # already have a row for the leave event and then another for the + # same leave where the `event_id=null` but the `prev_event_id` is + # pointing back at the earlier leave event. Since we're assuming the + # `event_id = null` row is a `leave` and we don't want duplicate + # membership changes in our results, let's get rid of those + # (deduplicate) (see `test_server_left_after_us_room`). 
+ if event_id is None: + already_tracked_membership_change = membership_change_map.get( + prev_event_id ) + if ( + already_tracked_membership_change is not None + and already_tracked_membership_change.membership + == Membership.LEAVE + ): + continue + + membership_change = CurrentStateDeltaMembership( + event_id=event_id, + event_pos=PersistedEventPosition( + instance_name=instance_name, + stream=stream_ordering, + ), + prev_event_id=prev_event_id, + room_id=room_id, + membership=( + membership if membership is not None else Membership.LEAVE + ), + sender=sender, ) + membership_changes.append(membership_change) + if event_id: + membership_change_map[event_id] = membership_change + return membership_changes membership_changes = await self.db_pool.runInteraction( diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 5b30d7106f..ffa763bff2 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -46,7 +46,7 @@ from synapse.types import ( from synapse.util import Clock from tests.test_utils.event_injection import create_event -from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase, skip_unless +from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase logger = logging.getLogger(__name__) @@ -829,17 +829,16 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): sender=user1_id, ), CurrentStateDeltaMembership( - event_id=leave_response1["event_id"], + event_id=None, # leave_response1["event_id"], event_pos=leave_pos1, prev_event_id=join_response1["event_id"], room_id=room_id1, membership="leave", - sender=user1_id, + sender=None, # user1_id, ), ], ) - @skip_unless(False, "We don't support this yet") def test_membership_persisted_in_same_batch(self) -> None: """ Test batch of membership events being processed at once. 
This will result in all @@ -954,7 +953,6 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): ], ) - @skip_unless(False, "We don't support this yet") def test_state_reset(self) -> None: """ Test a state reset scenario where the user gets removed from the room (when @@ -970,7 +968,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): before_reset_token = self.event_sources.get_current_token() - # Send another state event which we will cause the reset at + # Send another state event to make a position for the state reset to happen at dummy_state_response = self.helper.send_state( room_id1, event_type="foobarbaz", @@ -1011,6 +1009,12 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): ) ) + # Manually bust the cache since we we're just manually messing with the database + # and not causing an actual state reset. + self.store._membership_stream_cache.entity_has_changed( + user1_id, dummy_state_pos.stream + ) + after_reset_token = self.event_sources.get_current_token() membership_changes = self.get_success( @@ -1025,19 +1029,16 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): self.maxDiff = None self.assertEqual( membership_changes, - # TODO: Uncomment the expected membership. We just have a `False` value - # here so the test expectation fails and you look here. 
- False, - # [ - # CurrentStateDeltaMembership( - # event_id=TODO, - # event_pos=TODO, - # prev_event_id=None, - # room_id=room_id1, - # membership="leave", - # sender=user1_id, - # ), - # ], + [ + CurrentStateDeltaMembership( + event_id=None, + event_pos=dummy_state_pos, + prev_event_id=None, + room_id=room_id1, + membership="leave", + sender=None, # user1_id, + ), + ], ) def test_excluded_room_ids(self) -> None: From f163fcf08a435ea96de334b1f88bd99a0ccbcc25 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 01:20:42 -0500 Subject: [PATCH 62/84] Remove need for topological_ordering --- synapse/storage/databases/main/stream.py | 45 +++++++++++++++++++++--- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 9ae1fe6c15..9e94cb08f6 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -412,6 +412,43 @@ def _filter_results( return True +def _filter_results_by_stream( + lower_token: Optional[RoomStreamToken], + upper_token: Optional[RoomStreamToken], + instance_name: str, + stream_ordering: int, +) -> bool: + """ + This function only works with "live" tokens with `stream_ordering` only. See + `_filter_results(...)` if you want to work with all tokens. + + Returns True if the event persisted by the given instance at the given + stream_ordering falls between the two tokens (taking a None + token to mean unbounded). + + Used to filter results from fetching events in the DB against the given + tokens. This is necessary to handle the case where the tokens include + position maps, which we handle by fetching more than necessary from the DB + and then filtering (rather than attempting to construct a complicated SQL + query). + """ + if lower_token: + assert lower_token.topological is None + + # If these are live tokens we compare the stream ordering against the + # writers stream position. 
+ if stream_ordering <= lower_token.get_stream_pos_for_instance(instance_name): + return False + + if upper_token: + assert upper_token.topological is None + + if upper_token.get_stream_pos_for_instance(instance_name) < stream_ordering: + return False + + return True + + def filter_to_clause(event_filter: Optional[Filter]) -> Tuple[str, List[str]]: # NB: This may create SQL clauses that don't optimise well (and we don't # have indices on all possible clauses). E.g. it may create @@ -764,6 +801,8 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): Fetch membership events (and the previous event that was replaced by that one) for a given user. + Note: This function only works with "live" tokens with `stream_ordering` only. + We're looking for membership changes in the token range (> `from_key` and <= `to_key`). @@ -837,7 +876,6 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): s.room_id, COALESCE(e.instance_name, s.instance_name), COALESCE(e.stream_ordering, s.stream_id), - e.topological_ordering, m.membership, e.sender FROM current_state_delta_stream AS s @@ -859,7 +897,6 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): room_id, instance_name, stream_ordering, - topological_ordering, membership, sender, ) in txn: @@ -867,12 +904,10 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): assert instance_name is not None assert stream_ordering is not None - if _filter_results( + if _filter_results_by_stream( from_key, to_key, instance_name, - # TODO: This isn't always filled now - topological_ordering, stream_ordering, ): # When the server leaves a room, it will insert new rows with From 956f20ef748b6e3caf76f91623e72b9a617ae235 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 01:24:15 -0500 Subject: [PATCH 63/84] (currently failing) Add test to make sure membership changes don't re-appear if the server leaves the room later --- tests/storage/test_stream.py | 63 +++++++++++++++++++++++++++++++++--- 1 file changed, 
59 insertions(+), 4 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index ffa763bff2..0082132474 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -619,7 +619,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): ], ) - def test_server_left_after_us_room(self) -> None: + def test_server_left_room_after_us(self) -> None: """ Test that when probing over part of the DAG where the server left the room *after us*, we still see the join and leave changes. @@ -652,7 +652,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): join_pos1 = self.get_success( self.store.get_position_for_event(join_response1["event_id"]) ) - # Make sure random other non-member state that happens to have a state_key + # Make sure that random other non-member state that happens to have a `state_key` # matching the user ID doesn't mess with things. self.helper.send_state( room_id1, @@ -728,7 +728,62 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): ], ) - def test_server_left_room(self) -> None: + def test_server_left_room_after_us_later(self) -> None: + """ + Test when the user leaves the room, then sometime later, everyone else leaves + the room, causing the server to leave the room, we shouldn't see any membership + changes. + + This is to make sure we play nicely with this behavior: When the server leaves a + room, it will insert new rows with `event_id = null` into the + `current_state_delta_stream` table for all current state. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + # User1 should leave the room first + self.helper.leave(room_id1, user1_id, tok=user1_tok) + + after_user1_leave_token = self.event_sources.get_current_token() + + # User2 should also leave the room (everyone has left the room which means the + # server is no longer in the room). + self.helper.leave(room_id1, user2_id, tok=user2_tok) + + after_server_leave_token = self.event_sources.get_current_token() + + # Join another room as user1 just to advance the stream_ordering and bust + # `_membership_stream_cache` + room_id2 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.join(room_id2, user1_id, tok=user1_tok) + + # Get the membership changes for the user. + # + # At this point, the `current_state_delta_stream` table should look like the + # following. When the server leaves a room, it will insert new rows with + # `event_id = null` for all current state. + # + # TODO: Add DB rows to better see what's going on. 
+ membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=after_user1_leave_token.room_key, + to_key=after_server_leave_token.room_key, + ) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [], + ) + + def test_we_cause_server_left_room(self) -> None: """ Test that when probing over part of the DAG where we leave the room causing the server to leave the room (because we were the last local user in the room), we @@ -762,7 +817,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): join_pos1 = self.get_success( self.store.get_position_for_event(join_response1["event_id"]) ) - # Make sure random other non-member state that happens to have a state_key + # Make sure that random other non-member state that happens to have a `state_key` # matching the user ID doesn't mess with things. self.helper.send_state( room_id1, From cc5e5893fe800bc3fbb96ed407a9560ee96302b7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 27 Jun 2024 11:04:31 +0100 Subject: [PATCH 64/84] Handle multiple rows device inbox (#17362) Fix bug where we don't get new to-device from remote if they resent a message we've already persisted and have recorded in the DB twice. `device_federation_inbox` table doesn't have a unique index, and so we can race and store an entry in there twice. If we do so then `simple_select_one_txn` will throw an error due to the query returning more than one row. We should add an unique index, but it doesn't really matter so lets just handle the case of multiple rows correctly for now. 
--- changelog.d/17362.bugfix | 1 + synapse/storage/databases/main/deviceinbox.py | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) create mode 100644 changelog.d/17362.bugfix diff --git a/changelog.d/17362.bugfix b/changelog.d/17362.bugfix new file mode 100644 index 0000000000..a91ce9fc06 --- /dev/null +++ b/changelog.d/17362.bugfix @@ -0,0 +1 @@ +Fix rare race which causes no new to-device messages to be received from remote server. diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py index 07333efff8..5a752b9b8c 100644 --- a/synapse/storage/databases/main/deviceinbox.py +++ b/synapse/storage/databases/main/deviceinbox.py @@ -825,14 +825,13 @@ class DeviceInboxWorkerStore(SQLBaseStore): # Check if we've already inserted a matching message_id for that # origin. This can happen if the origin doesn't receive our # acknowledgement from the first time we received the message. - already_inserted = self.db_pool.simple_select_one_txn( + already_inserted = self.db_pool.simple_select_list_txn( txn, table="device_federation_inbox", keyvalues={"origin": origin, "message_id": message_id}, retcols=("message_id",), - allow_none=True, ) - if already_inserted is not None: + if already_inserted: return # Add an entry for this message_id so that we know we've processed From 27756c9fdff78e036ba6288de6b3079f30cfddcf Mon Sep 17 00:00:00 2001 From: Till <2353100+S7evinK@users.noreply.github.com> Date: Thu, 27 Jun 2024 13:36:08 +0200 Subject: [PATCH 65/84] Use rstcheck to "lint" the README (#17367) Follow up to https://github.com/element-hq/synapse/pull/17363, so we can detect issues with the RST file early on. ### Pull Request Checklist * [x] Pull request is based on the develop branch * [x] Pull request includes a [changelog file](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#changelog). The entry should: - Be a short description of your change which makes sense to users. 
"Fixed a bug that prevented receiving messages from other servers." instead of "Moved X method from `EventStore` to `EventWorkerStore`.". - Use markdown where necessary, mostly for `code blocks`. - End with either a period (.) or an exclamation mark (!). - Start with a capital letter. - Feel free to credit yourself, by adding a sentence "Contributed by @github_username." or "Contributed by [Your Name]." to the end of the entry. * [x] [Code style](https://element-hq.github.io/synapse/latest/code_style.html) is correct (run the [linters](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#run-the-linters)) --- .github/workflows/tests.yml | 20 ++++++++++++++++++++ changelog.d/17367.misc | 1 + 2 files changed, 21 insertions(+) create mode 100644 changelog.d/17367.misc diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index cdd881fbe1..2cc5a525a6 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -21,6 +21,7 @@ jobs: trial: ${{ !startsWith(github.ref, 'refs/pull/') || steps.filter.outputs.trial }} integration: ${{ !startsWith(github.ref, 'refs/pull/') || steps.filter.outputs.integration }} linting: ${{ !startsWith(github.ref, 'refs/pull/') || steps.filter.outputs.linting }} + linting_readme: ${{ !startsWith(github.ref, 'refs/pull/') || steps.filter.outputs.linting_readme }} steps: - uses: dorny/paths-filter@v3 id: filter @@ -72,6 +73,9 @@ jobs: - 'pyproject.toml' - 'poetry.lock' - '.github/workflows/tests.yml' + + linting_readme: + - 'README.rst' check-sampleconfig: runs-on: ubuntu-latest @@ -269,6 +273,20 @@ jobs: - run: cargo fmt --check + # This is to detect issues with the rst file, which can otherwise cause issues + # when uploading packages to PyPi. 
+ lint-readme: + runs-on: ubuntu-latest + needs: changes + if: ${{ needs.changes.outputs.linting_readme == 'true' }} + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.x" + - run: "pip install rstcheck" + - run: "rstcheck --report-level=WARNING README.rst" + # Dummy step to gate other tests on without repeating the whole list linting-done: if: ${{ !cancelled() }} # Run this even if prior jobs were skipped @@ -284,6 +302,7 @@ jobs: - lint-clippy - lint-clippy-nightly - lint-rustfmt + - lint-readme runs-on: ubuntu-latest steps: - uses: matrix-org/done-action@v2 @@ -301,6 +320,7 @@ jobs: lint-clippy lint-clippy-nightly lint-rustfmt + lint-readme calculate-test-jobs: diff --git a/changelog.d/17367.misc b/changelog.d/17367.misc new file mode 100644 index 0000000000..361731b8ae --- /dev/null +++ b/changelog.d/17367.misc @@ -0,0 +1 @@ +Add CI check for the README. \ No newline at end of file From 830e09d2defc6ae742dce30bdc822dcaf9a74092 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 10:13:02 -0500 Subject: [PATCH 66/84] Grab `prev_membership` to see whether the server left the room (fixes tests) See https://github.com/element-hq/synapse/pull/17320#discussion_r1657170493 `prev_membership` helps determine whether we should include the `event_id=null` row because we can check whether we have already left. 
- When we leave the room causing the server to leave the room, the `prev_event_id` will be our join event - When the server leaves the room after us, the `prev_event_id` will be leave event - In the state reset case, `prev_event_id` will be our join event --- synapse/storage/databases/main/stream.py | 20 ++++++-------------- tests/storage/test_stream.py | 7 +++---- 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 9e94cb08f6..d94b9366ab 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -877,10 +877,12 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): COALESCE(e.instance_name, s.instance_name), COALESCE(e.stream_ordering, s.stream_id), m.membership, - e.sender + e.sender, + m_prev.membership AS prev_membership FROM current_state_delta_stream AS s LEFT JOIN events AS e ON e.event_id = s.event_id LEFT JOIN room_memberships AS m ON m.event_id = s.event_id + LEFT JOIN room_memberships AS m_prev ON s.prev_event_id = m_prev.event_id WHERE s.stream_id > ? AND s.stream_id <= ? AND s.state_key = ? AND s.type = ? @@ -890,7 +892,6 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): txn.execute(sql, args) membership_changes: List[CurrentStateDeltaMembership] = [] - membership_change_map: Dict[str, CurrentStateDeltaMembership] = {} for ( event_id, prev_event_id, @@ -899,6 +900,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): stream_ordering, membership, sender, + prev_membership, ) in txn: assert room_id is not None assert instance_name is not None @@ -918,16 +920,8 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): # `event_id = null` row is a `leave` and we don't want duplicate # membership changes in our results, let's get rid of those # (deduplicate) (see `test_server_left_after_us_room`). 
- if event_id is None: - already_tracked_membership_change = membership_change_map.get( - prev_event_id - ) - if ( - already_tracked_membership_change is not None - and already_tracked_membership_change.membership - == Membership.LEAVE - ): - continue + if event_id is None and prev_membership == Membership.LEAVE: + continue membership_change = CurrentStateDeltaMembership( event_id=event_id, @@ -944,8 +938,6 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ) membership_changes.append(membership_change) - if event_id: - membership_change_map[event_id] = membership_change return membership_changes diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 0082132474..1342794d37 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -1019,7 +1019,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): user2_tok = self.login(user2_id, "pass") room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) - self.helper.join(room_id1, user1_id, tok=user1_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) before_reset_token = self.event_sources.get_current_token() @@ -1056,8 +1056,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): "type": EventTypes.Member, "state_key": user1_id, "event_id": None, - # FIXME: I'm not sure if a state reset should have a prev_event_id - "prev_event_id": None, + "prev_event_id": join_response1["event_id"], "instance_name": dummy_state_pos.instance_name, }, desc="state reset user in current_state_delta_stream", @@ -1088,7 +1087,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): CurrentStateDeltaMembership( event_id=None, event_pos=dummy_state_pos, - prev_event_id=None, + prev_event_id=join_response1["event_id"], room_id=room_id1, membership="leave", sender=None, # user1_id, From 15fcead2a5df17ee10278f1c0cdd16dbba26c76d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: 
Thu, 27 Jun 2024 11:33:41 -0500 Subject: [PATCH 67/84] Slight clean-up --- synapse/storage/databases/main/stream.py | 12 +++++------- tests/storage/test_stream.py | 6 +++--- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index d94b9366ab..ab592dcf15 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -846,7 +846,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): min_from_id = from_key.stream max_to_id = to_key.get_max_stream_pos() - args: List[Any] = [min_from_id, max_to_id, user_id, EventTypes.Member] + args: List[Any] = [min_from_id, max_to_id, EventTypes.Member, user_id] # TODO: It would be good to assert that the `from_token`/`to_token` is >= # the first row in `current_state_delta_stream` for the rooms we're @@ -874,7 +874,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): e.event_id, s.prev_event_id, s.room_id, - COALESCE(e.instance_name, s.instance_name), + s.instance_name, COALESCE(e.stream_ordering, s.stream_id), m.membership, e.sender, @@ -884,8 +884,8 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): LEFT JOIN room_memberships AS m ON m.event_id = s.event_id LEFT JOIN room_memberships AS m_prev ON s.prev_event_id = m_prev.event_id WHERE s.stream_id > ? AND s.stream_id <= ? - AND s.state_key = ? AND s.type = ? + AND s.state_key = ? ORDER BY s.stream_id ASC """ @@ -916,10 +916,8 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): # `event_id = null` for all current state. This means we might # already have a row for the leave event and then another for the # same leave where the `event_id=null` but the `prev_event_id` is - # pointing back at the earlier leave event. 
Since we're assuming the - # `event_id = null` row is a `leave` and we don't want duplicate - # membership changes in our results, let's get rid of those - # (deduplicate) (see `test_server_left_after_us_room`). + # pointing back at the earlier leave event. We don't want to report + # the leave, if we already have a leave event. if event_id is None and prev_membership == Membership.LEAVE: continue diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 1342794d37..5a054d7f2e 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -785,9 +785,9 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): def test_we_cause_server_left_room(self) -> None: """ - Test that when probing over part of the DAG where we leave the room causing the - server to leave the room (because we were the last local user in the room), we - still see the join and leave changes. + Test that when probing over part of the DAG where the user leaves the room + causing the server to leave the room (because we were the last local user in the + room), we still see the join and leave changes. This is to make sure we play nicely with this behavior: When the server leaves a room, it will insert new rows with `event_id = null` into the From 81c06bec20d2f6732100672853a140a6e19ff67d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 11:50:18 -0500 Subject: [PATCH 68/84] Detect state resets --- synapse/storage/databases/main/stream.py | 51 +++++++++++++++++------- tests/storage/test_stream.py | 15 ++++++- 2 files changed, 50 insertions(+), 16 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index ab592dcf15..19dba00a0f 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -123,6 +123,8 @@ class CurrentStateDeltaMembership: room_id: The room ID of the membership event. 
membership: The membership state of the user in the room sender: The person who sent the membership event + state_reset: Whether the membership in the room was changed without a + corresponding event (state reset). """ event_id: Optional[str] @@ -131,6 +133,7 @@ class CurrentStateDeltaMembership: room_id: str membership: str sender: Optional[str] + state_reset: bool def generate_pagination_where_clause( @@ -846,7 +849,15 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): min_from_id = from_key.stream max_to_id = to_key.get_max_stream_pos() - args: List[Any] = [min_from_id, max_to_id, EventTypes.Member, user_id] + args: List[Any] = [ + EventTypes.Member, + user_id, + user_id, + min_from_id, + max_to_id, + EventTypes.Member, + user_id, + ] # TODO: It would be good to assert that the `from_token`/`to_token` is >= # the first row in `current_state_delta_stream` for the rooms we're @@ -859,30 +870,35 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): # `stream_ordering` is unique across the Synapse instance so this should # work fine. # - # We `COALESCE` the `instance_name` and `stream_ordering` because we prefer - # the source of truth from the events table. This gives slightly more - # accurate results when available since `current_state_delta_stream` only - # tracks that the current state is at this stream position (not what stream - # position the state event was added) and batches events at the same - # `stream_id` in certain cases. + # We `COALESCE` the `stream_ordering` because we prefer the source of truth + # from the `events` table. This gives slightly more accurate results when + # available since `current_state_delta_stream` only tracks that the current + # state is at this stream position (not what stream position the state event + # was added) and uses the *minimum* stream position for batches of events. 
# - # TODO: We need to add indexes for `current_state_delta_stream.event_id` and - # `current_state_delta_stream.state_key`/`current_state_delta_stream.type` - # for this to be efficient. + # The extra `LEFT JOIN` by stream position are only needed to tell a state + # reset from the server leaving the room. Both cases have `event_id = null` + # but if we can find a corresponding event at that stream position, then we + # know it was just the server leaving the room. sql = """ SELECT - e.event_id, + COALESCE(e.event_id, e_by_stream.event_id) AS event_id, s.prev_event_id, s.room_id, s.instance_name, - COALESCE(e.stream_ordering, s.stream_id), - m.membership, - e.sender, + COALESCE(e.stream_ordering, e_by_stream.stream_ordering, s.stream_id) AS stream_ordering, + COALESCE(m.membership, m_by_stream.membership) AS membership, + COALESCE(e.sender, e_by_stream.sender) AS sender, m_prev.membership AS prev_membership FROM current_state_delta_stream AS s LEFT JOIN events AS e ON e.event_id = s.event_id LEFT JOIN room_memberships AS m ON m.event_id = s.event_id LEFT JOIN room_memberships AS m_prev ON s.prev_event_id = m_prev.event_id + LEFT JOIN events AS e_by_stream ON e_by_stream.stream_ordering = s.stream_id + AND e_by_stream.type = ? + AND e_by_stream.state_key = ? + LEFT JOIN room_memberships AS m_by_stream ON m_by_stream.event_stream_ordering = s.stream_id + AND m_by_stream.user_id = ? WHERE s.stream_id > ? AND s.stream_id <= ? AND s.type = ? AND s.state_key = ? @@ -921,6 +937,12 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): if event_id is None and prev_membership == Membership.LEAVE: continue + # We can detect a state reset if there was a membership change + # without a corresponding event. 
+ state_reset = False + if event_id is None and membership != prev_membership: + state_reset = True + membership_change = CurrentStateDeltaMembership( event_id=event_id, event_pos=PersistedEventPosition( @@ -933,6 +955,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): membership if membership is not None else Membership.LEAVE ), sender=sender, + state_reset=state_reset, ) membership_changes.append(membership_change) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 5a054d7f2e..acb2f0e429 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -615,6 +615,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="join", sender=user1_id, + state_reset=False, ) ], ) @@ -716,6 +717,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="join", sender=user1_id, + state_reset=False, ), CurrentStateDeltaMembership( event_id=leave_response1["event_id"], @@ -724,6 +726,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="leave", sender=user1_id, + state_reset=False, ), ], ) @@ -882,14 +885,16 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="join", sender=user1_id, + state_reset=False, ), CurrentStateDeltaMembership( - event_id=None, # leave_response1["event_id"], + event_id=leave_response1["event_id"], event_pos=leave_pos1, prev_event_id=join_response1["event_id"], room_id=room_id1, membership="leave", - sender=None, # user1_id, + sender=user1_id, + state_reset=False, ), ], ) @@ -1004,6 +1009,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="join", sender=user3_id, + state_reset=False, ), ], ) @@ -1091,6 +1097,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): 
room_id=room_id1, membership="leave", sender=None, # user1_id, + state_reset=True, ), ], ) @@ -1141,6 +1148,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="join", sender=user1_id, + state_reset=False, ), CurrentStateDeltaMembership( event_id=join_response2["event_id"], @@ -1149,6 +1157,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id2, membership="join", sender=user1_id, + state_reset=False, ), ], ) @@ -1175,6 +1184,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="join", sender=user1_id, + state_reset=False, ) ], ) @@ -1368,6 +1378,7 @@ class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase( room_id=intially_unjoined_room_id, membership="join", sender=user1_id, + state_reset=False, ), ], ) From eb159c11cd7bcc0a72983da46a728282fdbed8e7 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 14:38:55 -0500 Subject: [PATCH 69/84] Don't worry about `state_reset` for now See: - Why no `COALESCE` https://github.com/element-hq/synapse/pull/17320#discussion_r1657435662 - Don't worry about `state_reset` for now, https://github.com/element-hq/synapse/pull/17320#discussion_r1657562645 --- synapse/storage/databases/main/stream.py | 53 ++++++----------------- tests/storage/test_stream.py | 55 +++++++++++------------- 2 files changed, 36 insertions(+), 72 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 19dba00a0f..c128eb5d5b 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -123,8 +123,6 @@ class CurrentStateDeltaMembership: room_id: The room ID of the membership event. 
membership: The membership state of the user in the room sender: The person who sent the membership event - state_reset: Whether the membership in the room was changed without a - corresponding event (state reset). """ event_id: Optional[str] @@ -133,7 +131,6 @@ class CurrentStateDeltaMembership: room_id: str membership: str sender: Optional[str] - state_reset: bool def generate_pagination_where_clause( @@ -849,56 +846,37 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): min_from_id = from_key.stream max_to_id = to_key.get_max_stream_pos() - args: List[Any] = [ - EventTypes.Member, - user_id, - user_id, - min_from_id, - max_to_id, - EventTypes.Member, - user_id, - ] + args: List[Any] = [min_from_id, max_to_id, EventTypes.Member, user_id] # TODO: It would be good to assert that the `from_token`/`to_token` is >= # the first row in `current_state_delta_stream` for the rooms we're # interested in. Otherwise, we will end up with empty results and not know # it. - # We have to look-up events by `stream_ordering` because - # `current_state_delta_stream.event_id` can be `null` if the server is no - # longer in the room or a state reset happened and it was unset. - # `stream_ordering` is unique across the Synapse instance so this should - # work fine. + # We could `COALESCE(e.stream_ordering, s.stream_id)` to get more accurate + # stream positioning when available but given our usages, we can avoid the + # complexity. Between two (valid) stream tokens, we will still get all of + # the state changes. Since those events are persisted in a batch, valid + # tokens will either be before or after the batch of events. # - # We `COALESCE` the `stream_ordering` because we prefer the source of truth - # from the `events` table. 
This gives slightly more accurate results when - # available since `current_state_delta_stream` only tracks that the current + # `stream_ordering` from the `events` table is more accurate when available + # since the `current_state_delta_stream` table only tracks that the current # state is at this stream position (not what stream position the state event # was added) and uses the *minimum* stream position for batches of events. - # - # The extra `LEFT JOIN` by stream position are only needed to tell a state - # reset from the server leaving the room. Both cases have `event_id = null` - # but if we can find a corresponding event at that stream position, then we - # know it was just the server leaving the room. sql = """ SELECT - COALESCE(e.event_id, e_by_stream.event_id) AS event_id, + e.event_id, s.prev_event_id, s.room_id, s.instance_name, - COALESCE(e.stream_ordering, e_by_stream.stream_ordering, s.stream_id) AS stream_ordering, - COALESCE(m.membership, m_by_stream.membership) AS membership, - COALESCE(e.sender, e_by_stream.sender) AS sender, + s.stream_id, + m.membership, + e.sender, m_prev.membership AS prev_membership FROM current_state_delta_stream AS s LEFT JOIN events AS e ON e.event_id = s.event_id LEFT JOIN room_memberships AS m ON m.event_id = s.event_id LEFT JOIN room_memberships AS m_prev ON s.prev_event_id = m_prev.event_id - LEFT JOIN events AS e_by_stream ON e_by_stream.stream_ordering = s.stream_id - AND e_by_stream.type = ? - AND e_by_stream.state_key = ? - LEFT JOIN room_memberships AS m_by_stream ON m_by_stream.event_stream_ordering = s.stream_id - AND m_by_stream.user_id = ? WHERE s.stream_id > ? AND s.stream_id <= ? AND s.type = ? AND s.state_key = ? @@ -937,12 +915,6 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): if event_id is None and prev_membership == Membership.LEAVE: continue - # We can detect a state reset if there was a membership change - # without a corresponding event. 
- state_reset = False - if event_id is None and membership != prev_membership: - state_reset = True - membership_change = CurrentStateDeltaMembership( event_id=event_id, event_pos=PersistedEventPosition( @@ -955,7 +927,6 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): membership if membership is not None else Membership.LEAVE ), sender=sender, - state_reset=state_reset, ) membership_changes.append(membership_change) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index acb2f0e429..4f8f919a24 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -615,7 +615,6 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="join", sender=user1_id, - state_reset=False, ) ], ) @@ -717,7 +716,6 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="join", sender=user1_id, - state_reset=False, ), CurrentStateDeltaMembership( event_id=leave_response1["event_id"], @@ -726,7 +724,6 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="leave", sender=user1_id, - state_reset=False, ), ], ) @@ -885,16 +882,14 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="join", sender=user1_id, - state_reset=False, ), CurrentStateDeltaMembership( - event_id=leave_response1["event_id"], + event_id=None, # leave_response1["event_id"], event_pos=leave_pos1, prev_event_id=join_response1["event_id"], room_id=room_id1, membership="leave", - sender=user1_id, - state_reset=False, + sender=None, # user1_id, ), ], ) @@ -924,16 +919,6 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): # Persist the user1, user3, and user4 join events in the same batch so they all # end up in the `current_state_delta_stream` table with the same # stream_ordering. 
- join_event1, join_event_context1 = self.get_success( - create_event( - self.hs, - sender=user1_id, - type=EventTypes.Member, - state_key=user1_id, - content={"membership": "join"}, - room_id=room_id1, - ) - ) join_event3, join_event_context3 = self.get_success( create_event( self.hs, @@ -944,6 +929,19 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, ) ) + # We want to put user1 in the middle of the batch. This way, regardless of the + # implementation that inserts rows into current_state_delta_stream` (whether it + # be minimum/maximum of stream position of the batch), we will still catch bugs. + join_event1, join_event_context1 = self.get_success( + create_event( + self.hs, + sender=user1_id, + type=EventTypes.Member, + state_key=user1_id, + content={"membership": "join"}, + room_id=room_id1, + ) + ) join_event4, join_event_context4 = self.get_success( create_event( self.hs, @@ -957,8 +955,8 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): self.get_success( self.persistence.persist_events( [ - (join_event1, join_event_context1), (join_event3, join_event_context3), + (join_event1, join_event_context1), (join_event4, join_event_context4), ] ) @@ -966,10 +964,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): after_room1_token = self.event_sources.get_current_token() - # Let's get membership changes from user3's perspective because it was in the - # middle of the batch. This way, if rows in` current_state_delta_stream` are - # stored with the first or last event's `stream_ordering`, we will still catch - # bugs. + # Get the membership changes for the user. 
# # At this point, the `current_state_delta_stream` table should look like (notice # those three memberships at the end with `stream_id=7` because we persisted @@ -987,7 +982,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): # | 7 | '!x:test' | 'm.room.member' | '@user4:test' | '$xxx' | None | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( - user3_id, + user1_id, from_key=before_room1_token.room_key, to_key=after_room1_token.room_key, ) @@ -1003,13 +998,16 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): membership_changes, [ CurrentStateDeltaMembership( - event_id=join_event3.event_id, + event_id=join_event1.event_id, + # Ideally, this would be `join_pos1` (to match the `event_id`) but + # when events are persisted in a batch, they are all stored in the + # `current_state_delta_stream` table with the minimum + # `stream_ordering` from the batch. event_pos=join_pos3, prev_event_id=None, room_id=room_id1, membership="join", - sender=user3_id, - state_reset=False, + sender=user1_id, ), ], ) @@ -1097,7 +1095,6 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="leave", sender=None, # user1_id, - state_reset=True, ), ], ) @@ -1148,7 +1145,6 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="join", sender=user1_id, - state_reset=False, ), CurrentStateDeltaMembership( event_id=join_response2["event_id"], @@ -1157,7 +1153,6 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id2, membership="join", sender=user1_id, - state_reset=False, ), ], ) @@ -1184,7 +1179,6 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="join", sender=user1_id, - state_reset=False, ) ], ) @@ -1378,7 +1372,6 @@ class 
GetCurrentStateDeltaMembershipChangesForUserFederationTestCase( room_id=intially_unjoined_room_id, membership="join", sender=user1_id, - state_reset=False, ), ], ) From ba56350642d33332d5ab3f3a94005e408cb9f433 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 15:31:18 -0500 Subject: [PATCH 70/84] Passing current tests --- synapse/handlers/sliding_sync.py | 44 +++++++++++++++++++---------- tests/handlers/test_sliding_sync.py | 9 ++++-- 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 3ce10d3ea7..b327e340ff 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -18,7 +18,6 @@ # # import logging -from collections import defaultdict from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple import attr @@ -48,7 +47,9 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -def filter_membership_for_sync(*, membership: str, user_id: str, sender: str) -> bool: +def filter_membership_for_sync( + *, membership: str, user_id: str, sender: Optional[str] +) -> bool: """ Returns True if the membership event should be included in the sync response, otherwise False. @@ -65,6 +66,11 @@ def filter_membership_for_sync(*, membership: str, user_id: str, sender: str) -> # # This logic includes kicks (leave events where the sender is not the same user) and # can be read as "anything that isn't a leave or a leave with a different sender". + # + # When `sender=None` and `membership=Membership.LEAVE`, it means that a state reset + # happened that removed the user from the room, or the user was the last person + # locally to leave the room which caused the server to leave the room. 
In both + # cases, TODO return membership != Membership.LEAVE or sender != user_id @@ -99,10 +105,10 @@ class _RoomMembershipForUser: range """ - event_id: str + event_id: Optional[str] event_pos: PersistedEventPosition membership: str - sender: str + sender: Optional[str] newly_joined: bool def copy_and_replace(self, **kwds: Any) -> "_RoomMembershipForUser": @@ -540,9 +546,11 @@ class SlidingSyncHandler: first_membership_change_by_room_id_in_from_to_range: Dict[ str, CurrentStateDeltaMembership ] = {} - non_join_event_ids_by_room_id_in_from_to_range: Dict[str, List[str]] = ( - defaultdict(list) - ) + # Keep track if the room has a non-join event in the token range so we can later + # tell if it was a `newly_joined` room. If the last membership event in the + # token range is a join and there is also some non-join in the range, we know + # they `newly_joined`. + has_non_join_event_by_room_id_in_from_to_range: Dict[str, bool] = {} for ( membership_change ) in current_state_delta_membership_changes_in_from_to_range: @@ -551,16 +559,13 @@ class SlidingSyncHandler: last_membership_change_by_room_id_in_from_to_range[room_id] = ( membership_change ) - # Only set if we haven't already set it first_membership_change_by_room_id_in_from_to_range.setdefault( room_id, membership_change ) if membership_change.membership != Membership.JOIN: - non_join_event_ids_by_room_id_in_from_to_range[room_id].append( - membership_change.event_id - ) + has_non_join_event_by_room_id_in_from_to_range[room_id] = True # 2) Fixup # @@ -574,6 +579,7 @@ class SlidingSyncHandler: ) in last_membership_change_by_room_id_in_from_to_range.values(): room_id = last_membership_change_in_from_to_range.room_id + # 3) if last_membership_change_in_from_to_range.membership == Membership.JOIN: possibly_newly_joined_room_ids.add(room_id) @@ -592,10 +598,14 @@ class SlidingSyncHandler: # 3) Figure out `newly_joined` prev_event_ids_before_token_range: List[str] = [] for possibly_newly_joined_room_id in 
possibly_newly_joined_room_ids: - non_joins_for_room = non_join_event_ids_by_room_id_in_from_to_range[ - possibly_newly_joined_room_id - ] - if len(non_joins_for_room) > 0: + has_non_join_in_from_to_range = ( + has_non_join_event_by_room_id_in_from_to_range.get( + possibly_newly_joined_room_id, False + ) + ) + # If the last membership event in the token range is a join and there is + # also some non-join in the range, we know they `newly_joined`. + if has_non_join_in_from_to_range: # We found a `newly_joined` room (we left and joined within the token range) filtered_sync_room_id_set[room_id] = filtered_sync_room_id_set[ room_id @@ -968,6 +978,10 @@ class SlidingSyncHandler: Membership.INVITE, Membership.KNOCK, ): + # This should never happen. If someone is invited/knocked on room, then + # there should be an event for it. + assert rooms_membership_for_user_at_to_token.event_id is not None + invite_or_knock_event = await self.store.get_event( rooms_membership_for_user_at_to_token.event_id ) diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index 7339cb460e..a751fef1df 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -390,7 +390,7 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # Leave during the from_token/to_token range (newly_left) room_id2 = self.helper.create_room_as(user1_id, tok=user1_tok) - leave_response = self.helper.leave(room_id2, user1_id, tok=user1_tok) + _leave_response2 = self.helper.leave(room_id2, user1_id, tok=user1_tok) after_room2_token = self.event_sources.get_current_token() @@ -404,10 +404,13 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # Only the newly_left room should show up self.assertEqual(room_id_results.keys(), {room_id2}) - # It should be pointing to the latest membership event in the from/to range + # It should be pointing to the latest membership event in the from/to range but + # the `event_id` is `None` because we left the 
room causing the server to leave + # the room because no other local users are in it (quirk of the + # `current_state_delta_stream` table that we source things from) self.assertEqual( room_id_results[room_id2].event_id, - leave_response["event_id"], + None, # _leave_response2["event_id"], ) # We should *NOT* be `newly_joined` because we are instead `newly_left` self.assertEqual(room_id_results[room_id2].newly_joined, False) From f77403251cd2faf65689b785eba0a6af5366b5bd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 15:39:43 -0500 Subject: [PATCH 71/84] Add better comments --- synapse/handlers/sliding_sync.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index b327e340ff..3dd32ae1f1 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -70,8 +70,9 @@ def filter_membership_for_sync( # When `sender=None` and `membership=Membership.LEAVE`, it means that a state reset # happened that removed the user from the room, or the user was the last person # locally to leave the room which caused the server to leave the room. In both - # cases, TODO - return membership != Membership.LEAVE or sender != user_id + # cases, we can just remove the rooms since they are no longer relevant to the user. + # They could still be added back later if they are `newly_left`. 
+ return membership != Membership.LEAVE or sender not in (user_id, None) # We can't freeze this class because we want to update it in place with the @@ -508,6 +509,8 @@ class SlidingSyncHandler: ) ) + # Filter the rooms that that we have updated room membership events to the point + # in time of the `to_token` (from the "1)" fixups) filtered_sync_room_id_set = { room_id: room_membership_for_user for room_id, room_membership_for_user in sync_room_id_set.items() From 325856e14b97aa6eca59d4d5d3b4145d050adfe0 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 15:57:01 -0500 Subject: [PATCH 72/84] Inclusive ranges --- synapse/handlers/sliding_sync.py | 3 +- tests/rest/client/test_sync.py | 92 ++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 3dd32ae1f1..db5dd75d04 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -263,7 +263,8 @@ class SlidingSyncHandler: for range in list_config.ranges: sliced_room_ids = [ room_id - for room_id, _ in sorted_room_info[range[0] : range[1]] + # Both sides of range are inclusive + for room_id, _ in sorted_room_info[range[0] : range[1] + 1] ] ops.append( diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index bd1e7d521b..3f4f88c3d1 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1616,6 +1616,98 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): channel.json_body["lists"]["foo-list"], ) + def test_sliced_windows(self) -> None: + """ + Test that the `lists` `ranges` are sliced correctly. Both sides of each range + are inclusive. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) + room_id2 = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) + room_id3 = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) + + # Make the Sliding Sync request for a single room + channel = self.make_request( + "POST", + self.sync_endpoint, + { + "lists": { + "foo-list": { + "ranges": [[0, 0]], + "required_state": [ + ["m.room.join_rules", ""], + ["m.room.history_visibility", ""], + ["m.space.child", "*"], + ], + "timeline_limit": 1, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # Make sure it has the foo-list we requested + self.assertListEqual( + list(channel.json_body["lists"].keys()), + ["foo-list"], + channel.json_body["lists"].keys(), + ) + # Make sure the list is sorted in the way we expect + self.assertListEqual( + list(channel.json_body["lists"]["foo-list"]["ops"]), + [ + { + "op": "SYNC", + "range": [0, 0], + "room_ids": [room_id3], + } + ], + channel.json_body["lists"]["foo-list"], + ) + + # Make the Sliding Sync request for the first two rooms + channel = self.make_request( + "POST", + self.sync_endpoint, + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [ + ["m.room.join_rules", ""], + ["m.room.history_visibility", ""], + ["m.space.child", "*"], + ], + "timeline_limit": 1, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # Make sure it has the foo-list we requested + self.assertListEqual( + list(channel.json_body["lists"].keys()), + ["foo-list"], + channel.json_body["lists"].keys(), + ) + # Make sure the list is sorted in the way we expect + self.assertListEqual( + list(channel.json_body["lists"]["foo-list"]["ops"]), + [ + { + "op": "SYNC", + "range": [0, 1], + "room_ids": [room_id3, room_id2], + } + 
], + channel.json_body["lists"]["foo-list"], + ) + def test_rooms_limited_initial_sync(self) -> None: """ Test that we mark `rooms` as `limited=True` when we saturate the `timeline_limit` From 63c7b5017ad82ee20bc2ae5898b051a2660cf188 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 16:34:41 -0500 Subject: [PATCH 73/84] (doesn't work) Add test for batch persisting multiple member events for the same user --- tests/storage/test_stream.py | 121 +++++++++++++++++++++++++++++++++-- 1 file changed, 115 insertions(+), 6 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 4f8f919a24..53a58bd82a 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -46,7 +46,7 @@ from synapse.types import ( from synapse.util import Clock from tests.test_utils.event_injection import create_event -from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase +from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase, skip_unless logger = logging.getLogger(__name__) @@ -894,12 +894,12 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): ], ) - def test_membership_persisted_in_same_batch(self) -> None: + def test_different_user_membership_persisted_in_same_batch(self) -> None: """ - Test batch of membership events being processed at once. This will result in all - of the memberships being stored in the `current_state_delta_stream` table with - the same `stream_ordering` even though the individual events have different - `stream_ordering`s. + Test batch of membership events from different users being processed at once. + This will result in all of the memberships being stored in the + `current_state_delta_stream` table with the same `stream_ordering` even though + the individual events have different `stream_ordering`s. 
""" user1_id = self.register_user("user1", "pass") _user1_tok = self.login(user1_id, "pass") @@ -1012,6 +1012,115 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): ], ) + @skip_unless( + False, + "persist code does not support multiple membership events for the same user in the same batch", + ) + def test_membership_persisted_in_same_batch(self) -> None: + """ + Test batch of membership events for the same user being processed at once. + + This *should* (doesn't happen currently) result in all of the memberships being + stored in the `current_state_delta_stream` table with the same `stream_ordering` + even though the individual events have different `stream_ordering`s. + + FIXME: Currently, only the `join_event` is recorded in the `current_state_delta_stream` + table. + """ + user1_id = self.register_user("user1", "pass") + _user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + # User2 is just the designated person to create the room (we do this across the + # tests to be consistent) + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + + # Persist a timeline event sandwiched between two membership events so they end + # up in the `current_state_delta_stream` table with the same `stream_id`. 
+ join_event, join_event_context = self.get_success( + create_event( + self.hs, + sender=user1_id, + type=EventTypes.Member, + state_key=user1_id, + content={"membership": "join"}, + room_id=room_id1, + ) + ) + timeline_event, timeline_event_context = self.get_success( + create_event( + self.hs, + sender=user1_id, + type=EventTypes.Message, + state_key=user1_id, + content={"body": "foo bar", "msgtype": "m.text"}, + room_id=room_id1, + ) + ) + leave_event, leave_event_context = self.get_success( + create_event( + self.hs, + sender=user1_id, + type=EventTypes.Member, + state_key=user1_id, + content={"membership": "leave"}, + room_id=room_id1, + ) + ) + self.get_success( + self.persistence.persist_events( + [ + (join_event, join_event_context), + (timeline_event, timeline_event_context), + (leave_event, leave_event_context), + ] + ) + ) + + after_room1_token = self.event_sources.get_current_token() + + # Get the membership changes for the user. + # + # At this point, the `current_state_delta_stream` table should look like (notice + # those three memberships at the end with `stream_id=7` because we persisted + # them in the same batch): + # + # TODO: DB rows to better see what's going on. + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_room1_token.room_key, + to_key=after_room1_token.room_key, + ) + ) + + join_pos = self.get_success( + self.store.get_position_for_event(join_event.event_id) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=leave_event.event_id, + # Ideally, this would be `leave_pos` (to match the `event_id`) but + # when events are persisted in a batch, they are all stored in the + # `current_state_delta_stream` table with the minimum + # `stream_ordering` from the batch. 
+ event_pos=join_pos, # leave_pos, + prev_event_id=None, + room_id=room_id1, + membership="leave", + sender=user1_id, + ), + ], + ) + def test_state_reset(self) -> None: """ Test a state reset scenario where the user gets removed from the room (when From 1158058aa52e47d0463b44f115222e0e122e045e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 17:58:31 -0500 Subject: [PATCH 74/84] Opt for tackling more batch scenarios in future PRs --- tests/rest/client/test_sync.py | 2 +- tests/storage/test_stream.py | 111 +-------------------------------- 2 files changed, 2 insertions(+), 111 deletions(-) diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 3f4f88c3d1..766c8850d0 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1624,7 +1624,7 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") - room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) + _room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) room_id2 = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) room_id3 = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 53a58bd82a..e420e680e2 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -46,7 +46,7 @@ from synapse.types import ( from synapse.util import Clock from tests.test_utils.event_injection import create_event -from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase, skip_unless +from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase logger = logging.getLogger(__name__) @@ -1012,115 +1012,6 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): ], ) - @skip_unless( - False, - "persist code does not support multiple membership 
events for the same user in the same batch", - ) - def test_membership_persisted_in_same_batch(self) -> None: - """ - Test batch of membership events for the same user being processed at once. - - This *should* (doesn't happen currently) result in all of the memberships being - stored in the `current_state_delta_stream` table with the same `stream_ordering` - even though the individual events have different `stream_ordering`s. - - FIXME: Currently, only the `join_event` is recorded in the `current_state_delta_stream` - table. - """ - user1_id = self.register_user("user1", "pass") - _user1_tok = self.login(user1_id, "pass") - user2_id = self.register_user("user2", "pass") - user2_tok = self.login(user2_id, "pass") - - before_room1_token = self.event_sources.get_current_token() - - # User2 is just the designated person to create the room (we do this across the - # tests to be consistent) - room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) - - # Persist a timeline event sandwiched between two membership events so they end - # up in the `current_state_delta_stream` table with the same `stream_id`. 
- join_event, join_event_context = self.get_success( - create_event( - self.hs, - sender=user1_id, - type=EventTypes.Member, - state_key=user1_id, - content={"membership": "join"}, - room_id=room_id1, - ) - ) - timeline_event, timeline_event_context = self.get_success( - create_event( - self.hs, - sender=user1_id, - type=EventTypes.Message, - state_key=user1_id, - content={"body": "foo bar", "msgtype": "m.text"}, - room_id=room_id1, - ) - ) - leave_event, leave_event_context = self.get_success( - create_event( - self.hs, - sender=user1_id, - type=EventTypes.Member, - state_key=user1_id, - content={"membership": "leave"}, - room_id=room_id1, - ) - ) - self.get_success( - self.persistence.persist_events( - [ - (join_event, join_event_context), - (timeline_event, timeline_event_context), - (leave_event, leave_event_context), - ] - ) - ) - - after_room1_token = self.event_sources.get_current_token() - - # Get the membership changes for the user. - # - # At this point, the `current_state_delta_stream` table should look like (notice - # those three memberships at the end with `stream_id=7` because we persisted - # them in the same batch): - # - # TODO: DB rows to better see what's going on. - membership_changes = self.get_success( - self.store.get_current_state_delta_membership_changes_for_user( - user1_id, - from_key=before_room1_token.room_key, - to_key=after_room1_token.room_key, - ) - ) - - join_pos = self.get_success( - self.store.get_position_for_event(join_event.event_id) - ) - - # Let the whole diff show on failure - self.maxDiff = None - self.assertEqual( - membership_changes, - [ - CurrentStateDeltaMembership( - event_id=leave_event.event_id, - # Ideally, this would be `leave_pos` (to match the `event_id`) but - # when events are persisted in a batch, they are all stored in the - # `current_state_delta_stream` table with the minimum - # `stream_ordering` from the batch. 
- event_pos=join_pos, # leave_pos, - prev_event_id=None, - room_id=room_id1, - membership="leave", - sender=user1_id, - ), - ], - ) - def test_state_reset(self) -> None: """ Test a state reset scenario where the user gets removed from the room (when From 32b8b68df67c6ef4a11921704c570236d2d08592 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 18:13:34 -0500 Subject: [PATCH 75/84] Add TODO to handle state resets See https://github.com/element-hq/synapse/pull/17320#discussion_r1656548733 --- synapse/handlers/sliding_sync.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index db5dd75d04..5dc9867907 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1002,6 +1002,12 @@ class SlidingSyncHandler: stripped_state.append(strip_event(invite_or_knock_event)) + # TODO: Handle state resets. For example, if we see + # `rooms_membership_for_user_at_to_token.membership = Membership.LEAVE` but + # `required_state` doesn't include it, we should indicate to the client that a + # state reset happened. Perhaps we should indicate this by setting `initial: + # True` and empty `required_state`. + return SlidingSyncResult.RoomResult( # TODO: Dummy value name=None, From b11f5c984b28b5ff6f5909db34a4e6b96d3e4880 Mon Sep 17 00:00:00 2001 From: davidegirardi <16451191+davidegirardi@users.noreply.github.com> Date: Fri, 28 Jun 2024 15:39:54 +0200 Subject: [PATCH 76/84] Clarify `url_preview_url_blacklist` is a usability feature (#17356) --- changelog.d/17356.doc | 1 + .../configuration/config_documentation.md | 21 +++++++++++-------- 2 files changed, 13 insertions(+), 9 deletions(-) create mode 100644 changelog.d/17356.doc diff --git a/changelog.d/17356.doc b/changelog.d/17356.doc new file mode 100644 index 0000000000..b393d8d147 --- /dev/null +++ b/changelog.d/17356.doc @@ -0,0 +1 @@ +Clarify `url_preview_url_blacklist` is a usability feature. 
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 80a7bf9d21..65b03ad0f8 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -1976,9 +1976,10 @@ This will not prevent the listed domains from accessing media themselves. It simply prevents users on this server from downloading media originating from the listed servers. -This will have no effect on media originating from the local server. -This only affects media downloaded from other Matrix servers, to -block domains from URL previews see [`url_preview_url_blacklist`](#url_preview_url_blacklist). +This will have no effect on media originating from the local server. This only +affects media downloaded from other Matrix servers, to control URL previews see +[`url_preview_ip_range_blacklist`](#url_preview_ip_range_blacklist) or +[`url_preview_url_blacklist`](#url_preview_url_blacklist). Defaults to an empty list (nothing blocked). @@ -2130,12 +2131,14 @@ url_preview_ip_range_whitelist: --- ### `url_preview_url_blacklist` -Optional list of URL matches that the URL preview spider is -denied from accessing. You should use `url_preview_ip_range_blacklist` -in preference to this, otherwise someone could define a public DNS -entry that points to a private IP address and circumvent the blacklist. -This is more useful if you know there is an entire shape of URL that -you know that will never want synapse to try to spider. +Optional list of URL matches that the URL preview spider is denied from +accessing. This is a usability feature, not a security one. You should use +`url_preview_ip_range_blacklist` in preference to this, otherwise someone could +define a public DNS entry that points to a private IP address and circumvent +the blacklist. Applications that perform redirects or serve different content +when detecting that Synapse is accessing them can also bypass the blacklist. 
+This is more useful if you know there is an entire shape of URL that you know +that you do not want Synapse to preview. Each list entry is a dictionary of url component attributes as returned by urlparse.urlsplit as applied to the absolute form of the URL. See From 2f5a77efae98cf5936d47473a31504f73d1d2315 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 1 Jul 2024 12:48:36 +0100 Subject: [PATCH 77/84] Limit size of presence EDUs (#17371) Otherwise they are unbounded. --------- Co-authored-by: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> --- changelog.d/17371.misc | 1 + .../sender/per_destination_queue.py | 31 +++-- tests/federation/test_federation_sender.py | 119 ++++++++++++++++++ 3 files changed, 140 insertions(+), 11 deletions(-) create mode 100644 changelog.d/17371.misc diff --git a/changelog.d/17371.misc b/changelog.d/17371.misc new file mode 100644 index 0000000000..0fbf19f4fb --- /dev/null +++ b/changelog.d/17371.misc @@ -0,0 +1 @@ +Limit size of presence EDUs to 50 entries. diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index d9f2f017ed..9f1c2fe22a 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -21,6 +21,7 @@ # import datetime import logging +from collections import OrderedDict from types import TracebackType from typing import TYPE_CHECKING, Dict, Hashable, Iterable, List, Optional, Tuple, Type @@ -68,6 +69,10 @@ sent_edus_by_type = Counter( # If the retry interval is larger than this then we enter "catchup" mode CATCHUP_RETRY_INTERVAL = 60 * 60 * 1000 +# Limit how many presence states we add to each presence EDU, to ensure that +# they are bounded in size. 
+MAX_PRESENCE_STATES_PER_EDU = 50 + class PerDestinationQueue: """ @@ -144,7 +149,7 @@ class PerDestinationQueue: # Map of user_id -> UserPresenceState of pending presence to be sent to this # destination - self._pending_presence: Dict[str, UserPresenceState] = {} + self._pending_presence: OrderedDict[str, UserPresenceState] = OrderedDict() # List of room_id -> receipt_type -> user_id -> receipt_dict, # @@ -399,7 +404,7 @@ class PerDestinationQueue: # through another mechanism, because this is all volatile! self._pending_edus = [] self._pending_edus_keyed = {} - self._pending_presence = {} + self._pending_presence.clear() self._pending_receipt_edus = [] self._start_catching_up() @@ -721,22 +726,26 @@ class _TransactionQueueManager: # Add presence EDU. if self.queue._pending_presence: + # Only send max 50 presence entries in the EDU, to bound the amount + # of data we're sending. + presence_to_add: List[JsonDict] = [] + while ( + self.queue._pending_presence + and len(presence_to_add) < MAX_PRESENCE_STATES_PER_EDU + ): + _, presence = self.queue._pending_presence.popitem(last=False) + presence_to_add.append( + format_user_presence_state(presence, self.queue._clock.time_msec()) + ) + pending_edus.append( Edu( origin=self.queue._server_name, destination=self.queue._destination, edu_type=EduTypes.PRESENCE, - content={ - "push": [ - format_user_presence_state( - presence, self.queue._clock.time_msec() - ) - for presence in self.queue._pending_presence.values() - ] - }, + content={"push": presence_to_add}, ) ) - self.queue._pending_presence = {} # Add read receipt EDUs. 
pending_edus.extend(self.queue._get_receipt_edus(force_flush=False, limit=5)) diff --git a/tests/federation/test_federation_sender.py b/tests/federation/test_federation_sender.py index 9073afc70e..6a8887fe74 100644 --- a/tests/federation/test_federation_sender.py +++ b/tests/federation/test_federation_sender.py @@ -27,6 +27,8 @@ from twisted.internet import defer from twisted.test.proto_helpers import MemoryReactor from synapse.api.constants import EduTypes, RoomEncryptionAlgorithms +from synapse.api.presence import UserPresenceState +from synapse.federation.sender.per_destination_queue import MAX_PRESENCE_STATES_PER_EDU from synapse.federation.units import Transaction from synapse.handlers.device import DeviceHandler from synapse.rest import admin @@ -266,6 +268,123 @@ class FederationSenderReceiptsTestCases(HomeserverTestCase): ) +class FederationSenderPresenceTestCases(HomeserverTestCase): + """ + Test federation sending for presence updates. + """ + + def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: + self.federation_transport_client = Mock(spec=["send_transaction"]) + self.federation_transport_client.send_transaction = AsyncMock() + hs = self.setup_test_homeserver( + federation_transport_client=self.federation_transport_client, + ) + + return hs + + def default_config(self) -> JsonDict: + config = super().default_config() + config["federation_sender_instances"] = None + return config + + def test_presence_simple(self) -> None: + "Test that sending a single presence update works" + + mock_send_transaction: AsyncMock = ( + self.federation_transport_client.send_transaction + ) + mock_send_transaction.return_value = {} + + sender = self.hs.get_federation_sender() + self.get_success( + sender.send_presence_to_destinations( + [UserPresenceState.default("@user:test")], + ["server"], + ) + ) + + self.pump() + + # expect a call to send_transaction + mock_send_transaction.assert_awaited_once() + + json_cb = 
mock_send_transaction.call_args[0][1] + data = json_cb() + self.assertEqual( + data["edus"], + [ + { + "edu_type": EduTypes.PRESENCE, + "content": { + "push": [ + { + "presence": "offline", + "user_id": "@user:test", + } + ] + }, + } + ], + ) + + def test_presence_batched(self) -> None: + """Test that sending lots of presence updates to a destination are + batched, rather than having them all sent in one EDU.""" + + mock_send_transaction: AsyncMock = ( + self.federation_transport_client.send_transaction + ) + mock_send_transaction.return_value = {} + + sender = self.hs.get_federation_sender() + + # We now send lots of presence updates to force the federation sender to + # batch them up. + number_presence_updates_to_send = MAX_PRESENCE_STATES_PER_EDU * 2 + self.get_success( + sender.send_presence_to_destinations( + [ + UserPresenceState.default(f"@user{i}:test") + for i in range(number_presence_updates_to_send) + ], + ["server"], + ) + ) + + self.pump() + + # We should have seen at least one transaction be sent by now. + mock_send_transaction.assert_called() + + # We don't want to specify exactly how the presence EDUs get sent out, + # could be one per transaction or multiple per transaction. We just want + # to assert that a) each presence EDU has bounded number of updates, and + # b) that all updates get sent out. + presence_edus = [] + for transaction_call in mock_send_transaction.call_args_list: + json_cb = transaction_call[0][1] + data = json_cb() + + for edu in data["edus"]: + self.assertEqual(edu.get("edu_type"), EduTypes.PRESENCE) + presence_edus.append(edu) + + # A set of all user presence we see, this should end up matching the + # number we sent out above. + seen_users: Set[str] = set() + + for edu in presence_edus: + presence_states = edu["content"]["push"] + + # This is where we actually check that the number of presence + # updates is bounded.
+ self.assertLessEqual(len(presence_states), MAX_PRESENCE_STATES_PER_EDU) + + seen_users.update(p["user_id"] for p in presence_states) + + self.assertEqual(len(seen_users), number_presence_updates_to_send) + + class FederationSenderDevicesTestCases(HomeserverTestCase): """ Test federation sending to update devices. From 1eccbfb82f8aab6b1de8d6f91346cad02d6bde3b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Jul 2024 13:41:48 +0100 Subject: [PATCH 78/84] Bump cryptography from 42.0.7 to 42.0.8 (#17382) --- poetry.lock | 66 ++++++++++++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/poetry.lock b/poetry.lock index 1bae0ea388..ca0757c8b8 100644 --- a/poetry.lock +++ b/poetry.lock @@ -403,43 +403,43 @@ files = [ [[package]] name = "cryptography" -version = "42.0.7" +version = "42.0.8" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
optional = false python-versions = ">=3.7" files = [ - {file = "cryptography-42.0.7-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:a987f840718078212fdf4504d0fd4c6effe34a7e4740378e59d47696e8dfb477"}, - {file = "cryptography-42.0.7-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:bd13b5e9b543532453de08bcdc3cc7cebec6f9883e886fd20a92f26940fd3e7a"}, - {file = "cryptography-42.0.7-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a79165431551042cc9d1d90e6145d5d0d3ab0f2d66326c201d9b0e7f5bf43604"}, - {file = "cryptography-42.0.7-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a47787a5e3649008a1102d3df55424e86606c9bae6fb77ac59afe06d234605f8"}, - {file = "cryptography-42.0.7-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:02c0eee2d7133bdbbc5e24441258d5d2244beb31da5ed19fbb80315f4bbbff55"}, - {file = "cryptography-42.0.7-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:5e44507bf8d14b36b8389b226665d597bc0f18ea035d75b4e53c7b1ea84583cc"}, - {file = "cryptography-42.0.7-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:7f8b25fa616d8b846aef64b15c606bb0828dbc35faf90566eb139aa9cff67af2"}, - {file = "cryptography-42.0.7-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:93a3209f6bb2b33e725ed08ee0991b92976dfdcf4e8b38646540674fc7508e13"}, - {file = "cryptography-42.0.7-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:e6b8f1881dac458c34778d0a424ae5769de30544fc678eac51c1c8bb2183e9da"}, - {file = "cryptography-42.0.7-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3de9a45d3b2b7d8088c3fbf1ed4395dfeff79d07842217b38df14ef09ce1d8d7"}, - {file = "cryptography-42.0.7-cp37-abi3-win32.whl", hash = "sha256:789caea816c6704f63f6241a519bfa347f72fbd67ba28d04636b7c6b7da94b0b"}, - {file = "cryptography-42.0.7-cp37-abi3-win_amd64.whl", hash = "sha256:8cb8ce7c3347fcf9446f201dc30e2d5a3c898d009126010cbd1f443f28b52678"}, - {file = "cryptography-42.0.7-cp39-abi3-macosx_10_12_universal2.whl", hash = 
"sha256:a3a5ac8b56fe37f3125e5b72b61dcde43283e5370827f5233893d461b7360cd4"}, - {file = "cryptography-42.0.7-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:779245e13b9a6638df14641d029add5dc17edbef6ec915688f3acb9e720a5858"}, - {file = "cryptography-42.0.7-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d563795db98b4cd57742a78a288cdbdc9daedac29f2239793071fe114f13785"}, - {file = "cryptography-42.0.7-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:31adb7d06fe4383226c3e963471f6837742889b3c4caa55aac20ad951bc8ffda"}, - {file = "cryptography-42.0.7-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:efd0bf5205240182e0f13bcaea41be4fdf5c22c5129fc7ced4a0282ac86998c9"}, - {file = "cryptography-42.0.7-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a9bc127cdc4ecf87a5ea22a2556cab6c7eda2923f84e4f3cc588e8470ce4e42e"}, - {file = "cryptography-42.0.7-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:3577d029bc3f4827dd5bf8bf7710cac13527b470bbf1820a3f394adb38ed7d5f"}, - {file = "cryptography-42.0.7-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:2e47577f9b18723fa294b0ea9a17d5e53a227867a0a4904a1a076d1646d45ca1"}, - {file = "cryptography-42.0.7-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:1a58839984d9cb34c855197043eaae2c187d930ca6d644612843b4fe8513c886"}, - {file = "cryptography-42.0.7-cp39-abi3-win32.whl", hash = "sha256:e6b79d0adb01aae87e8a44c2b64bc3f3fe59515280e00fb6d57a7267a2583cda"}, - {file = "cryptography-42.0.7-cp39-abi3-win_amd64.whl", hash = "sha256:16268d46086bb8ad5bf0a2b5544d8a9ed87a0e33f5e77dd3c3301e63d941a83b"}, - {file = "cryptography-42.0.7-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:2954fccea107026512b15afb4aa664a5640cd0af630e2ee3962f2602693f0c82"}, - {file = "cryptography-42.0.7-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:362e7197754c231797ec45ee081f3088a27a47c6c01eff2ac83f60f85a50fe60"}, - {file = 
"cryptography-42.0.7-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:4f698edacf9c9e0371112792558d2f705b5645076cc0aaae02f816a0171770fd"}, - {file = "cryptography-42.0.7-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:5482e789294854c28237bba77c4c83be698be740e31a3ae5e879ee5444166582"}, - {file = "cryptography-42.0.7-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:e9b2a6309f14c0497f348d08a065d52f3020656f675819fc405fb63bbcd26562"}, - {file = "cryptography-42.0.7-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d8e3098721b84392ee45af2dd554c947c32cc52f862b6a3ae982dbb90f577f14"}, - {file = "cryptography-42.0.7-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c65f96dad14f8528a447414125e1fc8feb2ad5a272b8f68477abbcc1ea7d94b9"}, - {file = "cryptography-42.0.7-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:36017400817987670037fbb0324d71489b6ead6231c9604f8fc1f7d008087c68"}, - {file = "cryptography-42.0.7.tar.gz", hash = "sha256:ecbfbc00bf55888edda9868a4cf927205de8499e7fabe6c050322298382953f2"}, + {file = "cryptography-42.0.8-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:81d8a521705787afe7a18d5bfb47ea9d9cc068206270aad0b96a725022e18d2e"}, + {file = "cryptography-42.0.8-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:961e61cefdcb06e0c6d7e3a1b22ebe8b996eb2bf50614e89384be54c48c6b63d"}, + {file = "cryptography-42.0.8-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e3ec3672626e1b9e55afd0df6d774ff0e953452886e06e0f1eb7eb0c832e8902"}, + {file = "cryptography-42.0.8-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e599b53fd95357d92304510fb7bda8523ed1f79ca98dce2f43c115950aa78801"}, + {file = "cryptography-42.0.8-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:5226d5d21ab681f432a9c1cf8b658c0cb02533eece706b155e5fbd8a0cdd3949"}, + {file = "cryptography-42.0.8-cp37-abi3-manylinux_2_28_x86_64.whl", hash = 
"sha256:6b7c4f03ce01afd3b76cf69a5455caa9cfa3de8c8f493e0d3ab7d20611c8dae9"}, + {file = "cryptography-42.0.8-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:2346b911eb349ab547076f47f2e035fc8ff2c02380a7cbbf8d87114fa0f1c583"}, + {file = "cryptography-42.0.8-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:ad803773e9df0b92e0a817d22fd8a3675493f690b96130a5e24f1b8fabbea9c7"}, + {file = "cryptography-42.0.8-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:2f66d9cd9147ee495a8374a45ca445819f8929a3efcd2e3df6428e46c3cbb10b"}, + {file = "cryptography-42.0.8-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:d45b940883a03e19e944456a558b67a41160e367a719833c53de6911cabba2b7"}, + {file = "cryptography-42.0.8-cp37-abi3-win32.whl", hash = "sha256:a0c5b2b0585b6af82d7e385f55a8bc568abff8923af147ee3c07bd8b42cda8b2"}, + {file = "cryptography-42.0.8-cp37-abi3-win_amd64.whl", hash = "sha256:57080dee41209e556a9a4ce60d229244f7a66ef52750f813bfbe18959770cfba"}, + {file = "cryptography-42.0.8-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:dea567d1b0e8bc5764b9443858b673b734100c2871dc93163f58c46a97a83d28"}, + {file = "cryptography-42.0.8-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c4783183f7cb757b73b2ae9aed6599b96338eb957233c58ca8f49a49cc32fd5e"}, + {file = "cryptography-42.0.8-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0608251135d0e03111152e41f0cc2392d1e74e35703960d4190b2e0f4ca9c70"}, + {file = "cryptography-42.0.8-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:dc0fdf6787f37b1c6b08e6dfc892d9d068b5bdb671198c72072828b80bd5fe4c"}, + {file = "cryptography-42.0.8-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:9c0c1716c8447ee7dbf08d6db2e5c41c688544c61074b54fc4564196f55c25a7"}, + {file = "cryptography-42.0.8-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:fff12c88a672ab9c9c1cf7b0c80e3ad9e2ebd9d828d955c126be4fd3e5578c9e"}, + {file = "cryptography-42.0.8-cp39-abi3-musllinux_1_1_x86_64.whl", hash = 
"sha256:cafb92b2bc622cd1aa6a1dce4b93307792633f4c5fe1f46c6b97cf67073ec961"}, + {file = "cryptography-42.0.8-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:31f721658a29331f895a5a54e7e82075554ccfb8b163a18719d342f5ffe5ecb1"}, + {file = "cryptography-42.0.8-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b297f90c5723d04bcc8265fc2a0f86d4ea2e0f7ab4b6994459548d3a6b992a14"}, + {file = "cryptography-42.0.8-cp39-abi3-win32.whl", hash = "sha256:2f88d197e66c65be5e42cd72e5c18afbfae3f741742070e3019ac8f4ac57262c"}, + {file = "cryptography-42.0.8-cp39-abi3-win_amd64.whl", hash = "sha256:fa76fbb7596cc5839320000cdd5d0955313696d9511debab7ee7278fc8b5c84a"}, + {file = "cryptography-42.0.8-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:ba4f0a211697362e89ad822e667d8d340b4d8d55fae72cdd619389fb5912eefe"}, + {file = "cryptography-42.0.8-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:81884c4d096c272f00aeb1f11cf62ccd39763581645b0812e99a91505fa48e0c"}, + {file = "cryptography-42.0.8-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c9bb2ae11bfbab395bdd072985abde58ea9860ed84e59dbc0463a5d0159f5b71"}, + {file = "cryptography-42.0.8-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:7016f837e15b0a1c119d27ecd89b3515f01f90a8615ed5e9427e30d9cdbfed3d"}, + {file = "cryptography-42.0.8-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5a94eccb2a81a309806027e1670a358b99b8fe8bfe9f8d329f27d72c094dde8c"}, + {file = "cryptography-42.0.8-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:dec9b018df185f08483f294cae6ccac29e7a6e0678996587363dc352dc65c842"}, + {file = "cryptography-42.0.8-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:343728aac38decfdeecf55ecab3264b015be68fc2816ca800db649607aeee648"}, + {file = "cryptography-42.0.8-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:013629ae70b40af70c9a7a5db40abe5d9054e6f4380e50ce769947b73bf3caad"}, + {file = "cryptography-42.0.8.tar.gz", hash = 
"sha256:8d09d05439ce7baa8e9e95b07ec5b6c886f548deb7e0f69ef25f64b3bce842f2"}, ] [package.dependencies] From 76b9f14c0accc9ae5f671816480c87d11e7f9f8a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Jul 2024 13:42:00 +0100 Subject: [PATCH 79/84] Bump log from 0.4.21 to 0.4.22 (#17384) --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1955c1a4e7..545a7a0a1c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -234,9 +234,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.21" +version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "memchr" From a1b88976680b5a462db94deeebcc8110524a8e76 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Jul 2024 13:42:37 +0100 Subject: [PATCH 80/84] Bump serde_json from 1.0.117 to 1.0.119 (#17385) --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 545a7a0a1c..4353e55977 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -505,9 +505,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.117" +version = "1.0.119" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +checksum = "e8eddb61f0697cc3989c5d64b452f5488e2b8a60fd7d5076a3045076ffef8cb0" dependencies = [ "itoa", "ryu", From 635e3927d28a471b6c7a03d5c218868378eee6f8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Jul 2024 17:46:58 +0100 Subject: [PATCH 81/84] Bump types-setuptools from 69.5.0.20240423 to 70.1.0.20240627 (#17380) --- poetry.lock | 6 +++--- 1 file 
changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index ca0757c8b8..99c3b62c7d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2906,13 +2906,13 @@ urllib3 = ">=2" [[package]] name = "types-setuptools" -version = "69.5.0.20240423" +version = "70.1.0.20240627" description = "Typing stubs for setuptools" optional = false python-versions = ">=3.8" files = [ - {file = "types-setuptools-69.5.0.20240423.tar.gz", hash = "sha256:a7ba908f1746c4337d13f027fa0f4a5bcad6d1d92048219ba792b3295c58586d"}, - {file = "types_setuptools-69.5.0.20240423-py3-none-any.whl", hash = "sha256:a4381e041510755a6c9210e26ad55b1629bc10237aeb9cb8b6bd24996b73db48"}, + {file = "types-setuptools-70.1.0.20240627.tar.gz", hash = "sha256:385907a47b5cf302b928ce07953cd91147d5de6f3da604c31905fdf0ec309e83"}, + {file = "types_setuptools-70.1.0.20240627-py3-none-any.whl", hash = "sha256:c7bdf05cd0a8b66868b4774c7b3c079d01ae025d8c9562bfc8bf2ff44d263c9c"}, ] [[package]] From 9e53336a71f3567c451456d778e0606cef19cde1 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 1 Jul 2024 13:44:00 -0500 Subject: [PATCH 82/84] Avoid fetching full events for `prev_event_ids` See https://github.com/element-hq/synapse/pull/17320#discussion_r1658832755 --- synapse/handlers/sliding_sync.py | 72 ++++++------------------ synapse/storage/databases/main/stream.py | 51 +++++++++++++---- 2 files changed, 59 insertions(+), 64 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 5dc9867907..c9285d23c0 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -464,7 +464,6 @@ class SlidingSyncHandler: # Since we fetched a snapshot of the users room list at some point in time after # the from/to tokens, we need to revert/rewind some membership changes to match # the point in time of the `to_token`. 
- prev_event_ids_in_from_to_range: List[str] = [] for ( room_id, first_membership_change_after_to_token, @@ -475,40 +474,13 @@ class SlidingSyncHandler: # 1b) 1c) From the first membership event after the `to_token`, step backward to the # previous membership that would apply to the from/to range. else: - prev_event_ids_in_from_to_range.append( - first_membership_change_after_to_token.prev_event_id - ) - - # 1) Fixup (more) - # - # 1b) 1c) Fetch the previous membership events that apply to the from/to range - # and fixup our working list. - prev_events_in_from_to_range = await self.store.get_events( - prev_event_ids_in_from_to_range - ) - for prev_event_in_from_to_range in prev_events_in_from_to_range.values(): - # These fields should be present for all persisted events - assert ( - prev_event_in_from_to_range.internal_metadata.instance_name is not None - ) - assert ( - prev_event_in_from_to_range.internal_metadata.stream_ordering - is not None - ) - - # 1b) 1c) Update the membership with what we found - sync_room_id_set[prev_event_in_from_to_range.room_id] = ( - _RoomMembershipForUser( - event_id=prev_event_in_from_to_range.event_id, - event_pos=PersistedEventPosition( - instance_name=prev_event_in_from_to_range.internal_metadata.instance_name, - stream=prev_event_in_from_to_range.internal_metadata.stream_ordering, - ), - membership=prev_event_in_from_to_range.membership, - sender=prev_event_in_from_to_range.sender, + sync_room_id_set[room_id] = _RoomMembershipForUser( + event_id=first_membership_change_after_to_token.prev_event_id, + event_pos=first_membership_change_after_to_token.prev_event_pos, + membership=first_membership_change_after_to_token.prev_membership, + sender=first_membership_change_after_to_token.prev_sender, newly_joined=False, ) - ) # Filter the rooms that that we have updated room membership events to the point # in time of the `to_token` (from the "1)" fixups) @@ -600,12 +572,9 @@ class SlidingSyncHandler: ) # 3) Figure out `newly_joined` - 
prev_event_ids_before_token_range: List[str] = [] - for possibly_newly_joined_room_id in possibly_newly_joined_room_ids: + for room_id in possibly_newly_joined_room_ids: has_non_join_in_from_to_range = ( - has_non_join_event_by_room_id_in_from_to_range.get( - possibly_newly_joined_room_id, False - ) + has_non_join_event_by_room_id_in_from_to_range.get(room_id, False) ) # If the last membership event in the token range is a join and there is # also some non-join in the range, we know they `newly_joined`. @@ -618,6 +587,9 @@ class SlidingSyncHandler: prev_event_id = first_membership_change_by_room_id_in_from_to_range[ room_id ].prev_event_id + prev_membership = first_membership_change_by_room_id_in_from_to_range[ + room_id + ].prev_membership if prev_event_id is None: # We found a `newly_joined` room (we are joining the room for the @@ -625,22 +597,14 @@ class SlidingSyncHandler: filtered_sync_room_id_set[room_id] = filtered_sync_room_id_set[ room_id ].copy_and_replace(newly_joined=True) - else: - # Last resort, we need to step back to the previous membership event - # just before the token range to see if we're joined then or not. - prev_event_ids_before_token_range.append(prev_event_id) - - # 3) more - prev_events_before_token_range = await self.store.get_events( - prev_event_ids_before_token_range - ) - for prev_event_before_token_range in prev_events_before_token_range.values(): - if prev_event_before_token_range.membership != Membership.JOIN: - # We found a `newly_joined` room (we left before the token range - # and joined within the token range) - filtered_sync_room_id_set[room_id] = filtered_sync_room_id_set[ - room_id - ].copy_and_replace(newly_joined=True) + # Last resort, we need to step back to the previous membership event + # just before the token range to see if we're joined then or not. 
+ elif prev_membership != Membership.JOIN: + # We found a `newly_joined` room (we left before the token range + # and joined within the token range) + filtered_sync_room_id_set[room_id] = filtered_sync_room_id_set[ + room_id + ].copy_and_replace(newly_joined=True) return filtered_sync_room_id_set diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index c128eb5d5b..29f675ae44 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -125,12 +125,17 @@ class CurrentStateDeltaMembership: sender: The person who sent the membership event """ + room_id: str + # Event event_id: Optional[str] event_pos: PersistedEventPosition - prev_event_id: Optional[str] - room_id: str membership: str sender: Optional[str] + # Prev event + prev_event_id: Optional[str] + prev_event_pos: Optional[PersistedEventPosition] + prev_membership: Optional[str] + prev_sender: Optional[str] def generate_pagination_where_clause( @@ -865,18 +870,22 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): # was added) and uses the *minimum* stream position for batches of events. sql = """ SELECT - e.event_id, - s.prev_event_id, s.room_id, + e.event_id, s.instance_name, s.stream_id, m.membership, e.sender, - m_prev.membership AS prev_membership + s.prev_event_id, + e_prev.instance_name AS prev_instance_name, + e_prev.stream_ordering AS prev_stream_ordering, + m_prev.membership AS prev_membership, + e_prev.sender AS prev_sender FROM current_state_delta_stream AS s LEFT JOIN events AS e ON e.event_id = s.event_id LEFT JOIN room_memberships AS m ON m.event_id = s.event_id - LEFT JOIN room_memberships AS m_prev ON s.prev_event_id = m_prev.event_id + LEFT JOIN events AS e_prev ON e_prev.event_id = s.prev_event_id + LEFT JOIN room_memberships AS m_prev ON m_prev.event_id = s.prev_event_id WHERE s.stream_id > ? AND s.stream_id <= ? AND s.type = ? AND s.state_key = ? 
@@ -887,14 +896,17 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): membership_changes: List[CurrentStateDeltaMembership] = [] for ( - event_id, - prev_event_id, room_id, + event_id, instance_name, stream_ordering, membership, sender, + prev_event_id, + prev_instance_name, + prev_stream_ordering, prev_membership, + prev_sender, ) in txn: assert room_id is not None assert instance_name is not None @@ -916,17 +928,36 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): continue membership_change = CurrentStateDeltaMembership( + room_id=room_id, + # Event event_id=event_id, event_pos=PersistedEventPosition( instance_name=instance_name, stream=stream_ordering, ), - prev_event_id=prev_event_id, - room_id=room_id, membership=( membership if membership is not None else Membership.LEAVE ), sender=sender, + # Prev event + prev_event_id=prev_event_id, + prev_event_pos=( + PersistedEventPosition( + instance_name=prev_instance_name, + stream=prev_stream_ordering, + ) + if ( + prev_instance_name is not None + and prev_stream_ordering is not None + ) + else None + ), + prev_membership=( + prev_membership + if prev_membership is not None + else Membership.LEAVE + ), + prev_sender=prev_sender, ) membership_changes.append(membership_change) From a4263bf92513ecb395fc646dd783badecd2b0c3a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 1 Jul 2024 18:56:45 -0500 Subject: [PATCH 83/84] Update stream tests with prev event info --- synapse/storage/databases/main/stream.py | 24 +++---- tests/storage/test_stream.py | 80 +++++++++++++++++------- 2 files changed, 71 insertions(+), 33 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 29f675ae44..7e6beb5239 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -918,12 +918,13 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): instance_name, stream_ordering, ): - # When the server leaves a 
room, it will insert new rows with - # `event_id = null` for all current state. This means we might - # already have a row for the leave event and then another for the - # same leave where the `event_id=null` but the `prev_event_id` is - # pointing back at the earlier leave event. We don't want to report - # the leave, if we already have a leave event. + # When the server leaves a room, it will insert new rows into the + # `current_state_delta_stream` table with `event_id = null` for all + # current state. This means we might already have a row for the + # leave event and then another for the same leave where the + # `event_id=null` but the `prev_event_id` is pointing back at the + # earlier leave event. We don't want to report the leave, if we + # already have a leave event. if event_id is None and prev_membership == Membership.LEAVE: continue @@ -935,6 +936,11 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): instance_name=instance_name, stream=stream_ordering, ), + # When `s.event_id = null`, we won't be able to get respective + # `room_membership` but can assume the user has left the room + # because this only happens when the server leaves a room + # (meaning everyone locally left) or a state reset which removed + # the person from the room. 
membership=( membership if membership is not None else Membership.LEAVE ), @@ -952,11 +958,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ) else None ), - prev_membership=( - prev_membership - if prev_membership is not None - else Membership.LEAVE - ), + prev_membership=prev_membership, prev_sender=prev_sender, ) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index e420e680e2..aad46b1b44 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -609,12 +609,15 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): membership_changes, [ CurrentStateDeltaMembership( + room_id=room_id1, event_id=join_response["event_id"], event_pos=join_pos, - prev_event_id=None, - room_id=room_id1, membership="join", sender=user1_id, + prev_event_id=None, + prev_event_pos=None, + prev_membership=None, + prev_sender=None, ) ], ) @@ -710,20 +713,26 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): membership_changes, [ CurrentStateDeltaMembership( + room_id=room_id1, event_id=join_response1["event_id"], event_pos=join_pos1, - prev_event_id=None, - room_id=room_id1, membership="join", sender=user1_id, + prev_event_id=None, + prev_event_pos=None, + prev_membership=None, + prev_sender=None, ), CurrentStateDeltaMembership( + room_id=room_id1, event_id=leave_response1["event_id"], event_pos=leave_pos1, - prev_event_id=join_response1["event_id"], - room_id=room_id1, membership="leave", sender=user1_id, + prev_event_id=join_response1["event_id"], + prev_event_pos=join_pos1, + prev_membership="join", + prev_sender=user1_id, ), ], ) @@ -876,20 +885,26 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): membership_changes, [ CurrentStateDeltaMembership( + room_id=room_id1, event_id=join_response1["event_id"], event_pos=join_pos1, - prev_event_id=None, - room_id=room_id1, membership="join", sender=user1_id, + prev_event_id=None, + 
prev_event_pos=None, + prev_membership=None, + prev_sender=None, ), CurrentStateDeltaMembership( + room_id=room_id1, event_id=None, # leave_response1["event_id"], event_pos=leave_pos1, - prev_event_id=join_response1["event_id"], - room_id=room_id1, membership="leave", sender=None, # user1_id, + prev_event_id=join_response1["event_id"], + prev_event_pos=join_pos1, + prev_membership="join", + prev_sender=user1_id, ), ], ) @@ -998,16 +1013,19 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): membership_changes, [ CurrentStateDeltaMembership( + room_id=room_id1, event_id=join_event1.event_id, # Ideally, this would be `join_pos1` (to match the `event_id`) but # when events are persisted in a batch, they are all stored in the # `current_state_delta_stream` table with the minimum # `stream_ordering` from the batch. event_pos=join_pos3, - prev_event_id=None, - room_id=room_id1, membership="join", sender=user1_id, + prev_event_id=None, + prev_event_pos=None, + prev_membership=None, + prev_sender=None, ), ], ) @@ -1024,6 +1042,9 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + join_pos1 = self.get_success( + self.store.get_position_for_event(join_response1["event_id"]) + ) before_reset_token = self.event_sources.get_current_token() @@ -1089,12 +1110,15 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): membership_changes, [ CurrentStateDeltaMembership( + room_id=room_id1, event_id=None, event_pos=dummy_state_pos, - prev_event_id=join_response1["event_id"], - room_id=room_id1, membership="leave", sender=None, # user1_id, + prev_event_id=join_response1["event_id"], + prev_event_pos=join_pos1, + prev_membership="join", + prev_sender=user1_id, ), ], ) @@ -1139,20 +1163,26 @@ class 
GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): membership_changes, [ CurrentStateDeltaMembership( + room_id=room_id1, event_id=join_response1["event_id"], event_pos=join_pos1, - prev_event_id=None, - room_id=room_id1, membership="join", sender=user1_id, + prev_event_id=None, + prev_event_pos=None, + prev_membership=None, + prev_sender=None, ), CurrentStateDeltaMembership( + room_id=room_id2, event_id=join_response2["event_id"], event_pos=join_pos2, - prev_event_id=None, - room_id=room_id2, membership="join", sender=user1_id, + prev_event_id=None, + prev_event_pos=None, + prev_membership=None, + prev_sender=None, ), ], ) @@ -1173,12 +1203,15 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): membership_changes, [ CurrentStateDeltaMembership( + room_id=room_id1, event_id=join_response1["event_id"], event_pos=join_pos1, - prev_event_id=None, - room_id=room_id1, membership="join", sender=user1_id, + prev_event_id=None, + prev_event_pos=None, + prev_membership=None, + prev_sender=None, ) ], ) @@ -1366,12 +1399,15 @@ class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase( membership_changes, [ CurrentStateDeltaMembership( + room_id=intially_unjoined_room_id, event_id=join_event.event_id, event_pos=join_pos, - prev_event_id=None, - room_id=intially_unjoined_room_id, membership="join", sender=user1_id, + prev_event_id=None, + prev_event_pos=None, + prev_membership=None, + prev_sender=None, ), ], ) From 10d78d66b7f2e28c8391da7fc479b329eeddf3cd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 1 Jul 2024 19:04:46 -0500 Subject: [PATCH 84/84] Protect for non-existent prev events --- synapse/handlers/sliding_sync.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index c9285d23c0..8622ef8472 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -474,13 
+474,26 @@ class SlidingSyncHandler: # 1b) 1c) From the first membership event after the `to_token`, step backward to the # previous membership that would apply to the from/to range. else: - sync_room_id_set[room_id] = _RoomMembershipForUser( - event_id=first_membership_change_after_to_token.prev_event_id, - event_pos=first_membership_change_after_to_token.prev_event_pos, - membership=first_membership_change_after_to_token.prev_membership, - sender=first_membership_change_after_to_token.prev_sender, - newly_joined=False, - ) + # We don't expect these fields to be `None` if we have a `prev_event_id` + # but we're being defensive since it's possible that the prev event was + # culled from the database. + if ( + first_membership_change_after_to_token.prev_event_pos is not None + and first_membership_change_after_to_token.prev_membership + is not None + ): + sync_room_id_set[room_id] = _RoomMembershipForUser( + event_id=first_membership_change_after_to_token.prev_event_id, + event_pos=first_membership_change_after_to_token.prev_event_pos, + membership=first_membership_change_after_to_token.prev_membership, + sender=first_membership_change_after_to_token.prev_sender, + newly_joined=False, + ) + else: + # If we can't find the previous membership event, we shouldn't + # include the room in the sync response since we can't determine the + # exact membership state and shouldn't rely on the current snapshot. + sync_room_id_set.pop(room_id, None) # Filter the rooms that that we have updated room membership events to the point # in time of the `to_token` (from the "1)" fixups)