From 079194c54740e5046bb988a1b6d602bdd21044ec Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 17 Jun 2024 18:03:02 -0500 Subject: [PATCH 01/62] Return some room timeline data in Sliding Sync --- synapse/handlers/sliding_sync.py | 202 ++++++++++++++++++++++++-- synapse/rest/client/sync.py | 89 ++++++++++-- synapse/types/handlers/__init__.py | 7 +- synapse/types/rest/client/__init__.py | 7 - 4 files changed, 275 insertions(+), 30 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 16d94925f5..cf448fa3cd 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -18,22 +18,25 @@ # # import logging -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple +from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple +import attr from immutabledict import immutabledict -from synapse.api.constants import AccountDataTypes, EventTypes, Membership +from synapse.api.constants import AccountDataTypes, Direction, EventTypes, Membership from synapse.events import EventBase from synapse.storage.roommember import RoomsForUser from synapse.types import ( PersistedEventPosition, Requester, RoomStreamToken, + StreamKeyType, StreamToken, UserID, ) from synapse.types.handlers import OperationType, SlidingSyncConfig, SlidingSyncResult -from synapse.types.state import StateFilter +from synapse.types.state import StateFilter, StateKey +from synapse.visibility import filter_events_for_client if TYPE_CHECKING: from synapse.server import HomeServer @@ -82,6 +85,18 @@ def filter_membership_for_sync(*, membership: str, user_id: str, sender: str) -> return membership != Membership.LEAVE or sender != user_id +# We can't freeze this class because we want to update it in place with the +# de-duplicated data. +@attr.s(slots=True, auto_attribs=True) +class RoomSyncConfig: + """ + Holds the config for what data we should fetch for a room in the sync response. 
+ """ + + timeline_limit: int + required_state: Set[StateKey] + + class SlidingSyncHandler: def __init__(self, hs: "HomeServer"): self.clock = hs.get_clock() @@ -201,6 +216,7 @@ class SlidingSyncHandler: # Assemble sliding window lists lists: Dict[str, SlidingSyncResult.SlidingWindowList] = {} + relevant_room_map: Dict[str, RoomSyncConfig] = {} if sync_config.lists: # Get all of the room IDs that the user should be able to see in the sync # response @@ -225,29 +241,66 @@ class SlidingSyncHandler: ops: List[SlidingSyncResult.SlidingWindowList.Operation] = [] if list_config.ranges: for range in list_config.ranges: + room_id_set = { + room_id + for room_id, _ in sorted_room_info[range[0] : range[1]] + } + ops.append( SlidingSyncResult.SlidingWindowList.Operation( op=OperationType.SYNC, range=range, - room_ids=[ - room_id - for room_id, _ in sorted_room_info[ - range[0] : range[1] - ] - ], + room_ids=list(room_id_set), ) ) + # Update the relevant room map + for room_id in room_id_set: + if relevant_room_map.get(room_id) is not None: + # Take the highest timeline limit + if ( + relevant_room_map[room_id].timeline_limit + < list_config.timeline_limit + ): + relevant_room_map[room_id].timeline_limit = ( + list_config.timeline_limit + ) + + # Union the required state + relevant_room_map[room_id].required_state.update( + list_config.required_state + ) + else: + relevant_room_map[room_id] = RoomSyncConfig( + timeline_limit=list_config.timeline_limit, + required_state=set(list_config.required_state), + ) + lists[list_key] = SlidingSyncResult.SlidingWindowList( count=len(sorted_room_info), ops=ops, ) + # TODO: if (sync_config.room_subscriptions): + + # Fetch room data + rooms: Dict[str, SlidingSyncResult.RoomResult] = {} + for room_id, room_sync_config in relevant_room_map.items(): + room_sync_result = await self.get_room_sync_data( + user=sync_config.user, + room_id=room_id, + room_sync_config=room_sync_config, + rooms_for_user_membership_at_to_token=sync_room_map[room_id], + from_token=from_token, + to_token=to_token, + ) + + rooms[room_id] = room_sync_result + return SlidingSyncResult( next_pos=to_token, lists=lists, - # TODO: Gather room data for rooms in lists and `sync_config.room_subscriptions` - rooms={}, + rooms=rooms, extensions={}, ) @@ -665,3 +718,130 @@ class SlidingSyncHandler: # We want descending order reverse=True, ) + + async def get_room_sync_data( + self, + user: UserID, + room_id: str, + room_sync_config: RoomSyncConfig, + rooms_for_user_membership_at_to_token: RoomsForUser, + from_token: Optional[StreamToken], + to_token: StreamToken, + ) -> SlidingSyncResult.RoomResult: + """ + Fetch room data for a room. + + We fetch data according to the token range (> `from_token` and <= `to_token`). + + Args: + user: User to fetch data for + room_id: The room ID to fetch data for + room_sync_config: Config for what data we should fetch for a room in the + sync response. + rooms_for_user_membership_at_to_token: Membership information for the user + in the room at the time of `to_token`. + from_token: The point in the stream to sync from. + to_token: The point in the stream to sync up to. + """ + + timeline_events: List[EventBase] = [] + limited = False + # We want to use `to_token` (vs `from_token`) because we look backwards from the + # `to_token` up to the `timeline_limit` and we might not reach `from_token` + # before we hit the limit. We will update the room stream position once we've + # fetched the events. 
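As a plain-Python sketch of the strategy this comment describes (made-up stream positions, not Synapse code): fetching `limit + 1` events backwards from `to_token` is what lets both `limited` and the `prev_batch` position fall out naturally.

```python
# Toy model: events identified by their stream ordering, oldest..newest.
stream_positions = list(range(1, 11))  # events 1..10
to_token, timeline_limit = 10, 3

# Fetch one extra event so we can tell whether more history exists.
window = [p for p in stream_positions if p <= to_token][-(timeline_limit + 1) :]
limited = len(window) > timeline_limit
timeline = window[-timeline_limit:] if limited else window
prev_batch = timeline[0] - 1  # position just before the oldest event we return

print(timeline, limited, prev_batch)  # [8, 9, 10] True 7
```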
+ prev_batch_token = to_token + if room_sync_config.timeline_limit > 0: + timeline_events, new_room_key = await self.store.paginate_room_events( + room_id=room_id, + # We're going to paginate backwards from the `to_token` + from_key=to_token.room_key, + to_key=from_token.room_key if from_token is not None else None, + direction=Direction.BACKWARDS, + # We add one so we can determine if there are enough events to saturate + # the limit or not (see `limited`) + limit=room_sync_config.timeline_limit + 1, + event_filter=None, + ) + + # We want to return the events in ascending order (the last event is the + # most recent). + timeline_events.reverse() + + timeline_events = await filter_events_for_client( + self.storage_controllers, + user.to_string(), + timeline_events, + is_peeking=rooms_for_user_membership_at_to_token.membership + != Membership.JOIN, + filter_send_to_client=True, + ) + + # Determine our `limited` status + if len(timeline_events) > room_sync_config.timeline_limit: + limited = True + # Get rid of that extra "+ 1" event because we only used it to determine + # if we hit the limit or not + timeline_events = timeline_events[-room_sync_config.timeline_limit :] + assert timeline_events[0].internal_metadata.stream_ordering + new_room_key = RoomStreamToken( + stream=timeline_events[0].internal_metadata.stream_ordering - 1 + ) + + prev_batch_token = prev_batch_token.copy_and_replace( + StreamKeyType.ROOM, new_room_key + ) + + # Figure out any stripped state events for invite/knocks + stripped_state: List[EventBase] = [] + if rooms_for_user_membership_at_to_token.membership in { + Membership.INVITE, + Membership.KNOCK, + }: + invite_or_knock_event = await self.store.get_event( + rooms_for_user_membership_at_to_token.event_id + ) + + stripped_state = [] + if invite_or_knock_event.membership == Membership.INVITE: + stripped_state = invite_or_knock_event.unsigned.get( + "invite_room_state", [] + ) + elif invite_or_knock_event.membership == Membership.KNOCK: + stripped_state = invite_or_knock_event.unsigned.get( + "knock_room_state", [] + ) + + stripped_state.append(invite_or_knock_event) + + return SlidingSyncResult.RoomResult( + # TODO: Dummy value + name="TODO", + # TODO: Dummy value + avatar=None, + # TODO: Dummy value + heroes=None, + # Since we can't determine whether we've already sent a room down this + # Sliding Sync connection before (we plan to add this optimization in the + # future), we're always returning the requested room state instead of + # updates. + initial=True, + # TODO: Dummy value + required_state=[], + timeline=timeline_events, + # TODO: Dummy value + is_dm=False, + stripped_state=stripped_state, + prev_batch=prev_batch_token, + limited=limited, + # TODO: Dummy values + joined_count=0, + invited_count=0, + # TODO: These are just dummy values. We could potentially just remove these + # since notifications can only really be done correctly on the client anyway + # (encrypted rooms). 
+ notification_count=0, + highlight_count=0, + # TODO: Dummy value + num_live=0, + ) diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index 1b0ac20d94..b261b2dd88 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -761,7 +761,6 @@ class SlidingSyncRestServlet(RestServlet): "lists": { "foo-list": { "ranges": [ [0, 99] ], - "sort": [ "by_notification_level", "by_recency", "by_name" ], "required_state": [ ["m.room.join_rules", ""], ["m.room.history_visibility", ""], @@ -771,7 +770,6 @@ class SlidingSyncRestServlet(RestServlet): "filters": { "is_dm": true }, - "bump_event_types": [ "m.room.message", "m.room.encrypted" ], } }, // Room Subscriptions API @@ -779,10 +777,6 @@ class SlidingSyncRestServlet(RestServlet): "!sub1:bar": { "required_state": [ ["*","*"] ], "timeline_limit": 10, - "include_old_rooms": { - "timeline_limit": 1, - "required_state": [ ["m.room.tombstone", ""], ["m.room.create", ""] ], - } } }, // Extensions API @@ -871,10 +865,11 @@ class SlidingSyncRestServlet(RestServlet): super().__init__() self.auth = hs.get_auth() self.store = hs.get_datastores().main + self.clock = hs.get_clock() self.filtering = hs.get_filtering() self.sliding_sync_handler = hs.get_sliding_sync_handler() + self.event_serializer = hs.get_event_client_serializer() - # TODO: Update this to `on_GET` once we figure out how we want to handle params async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) user = requester.user @@ -920,13 +915,14 @@ class SlidingSyncRestServlet(RestServlet): logger.info("Client has disconnected; not serializing response.") return 200, {} - response_content = await self.encode_response(sliding_sync_results) + response_content = await self.encode_response(requester, sliding_sync_results) return 200, response_content # TODO: Is there a better way to encode things? 
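Before the encoding helpers below, it may help to see how a client could consume the `ops` from the example response above. This is a minimal sketch under the shapes shown in the docstring (hypothetical room IDs; not Synapse code):

```python
# Apply SYNC ops to a client-side index -> room_id window (toy example).
ops = [{"op": "SYNC", "range": [0, 2], "room_ids": ["!foo:bar", "!baz:qux", "!x:y"]}]
window: dict = {}

for op in ops:
    if op["op"] == "SYNC":
        start, _end = op["range"]
        for offset, room_id in enumerate(op["room_ids"]):
            window[start + offset] = room_id

print(window)  # {0: '!foo:bar', 1: '!baz:qux', 2: '!x:y'}
```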
async def encode_response( self, + requester: Requester, sliding_sync_result: SlidingSyncResult, ) -> JsonDict: response: JsonDict = defaultdict(dict) @@ -935,7 +931,9 @@ class SlidingSyncRestServlet(RestServlet): serialized_lists = self.encode_lists(sliding_sync_result.lists) if serialized_lists: response["lists"] = serialized_lists - response["rooms"] = {} # TODO: sliding_sync_result.rooms + response["rooms"] = await self.encode_rooms( + requester, sliding_sync_result.rooms + ) response["extensions"] = {} # TODO: sliding_sync_result.extensions return response @@ -961,6 +959,79 @@ class SlidingSyncRestServlet(RestServlet): return serialized_lists + async def encode_rooms( + self, + requester: Requester, + rooms: Dict[str, SlidingSyncResult.RoomResult], + ) -> JsonDict: + time_now = self.clock.time_msec() + + serialize_options = SerializeEventConfig( + event_format=format_event_for_client_v2_without_room_id, + requester=requester, + ) + + serialized_rooms = {} + for room_id, room_result in rooms.items(): + serialized_timeline = await self.event_serializer.serialize_events( + room_result.timeline, + time_now, + config=serialize_options, + # TODO + # bundle_aggregations=room.timeline.bundled_aggregations, + ) + + serialized_required_state = await self.event_serializer.serialize_events( + room_result.required_state, + time_now, + config=serialize_options, + ) + + serialized_rooms[room_id] = { + "name": room_result.name, + "required_state": serialized_required_state, + "timeline": serialized_timeline, + "prev_batch": await room_result.prev_batch.to_string(self.store), + "limited": room_result.limited, + "joined_count": room_result.joined_count, + "invited_count": room_result.invited_count, + "notification_count": room_result.notification_count, + "highlight_count": room_result.highlight_count, + "num_live": room_result.num_live, + } + + if room_result.avatar: + serialized_rooms[room_id]["avatar"] = room_result.avatar + + if room_result.heroes: + serialized_rooms[room_id]["heroes"] = room_result.heroes + + # We should only include the `initial` key if it's `True` to save bandwidth. + # The absense of this flag means `False`. + if room_result.initial: + serialized_rooms[room_id]["initial"] = room_result.initial + + # Field should be absent on non-DM rooms + if room_result.is_dm: + serialized_rooms[room_id]["is_dm"] = room_result.is_dm + + # Stripped state only applies to invite/knock rooms + if room_result.stripped_state: + serialized_stripped_state = ( + await self.event_serializer.serialize_events( + room_result.stripped_state, + time_now, + config=serialize_options, + ) + ) + + # TODO: Would be good to rename this to `stripped_state` so it can be + # shared between invite and knock rooms, see + # https://github.com/matrix-org/matrix-spec-proposals/pull/3575#discussion_r1117629919 + serialized_rooms[room_id]["invite_state"] = serialized_stripped_state + + return serialized_rooms + def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: SyncRestServlet(hs).register(http_server) diff --git a/synapse/types/handlers/__init__.py b/synapse/types/handlers/__init__.py index 1d65551d5b..b544398a35 100644 --- a/synapse/types/handlers/__init__.py +++ b/synapse/types/handlers/__init__.py @@ -162,8 +162,9 @@ class SlidingSyncResult: timeline: Latest events in the room. The last event is the most recent is_dm: Flag to specify whether the room is a direct-message room (most likely between two people). - invite_state: Stripped state events. 
Same as `rooms.invite.$room_id.invite_state`
-            in sync v2, absent on joined/left rooms
+        stripped_state: Stripped state events (for rooms where the user is
+            invited/knocked). Same as `rooms.invite.$room_id.invite_state` in sync v2,
+            absent on joined/left rooms
         prev_batch: A token that can be passed as a start parameter to the
             `/rooms/<room_id>/messages` API to retrieve earlier messages.
         limited: True if there are more events than fit between the given position and now.
@@ -192,7 +193,7 @@ class SlidingSyncResult:
         required_state: List[EventBase]
         timeline: List[EventBase]
         is_dm: bool
-        invite_state: List[EventBase]
+        stripped_state: Optional[List[EventBase]]
         prev_batch: StreamToken
         limited: bool
         joined_count: int
diff --git a/synapse/types/rest/client/__init__.py b/synapse/types/rest/client/__init__.py
index e2c79c4106..25fbd772f6 100644
--- a/synapse/types/rest/client/__init__.py
+++ b/synapse/types/rest/client/__init__.py
@@ -152,9 +152,6 @@ class SlidingSyncBody(RequestBodyModel):
             anyway.
         timeline_limit: The maximum number of timeline events to return per response.
             (Max 1000 messages)
-        include_old_rooms: Determines if `predecessor` rooms are included in the
-            `rooms` response. The user MUST be joined to old rooms for them to show up
-            in the response.
         """

         class IncludeOldRooms(RequestBodyModel):
@@ -167,7 +164,6 @@ class SlidingSyncBody(RequestBodyModel):
             timeline_limit: int
         else:
             timeline_limit: conint(le=1000, strict=True)  # type: ignore[valid-type]
-        include_old_rooms: Optional[IncludeOldRooms] = None

     class SlidingSyncList(CommonRoomParameters):
         """
@@ -208,9 +204,6 @@ class SlidingSyncBody(RequestBodyModel):
                 }
         timeline_limit: The maximum number of timeline events to return per response.
-        include_old_rooms: Determines if `predecessor` rooms are included in the
-            `rooms` response. The user MUST be joined to old rooms for them to show up
-            in the response.
         include_heroes: Return a stripped variant of membership events (containing
             `user_id` and optionally `avatar_url` and `displayname`) for the users used
             to calculate the room name.

From 3e0f759dbc34cb3be0a1946cd36e617fc3c5a17c Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Mon, 17 Jun 2024 18:26:59 -0500
Subject: [PATCH 02/62] Strip invite/knock event itself and avoid mutating event `unsigned`

Make sure we don't run into https://github.com/element-hq/synapse/issues/14919
(https://github.com/matrix-org/synapse/issues/14919)
---
 synapse/events/utils.py                     | 18 ++++++++++++++++++
 synapse/handlers/sliding_sync.py            | 14 ++++++++------
 synapse/rest/client/sync.py                 | 10 +---------
 .../storage/databases/main/events_worker.py | 12 ++----------
 synapse/types/handlers/__init__.py          |  4 ++--
 5 files changed, 31 insertions(+), 27 deletions(-)

diff --git a/synapse/events/utils.py b/synapse/events/utils.py
index b997d82d71..f937fd4698 100644
--- a/synapse/events/utils.py
+++ b/synapse/events/utils.py
@@ -836,3 +836,21 @@ def maybe_upsert_event_field(
             del container[key]

     return upsert_okay
+
+
+def strip_event(event: EventBase) -> JsonDict:
+    """
+    Used for "stripped state" events which provide a simplified view of the state of a
+    room intended to help a potential joiner identify the room (relevant when the user
+    is invited or knocked).
+
+    Stripped state events can only have the `sender`, `type`, `state_key` and `content`
+    properties present.
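For reference, the stripped form of a join-rules event keeps only those four properties. The values here are hypothetical, shown for illustration rather than taken from the patch:

```python
# Hypothetical `strip_event` output for an m.room.join_rules state event;
# `event_id`, `origin_server_ts`, `unsigned`, etc. are all dropped.
stripped = {
    "type": "m.room.join_rules",
    "state_key": "",
    "content": {"join_rule": "public"},
    "sender": "@user2:test",
}
print(sorted(stripped))  # ['content', 'sender', 'state_key', 'type']
```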
+ """ + + return { + "type": event.type, + "state_key": event.state_key, + "content": event.content, + "sender": event.sender, + } diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index cf448fa3cd..23f971c1f7 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -25,8 +25,10 @@ from immutabledict import immutabledict from synapse.api.constants import AccountDataTypes, Direction, EventTypes, Membership from synapse.events import EventBase +from synapse.events.utils import strip_event from synapse.storage.roommember import RoomsForUser from synapse.types import ( + JsonDict, PersistedEventPosition, Requester, RoomStreamToken, @@ -793,7 +795,7 @@ class SlidingSyncHandler: ) # Figure out any stripped state events for invite/knocks - stripped_state: List[EventBase] = [] + stripped_state: List[JsonDict] = [] if rooms_for_user_membership_at_to_token.membership in { Membership.INVITE, Membership.KNOCK, @@ -804,15 +806,15 @@ class SlidingSyncHandler: stripped_state = [] if invite_or_knock_event.membership == Membership.INVITE: - stripped_state = invite_or_knock_event.unsigned.get( - "invite_room_state", [] + stripped_state.extend( + invite_or_knock_event.unsigned.get("invite_room_state", []) ) elif invite_or_knock_event.membership == Membership.KNOCK: - stripped_state = invite_or_knock_event.unsigned.get( - "knock_room_state", [] + stripped_state.extend( + invite_or_knock_event.unsigned.get("knock_room_state", []) ) - stripped_state.append(invite_or_knock_event) + stripped_state.append(strip_event(invite_or_knock_event)) return SlidingSyncResult.RoomResult( # TODO: Dummy value diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index b261b2dd88..a9be37bbf3 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -1017,18 +1017,10 @@ class SlidingSyncRestServlet(RestServlet): # Stripped state only applies to invite/knock rooms if room_result.stripped_state: - serialized_stripped_state = ( - await self.event_serializer.serialize_events( - room_result.stripped_state, - time_now, - config=serialize_options, - ) - ) - # TODO: Would be good to rename this to `stripped_state` so it can be # shared between invite and knock rooms, see # https://github.com/matrix-org/matrix-spec-proposals/pull/3575#discussion_r1117629919 - serialized_rooms[room_id]["invite_state"] = serialized_stripped_state + serialized_rooms[room_id]["invite_state"] = room_result.stripped_state return serialized_rooms diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index e264d36f02..f0f390cec4 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -55,7 +55,7 @@ from synapse.api.room_versions import ( ) from synapse.events import EventBase, make_event_from_dict from synapse.events.snapshot import EventContext -from synapse.events.utils import prune_event +from synapse.events.utils import prune_event, strip_event from synapse.logging.context import ( PreserveLoggingContext, current_context, @@ -1025,15 +1025,7 @@ class EventsWorkerStore(SQLBaseStore): state_to_include = await self.get_events(selected_state_ids.values()) - return [ - { - "type": e.type, - "state_key": e.state_key, - "content": e.content, - "sender": e.sender, - } - for e in state_to_include.values() - ] + return [strip_event(e) for e in state_to_include.values()] def _maybe_start_fetch_thread(self) -> None: """Starts an event fetch thread if we 
are not yet at the maximum number.""" diff --git a/synapse/types/handlers/__init__.py b/synapse/types/handlers/__init__.py index b544398a35..04b0ab972b 100644 --- a/synapse/types/handlers/__init__.py +++ b/synapse/types/handlers/__init__.py @@ -31,7 +31,7 @@ else: from pydantic import Extra from synapse.events import EventBase -from synapse.types import JsonMapping, StreamToken, UserID +from synapse.types import JsonDict, JsonMapping, StreamToken, UserID from synapse.types.rest.client import SlidingSyncBody @@ -193,7 +193,7 @@ class SlidingSyncResult: required_state: List[EventBase] timeline: List[EventBase] is_dm: bool - stripped_state: Optional[List[EventBase]] + stripped_state: Optional[List[JsonDict]] prev_batch: StreamToken limited: bool joined_count: int From 5e2fd4e93ca2084ee92533b59e6d45b3a914fa89 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 17 Jun 2024 18:29:44 -0500 Subject: [PATCH 03/62] Add changelog --- changelog.d/17320.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/17320.feature diff --git a/changelog.d/17320.feature b/changelog.d/17320.feature new file mode 100644 index 0000000000..1e524f3eca --- /dev/null +++ b/changelog.d/17320.feature @@ -0,0 +1 @@ +Add `rooms` data to experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint. From 8ce06f145260540f0c81c1594a011556e90f32c8 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 17 Jun 2024 18:54:23 -0500 Subject: [PATCH 04/62] Fix sort being lost --- synapse/handlers/sliding_sync.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 23f971c1f7..e61b86d779 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -243,21 +243,21 @@ class SlidingSyncHandler: ops: List[SlidingSyncResult.SlidingWindowList.Operation] = [] if list_config.ranges: for range in list_config.ranges: - room_id_set = { + sliced_room_ids = [ room_id for room_id, _ in sorted_room_info[range[0] : range[1]] - } + ] ops.append( SlidingSyncResult.SlidingWindowList.Operation( op=OperationType.SYNC, range=range, - room_ids=list(room_id_set), + room_ids=sliced_room_ids, ) ) # Update the relevant room map - for room_id in room_id_set: + for room_id in sliced_room_ids: if relevant_room_map.get(room_id) is not None: # Take the highest timeline limit if ( From aa5f54aa135de8ae7fdc201792d548de494cbd40 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 08:26:10 -0500 Subject: [PATCH 05/62] Start on required_state --- synapse/handlers/sliding_sync.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index e61b86d779..5b834fe9ef 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -93,10 +93,16 @@ def filter_membership_for_sync(*, membership: str, user_id: str, sender: str) -> class RoomSyncConfig: """ Holds the config for what data we should fetch for a room in the sync response. + + Attributes: + timeline_limit: The maximum number of events to return in the timeline. + required_state: The minimum set of state events requested for the room. The + values are close to `StateKey` but actually use a syntax where you can provide + `*` and `$LAZY` as the state key part of the tuple (type, state_key). 
""" timeline_limit: int - required_state: Set[StateKey] + required_state: Set[Tuple[str, str]] class SlidingSyncHandler: @@ -816,6 +822,14 @@ class SlidingSyncHandler: stripped_state.append(strip_event(invite_or_knock_event)) + required_state = [] + if len(room_sync_config.required_state) > 0: + required_state = await self.storage_controllers.state.get_state_at( + room_id, + to_token, + state_filter=StateFilter.from_types(TODO), + ) + return SlidingSyncResult.RoomResult( # TODO: Dummy value name="TODO", From 5c175d5488ac7b700906a722ee16404527d8d711 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 09:35:20 -0500 Subject: [PATCH 06/62] Add some notes from pairing --- synapse/handlers/sliding_sync.py | 20 ++++++++++++++++++-- synapse/rest/client/sync.py | 1 + 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 5b834fe9ef..f9ec4f7961 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -764,6 +764,13 @@ class SlidingSyncHandler: room_id=room_id, # We're going to paginate backwards from the `to_token` from_key=to_token.room_key, + # We should always return historical messages (outside token range) in + # these cases because clients want to be able to show a basic screen of + # information: + # - Initial sync (because no `from_token`) + # - When users newly_join + # - TODO: For incremental sync where we haven't sent it down this + # connection before to_key=from_token.room_key if from_token is not None else None, direction=Direction.BACKWARDS, # We add one so we can determine if there are enough events to saturate @@ -824,14 +831,23 @@ class SlidingSyncHandler: required_state = [] if len(room_sync_config.required_state) > 0: - required_state = await self.storage_controllers.state.get_state_at( + await self.storage_controllers.state.get_current_state( room_id, - to_token, state_filter=StateFilter.from_types(TODO), + await_full_state=False, ) + # TODO: rewind + + # required_state = await self.storage_controllers.state.get_state_at( + # room_id, + # to_token, + # state_filter=StateFilter.from_types(TODO), + # ) + return SlidingSyncResult.RoomResult( # TODO: Dummy value + # TODO: Make this optional because a computed name doesn't make sense for translated cases name="TODO", # TODO: Dummy value avatar=None, diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index a9be37bbf3..0ae31f23e9 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -1021,6 +1021,7 @@ class SlidingSyncRestServlet(RestServlet): # shared between invite and knock rooms, see # https://github.com/matrix-org/matrix-spec-proposals/pull/3575#discussion_r1117629919 serialized_rooms[room_id]["invite_state"] = room_result.stripped_state + # TODO: `knocked_state` but that isn't specced yet return serialized_rooms From 9089bfe4dc505c02739968cdb1b67220e060580d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 10:06:29 -0500 Subject: [PATCH 07/62] Remove required_state for now --- synapse/handlers/sliding_sync.py | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index f9ec4f7961..f8fd2c6c5e 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -96,9 +96,10 @@ class RoomSyncConfig: Attributes: timeline_limit: The maximum number of events to return in the timeline. 
- required_state: The minimum set of state events requested for the room. The - values are close to `StateKey` but actually use a syntax where you can provide - `*` and `$LAZY` as the state key part of the tuple (type, state_key). + required_state: The set of state events requested for the room. The + values are close to `StateKey` but actually use a syntax where you can + provide `*` wildcard and `$LAZY` for lazy room members as the `state_key` part + of the tuple (type, state_key). """ timeline_limit: int @@ -829,22 +830,6 @@ class SlidingSyncHandler: stripped_state.append(strip_event(invite_or_knock_event)) - required_state = [] - if len(room_sync_config.required_state) > 0: - await self.storage_controllers.state.get_current_state( - room_id, - state_filter=StateFilter.from_types(TODO), - await_full_state=False, - ) - - # TODO: rewind - - # required_state = await self.storage_controllers.state.get_state_at( - # room_id, - # to_token, - # state_filter=StateFilter.from_types(TODO), - # ) - return SlidingSyncResult.RoomResult( # TODO: Dummy value # TODO: Make this optional because a computed name doesn't make sense for translated cases From 94279915d4432fefb87b2d210a8cd03fd633c002 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 10:09:33 -0500 Subject: [PATCH 08/62] Clean up knock_state comments --- synapse/rest/client/sync.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index 0ae31f23e9..db44773824 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -1017,11 +1017,13 @@ class SlidingSyncRestServlet(RestServlet): # Stripped state only applies to invite/knock rooms if room_result.stripped_state: - # TODO: Would be good to rename this to `stripped_state` so it can be - # shared between invite and knock rooms, see + # TODO: `knocked_state` but that isn't specced yet. + # + # TODO: Instead of adding `knocked_state`, it would be good to rename + # this to `stripped_state` so it can be shared between invite and knock + # rooms, see # https://github.com/matrix-org/matrix-spec-proposals/pull/3575#discussion_r1117629919 serialized_rooms[room_id]["invite_state"] = room_result.stripped_state - # TODO: `knocked_state` but that isn't specced yet return serialized_rooms From 19b22971711da0c8bdbaebed0d2f7a7ccb01e2ae Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 10:36:50 -0500 Subject: [PATCH 09/62] Calculate `num_live` --- synapse/handlers/sliding_sync.py | 55 ++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 10 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index f8fd2c6c5e..1d07e22c91 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -753,6 +753,7 @@ class SlidingSyncHandler: to_token: The point in the stream to sync up to. """ + # Assemble the list of timeline events timeline_events: List[EventBase] = [] limited = False # We want to use `to_token` (vs `from_token`) because we look backwards from the @@ -761,18 +762,34 @@ class SlidingSyncHandler: # fetched the events. 
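A toy model (plain integers, not Synapse code) of the `newly_joined` comparison this patch introduces just below: the stream position of the user's join is compared against the `from_token` position, and only joins that happened after the token count as new.

```python
# Reduced to integers: a position is "after" a token if its stream id is higher.
def persisted_after(event_stream: int, token_stream: int) -> bool:
    return event_stream > token_stream

from_token_stream = 100
print(persisted_after(105, from_token_stream))  # True: newly joined, include history
print(persisted_after(95, from_token_stream))   # False: joined before this sync window
```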
prev_batch_token = to_token if room_sync_config.timeline_limit > 0: + newly_joined = False + if ( + from_token is not None + and rooms_for_user_membership_at_to_token.membership == Membership.JOIN + ): + newly_joined = ( + rooms_for_user_membership_at_to_token.event_pos.stream + > from_token.room_key.get_stream_pos_for_instance( + rooms_for_user_membership_at_to_token.event_pos.instance_name + ) + ) + timeline_events, new_room_key = await self.store.paginate_room_events( room_id=room_id, # We're going to paginate backwards from the `to_token` from_key=to_token.room_key, - # We should always return historical messages (outside token range) in - # these cases because clients want to be able to show a basic screen of - # information: - # - Initial sync (because no `from_token`) - # - When users newly_join - # - TODO: For incremental sync where we haven't sent it down this + # We should return historical messages (outside token range) in the + # following cases because we want clients to be able to show a basic + # screen of information: + # - Initial sync (because no `from_token` to limit us anyway) + # - When users newly_joined + # - TODO: For an incremental sync where we haven't sent it down this # connection before - to_key=from_token.room_key if from_token is not None else None, + to_key=( + from_token.room_key + if from_token is not None and not newly_joined + else None + ), direction=Direction.BACKWARDS, # We add one so we can determine if there are enough events to saturate # the limit or not (see `limited`) @@ -804,6 +821,25 @@ class SlidingSyncHandler: stream=timeline_events[0].internal_metadata.stream_ordering - 1 ) + # Determine how many "live" events we have (events within the given token range). + # + # This is mostly useful to determine whether a given @mention event should + # make a noise or not. Clients cannot rely solely on the absence of + # `initial: true` to determine live events because if a room not in the + # sliding window bumps into the window because of an @mention it will have + # `initial: true` yet contain a single live event (with potentially other + # old events in the timeline) + num_live = 0 + if from_token is not None: + for timeline_event in timeline_events: + if ( + timeline_event.internal_metadata.stream_ordering + > from_token.room_key.get_stream_pos_for_instance( + timeline_event.internal_metadata.instance_name + ) + ): + num_live += 1 + prev_batch_token = prev_batch_token.copy_and_replace( StreamKeyType.ROOM, new_room_key ) @@ -838,7 +874,7 @@ class SlidingSyncHandler: avatar=None, # TODO: Dummy value heroes=None, - # Since we can't determine whether we've already sent a room down this + # TODO: Since we can't determine whether we've already sent a room down this # Sliding Sync connection before (we plan to add this optimization in the # future), we're always returning the requested room state instead of # updates. @@ -859,6 +895,5 @@ class SlidingSyncHandler: # (encrypted rooms). 
notification_count=0, highlight_count=0, - # TODO: Dummy value - num_live=0, + num_live=num_live, ) From 81d36f36c1731738b38f0b7842de1ce84a570d74 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 15:28:23 -0500 Subject: [PATCH 10/62] Add tests for `limited` --- synapse/handlers/sliding_sync.py | 22 +++-- tests/rest/client/test_sync.py | 140 ++++++++++++++++++++++++++++++- 2 files changed, 149 insertions(+), 13 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 1d07e22c91..90991031aa 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -37,7 +37,7 @@ from synapse.types import ( UserID, ) from synapse.types.handlers import OperationType, SlidingSyncConfig, SlidingSyncResult -from synapse.types.state import StateFilter, StateKey +from synapse.types.state import StateFilter from synapse.visibility import filter_events_for_client if TYPE_CHECKING: @@ -764,6 +764,7 @@ class SlidingSyncHandler: if room_sync_config.timeline_limit > 0: newly_joined = False if ( + # We can only determine new-ness if we have a `from_token` to define our range from_token is not None and rooms_for_user_membership_at_to_token.membership == Membership.JOIN ): @@ -778,11 +779,11 @@ class SlidingSyncHandler: room_id=room_id, # We're going to paginate backwards from the `to_token` from_key=to_token.room_key, - # We should return historical messages (outside token range) in the + # We should return historical messages (before token range) in the # following cases because we want clients to be able to show a basic # screen of information: # - Initial sync (because no `from_token` to limit us anyway) - # - When users newly_joined + # - When users `newly_joined` # - TODO: For an incremental sync where we haven't sent it down this # connection before to_key=( @@ -832,12 +833,15 @@ class SlidingSyncHandler: num_live = 0 if from_token is not None: for timeline_event in timeline_events: - if ( - timeline_event.internal_metadata.stream_ordering - > from_token.room_key.get_stream_pos_for_instance( - timeline_event.internal_metadata.instance_name - ) - ): + # This fields should be present for all persisted events + assert timeline_event.internal_metadata.stream_ordering is not None + assert timeline_event.internal_metadata.instance_name is not None + + persisted_position = PersistedEventPosition( + instance_name=timeline_event.internal_metadata.instance_name, + stream=timeline_event.internal_metadata.stream_ordering, + ) + if persisted_position.persisted_after(from_token.room_key): num_live += 1 prev_batch_token = prev_batch_token.copy_and_replace( diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 2b06767b8a..5b611cd096 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -35,7 +35,7 @@ from synapse.api.constants import ( ) from synapse.rest.client import devices, knock, login, read_marker, receipts, room, sync from synapse.server import HomeServer -from synapse.types import JsonDict, RoomStreamToken, StreamKeyType +from synapse.types import JsonDict, RoomStreamToken, StreamKeyType, StreamToken from synapse.util import Clock from tests import unittest @@ -1282,7 +1282,7 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): def test_sync_list(self) -> None: """ - Test that room IDs show up in the Sliding Sync lists + Test that room IDs show up in the Sliding Sync `lists` """ alice_user_id = self.register_user("alice", "correcthorse") alice_access_token = 
self.login(alice_user_id, "correcthorse") @@ -1387,7 +1387,7 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): def test_filter_list(self) -> None: """ - Test that filters apply to lists + Test that filters apply to `lists` """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -1462,7 +1462,7 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): def test_sort_list(self) -> None: """ - Test that the lists are sorted by `stream_ordering` + Test that the `lists` are sorted by `stream_ordering` """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -1516,3 +1516,135 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): ], channel.json_body["lists"]["foo-list"], ) + + def test_rooms_limited_initial_sync(self) -> None: + """ + Test that we mark `rooms` as `limited=True` when we saturate the `timeline_limit` + on initial sync. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.send(room_id1, "activity1", tok=user2_tok) + self.helper.send(room_id1, "activity2", tok=user2_tok) + event_response3 = self.helper.send(room_id1, "activity3", tok=user2_tok) + event_pos3 = self.get_success( + self.store.get_position_for_event(event_response3["event_id"]) + ) + event_response4 = self.helper.send(room_id1, "activity4", tok=user2_tok) + event_pos4 = self.get_success( + self.store.get_position_for_event(event_response4["event_id"]) + ) + event_response5 = self.helper.send(room_id1, "activity5", tok=user2_tok) + user1_join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint, + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 3, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # We expect to saturate the `timeline_limit` (there are more than 3 messages in the room) + self.assertEqual( + channel.json_body["rooms"][room_id1]["limited"], + True, + channel.json_body["rooms"][room_id1], + ) + # Check to make sure the latest events are returned + self.assertEqual( + [ + event["event_id"] + for event in channel.json_body["rooms"][room_id1]["timeline"] + ], + [ + event_response4["event_id"], + event_response5["event_id"], + user1_join_response["event_id"], + ], + channel.json_body["rooms"][room_id1]["timeline"], + ) + + # Check to make sure the `prev_batch` points at the right place + prev_batch_token = self.get_success( + StreamToken.from_string( + self.store, channel.json_body["rooms"][room_id1]["prev_batch"] + ) + ) + prev_batch_room_stream_token_serialized = self.get_success( + prev_batch_token.room_key.to_string(self.store) + ) + # If we use the `prev_batch` token to look backwards, we should see `event3` + # next so make sure the token encompasses it + self.assertEqual( + event_pos3.persisted_after(prev_batch_token.room_key), + False, + f"`prev_batch` token {prev_batch_room_stream_token_serialized} should be >= event_pos3={self.get_success(event_pos3.to_room_stream_token().to_string(self.store))}", + ) + # If we use the `prev_batch` token to look backwards, we shouldn't see `event4` + # anymore since it was just returned in this response. 
+ self.assertEqual( + event_pos4.persisted_after(prev_batch_token.room_key), + True, + f"`prev_batch` token {prev_batch_room_stream_token_serialized} should be < event_pos4={self.get_success(event_pos4.to_room_stream_token().to_string(self.store))}", + ) + + def test_not_limited_initial_sync(self) -> None: + """ + Test that we mark `rooms` as `limited=False` when there are no more events to + paginate to. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.send(room_id1, "activity1", tok=user2_tok) + self.helper.send(room_id1, "activity2", tok=user2_tok) + self.helper.send(room_id1, "activity3", tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint, + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 100, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # The timeline should be `limited=False` because we have all of the events (no + # more to paginate to) + self.assertEqual( + channel.json_body["rooms"][room_id1]["limited"], + False, + channel.json_body["rooms"][room_id1], + ) + # We're just looking to make sure we got all of the events before hitting the `timeline_limit` + self.assertEqual( + len(channel.json_body["rooms"][room_id1]["timeline"]), + 9, + channel.json_body["rooms"][room_id1]["timeline"], + ) From 9791209a3d5c82ad9975acea06aaacb55de2326a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 18:10:17 -0500 Subject: [PATCH 11/62] Add more tests --- synapse/handlers/sliding_sync.py | 33 ++-- synapse/rest/client/sync.py | 10 +- synapse/types/__init__.py | 3 + tests/rest/client/test_sync.py | 274 ++++++++++++++++++++++++++++++- 4 files changed, 296 insertions(+), 24 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 90991031aa..c1b0b2153a 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -769,26 +769,29 @@ class SlidingSyncHandler: and rooms_for_user_membership_at_to_token.membership == Membership.JOIN ): newly_joined = ( - rooms_for_user_membership_at_to_token.event_pos.stream - > from_token.room_key.get_stream_pos_for_instance( - rooms_for_user_membership_at_to_token.event_pos.instance_name + rooms_for_user_membership_at_to_token.event_pos.persisted_after( + from_token.room_key ) ) + # We should return historical messages (before token range) in the + # following cases because we want clients to be able to show a basic + # screen of information: + # - Initial sync (because no `from_token` to limit us anyway) + # - When users `newly_joined` + # - TODO: For an incremental sync where we haven't sent it down this + # connection before + should_limit_timeline_to_token_range = ( + from_token is not None and not newly_joined + ) + timeline_events, new_room_key = await self.store.paginate_room_events( room_id=room_id, # We're going to paginate backwards from the `to_token` from_key=to_token.room_key, - # We should return historical messages (before token range) in the - # following cases because we want clients to be able to show a basic - # screen of information: - # - Initial sync (because no `from_token` to limit us anyway) - # - When users `newly_joined` - 
# - TODO: For an incremental sync where we haven't sent it down this - # connection before to_key=( from_token.room_key - if from_token is not None and not newly_joined + if should_limit_timeline_to_token_range else None ), direction=Direction.BACKWARDS, @@ -832,7 +835,7 @@ class SlidingSyncHandler: # old events in the timeline) num_live = 0 if from_token is not None: - for timeline_event in timeline_events: + for timeline_event in reversed(timeline_events): # This fields should be present for all persisted events assert timeline_event.internal_metadata.stream_ordering is not None assert timeline_event.internal_metadata.instance_name is not None @@ -843,6 +846,12 @@ class SlidingSyncHandler: ) if persisted_position.persisted_after(from_token.room_key): num_live += 1 + else: + # Since we're iterating over the timeline events in + # reverse-chronological order, we can break once we hit an event + # that's not live. In the future, we could potentially optimize + # this more with a binary search (bisect). + break prev_batch_token = prev_batch_token.copy_and_replace( StreamKeyType.ROOM, new_room_key diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index db44773824..434eaa4789 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -785,7 +785,7 @@ class SlidingSyncRestServlet(RestServlet): Response JSON:: { - "next_pos": "s58_224_0_13_10_1_1_16_0_1", + "pos": "s58_224_0_13_10_1_1_16_0_1", "lists": { "foo-list": { "count": 1337, @@ -824,7 +824,8 @@ class SlidingSyncRestServlet(RestServlet): "joined_count": 41, "invited_count": 1, "notification_count": 1, - "highlight_count": 0 + "highlight_count": 0, + "num_live": 2" }, // rooms from list "!foo:bar": { @@ -849,7 +850,8 @@ class SlidingSyncRestServlet(RestServlet): "joined_count": 4, "invited_count": 0, "notification_count": 54, - "highlight_count": 3 + "highlight_count": 3, + "num_live": 1, }, // ... 
99 more items }, @@ -927,7 +929,7 @@ class SlidingSyncRestServlet(RestServlet): ) -> JsonDict: response: JsonDict = defaultdict(dict) - response["next_pos"] = await sliding_sync_result.next_pos.to_string(self.store) + response["pos"] = await sliding_sync_result.next_pos.to_string(self.store) serialized_lists = self.encode_lists(sliding_sync_result.lists) if serialized_lists: response["lists"] = serialized_lists diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py index 151658df53..b52236d602 100644 --- a/synapse/types/__init__.py +++ b/synapse/types/__init__.py @@ -1078,6 +1078,9 @@ class PersistedPosition: stream: int def persisted_after(self, token: AbstractMultiWriterStreamToken) -> bool: + """ + Checks whether this position happened after the token + """ return token.get_stream_pos_for_instance(self.instance_name) < self.stream diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 5b611cd096..d538716e5a 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -19,6 +19,7 @@ # # import json +import logging from typing import List from parameterized import parameterized, parameterized_class @@ -35,7 +36,7 @@ from synapse.api.constants import ( ) from synapse.rest.client import devices, knock, login, read_marker, receipts, room, sync from synapse.server import HomeServer -from synapse.types import JsonDict, RoomStreamToken, StreamKeyType, StreamToken +from synapse.types import JsonDict, RoomStreamToken, StreamKeyType, StreamToken, UserID from synapse.util import Clock from tests import unittest @@ -44,6 +45,8 @@ from tests.federation.transport.test_knocking import ( ) from tests.server import TimedOutException +logger = logging.getLogger(__name__) + class FilterTestCase(unittest.HomeserverTestCase): user_id = "@apple:test" @@ -1379,11 +1382,9 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): channel.await_result(timeout_ms=200) self.assertEqual(channel.code, 200, channel.json_body) - # We expect the `next_pos` in the result to be the same as what we requested + # We expect the next `pos` in the result to be the same as what we requested # with because we weren't able to find anything new yet. - self.assertEqual( - channel.json_body["next_pos"], future_position_token_serialized - ) + self.assertEqual(channel.json_body["pos"], future_position_token_serialized) def test_filter_list(self) -> None: """ @@ -1602,7 +1603,15 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): f"`prev_batch` token {prev_batch_room_stream_token_serialized} should be < event_pos4={self.get_success(event_pos4.to_room_stream_token().to_string(self.store))}", ) - def test_not_limited_initial_sync(self) -> None: + # With no `from_token` (initial sync), it's all historical since there is no + # "current" range + self.assertEqual( + channel.json_body["rooms"][room_id1]["num_live"], + 0, + channel.json_body["rooms"][room_id1], + ) + + def test_rooms_not_limited_initial_sync(self) -> None: """ Test that we mark `rooms` as `limited=False` when there are no more events to paginate to. 
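The `num_live` assertions in the tests below follow the same counting rule patch 11 added to the handler: walk the timeline newest-to-oldest and stop at the first event at or before `from_token`. A plain-Python sketch with made-up positions (not Synapse code):

```python
# Count timeline events that land after the `from_token` position.
timeline_positions = [95, 101, 102]  # oldest..newest stream orderings
from_token_stream = 100

num_live = 0
for pos in reversed(timeline_positions):
    if pos > from_token_stream:
        num_live += 1
    else:
        break  # everything older is historical, no need to keep scanning

print(num_live)  # 2
```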
@@ -1619,6 +1628,7 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): self.helper.join(room_id1, user1_id, tok=user1_tok) # Make the Sliding Sync request + timeline_limit = 100 channel = self.make_request( "POST", self.sync_endpoint, @@ -1627,7 +1637,7 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): "foo-list": { "ranges": [[0, 1]], "required_state": [], - "timeline_limit": 100, + "timeline_limit": timeline_limit, } } }, @@ -1642,9 +1652,257 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): False, channel.json_body["rooms"][room_id1], ) + expected_number_of_events = 9 # We're just looking to make sure we got all of the events before hitting the `timeline_limit` self.assertEqual( len(channel.json_body["rooms"][room_id1]["timeline"]), - 9, + expected_number_of_events, channel.json_body["rooms"][room_id1]["timeline"], ) + self.assertLessEqual(expected_number_of_events, timeline_limit) + + # With no `from_token` (initial sync), it's all historical since there is no + # "live" token range. + self.assertEqual( + channel.json_body["rooms"][room_id1]["num_live"], + 0, + channel.json_body["rooms"][room_id1], + ) + + def test_rooms_incremental_sync(self) -> None: + """ + Test that `rooms` data during an incremental sync after an initial sync. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + self.helper.send(room_id1, "activity before initial sync1", tok=user2_tok) + + # Make an initial Sliding Sync request to grab a token. This is also a sanity + # check that we can go from initial to incremental sync. + sync_params = { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 3, + } + } + } + channel = self.make_request( + "POST", + self.sync_endpoint, + sync_params, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + next_pos = channel.json_body["pos"] + + # Send some events but don't send enough to saturate the `timeline_limit`. + # We want to later test that we only get the new events since the `next_pos` + event_response2 = self.helper.send(room_id1, "activity after2", tok=user2_tok) + event_response3 = self.helper.send(room_id1, "activity after3", tok=user2_tok) + + # Make an incremental Sliding Sync request (what we're trying to test) + channel = self.make_request( + "POST", + self.sync_endpoint + f"?pos={next_pos}", + sync_params, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # We only expect to see the new events since the last sync which isn't enough to + # fill up the `timeline_limit`. + self.assertEqual( + channel.json_body["rooms"][room_id1]["limited"], + False, + f'Our `timeline_limit` was {sync_params["lists"]["foo-list"]["timeline_limit"]} ' + + f'and {len(channel.json_body["rooms"][room_id1]["timeline"])} events were returned in the timeline. 
' + + str(channel.json_body["rooms"][room_id1]), + ) + # Check to make sure the latest events are returned + self.assertEqual( + [ + event["event_id"] + for event in channel.json_body["rooms"][room_id1]["timeline"] + ], + [ + event_response2["event_id"], + event_response3["event_id"], + ], + channel.json_body["rooms"][room_id1]["timeline"], + ) + + # All events are "live" + self.assertEqual( + channel.json_body["rooms"][room_id1]["num_live"], + 2, + channel.json_body["rooms"][room_id1], + ) + + def test_rooms_newly_joined_incremental_sync(self) -> None: + """ + Test that when we make an incremental sync with a `newly_joined` `rooms`, we are + able to see some historical events before the `from_token`. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.send(room_id1, "activity before token1", tok=user2_tok) + event_response2 = self.helper.send( + room_id1, "activity before token2", tok=user2_tok + ) + + from_token = self.event_sources.get_current_token() + + # Join the room after the `from_token` which will make us consider this room as + # `newly_joined`. + user1_join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + + # Send some events but don't send enough to saturate the `timeline_limit`. + # We want to later test that we only get the new events since the `next_pos` + event_response3 = self.helper.send( + room_id1, "activity after token3", tok=user2_tok + ) + event_response4 = self.helper.send( + room_id1, "activity after token4", tok=user2_tok + ) + + # The `timeline_limit` is set to 4 so we can at least see one historical event + # before the `from_token`. We should see historical events because this is a + # `newly_joined` room. + timeline_limit = 4 + # Make an incremental Sliding Sync request (what we're trying to test) + channel = self.make_request( + "POST", + self.sync_endpoint + + f"?pos={self.get_success( + from_token.to_string(self.store) + )}", + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": timeline_limit, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # We should see the new events and the rest should be filled with historical + # events which will make us `limited=True` since there are more to paginate to. + self.assertEqual( + channel.json_body["rooms"][room_id1]["limited"], + True, + f"Our `timeline_limit` was {timeline_limit} " + + f'and {len(channel.json_body["rooms"][room_id1]["timeline"])} events were returned in the timeline. 
' + + str(channel.json_body["rooms"][room_id1]), + ) + # Check to make sure that the "live" and historical events are returned + self.assertEqual( + [ + event["event_id"] + for event in channel.json_body["rooms"][room_id1]["timeline"] + ], + [ + event_response2["event_id"], + user1_join_response["event_id"], + event_response3["event_id"], + event_response4["event_id"], + ], + channel.json_body["rooms"][room_id1]["timeline"], + ) + + # Only events after the `from_token` are "live" (join, event3, event4) + self.assertEqual( + channel.json_body["rooms"][room_id1]["num_live"], + 3, + channel.json_body["rooms"][room_id1], + ) + + def test_rooms_invite_sync(self) -> None: + """ + Test that `rooms` we are invited to have some stripped `invite_state` and that + we can't see any timeline events because we haven't joined the room yet. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user1 = UserID.from_string(user1_id) + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + user2 = UserID.from_string(user2_id) + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.send(room_id1, "activity before1", tok=user2_tok) + self.helper.send(room_id1, "activity before2", tok=user2_tok) + self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + self.helper.send(room_id1, "activity after3", tok=user2_tok) + self.helper.send(room_id1, "activity after4", tok=user2_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint, + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 3, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # Should not see anything (except maybe the invite event) because we haven't + # joined yet (`filter_events_for_client(...)` is doing the work here) + self.assertEqual( + channel.json_body["rooms"][room_id1]["timeline"], + [], + channel.json_body["rooms"][room_id1]["timeline"], + ) + # We should have some stripped state so the potential joiner can identify the + # room (we don't care about the order). 
+ self.assertCountEqual( + channel.json_body["rooms"][room_id1]["invite_state"], + [ + { + "content": {"creator": user2_id, "room_version": "10"}, + "sender": user2_id, + "state_key": "", + "type": "m.room.create", + }, + { + "content": {"join_rule": "public"}, + "sender": user2_id, + "state_key": "", + "type": "m.room.join_rules", + }, + { + "content": {"displayname": user2.localpart, "membership": "join"}, + "sender": user2_id, + "state_key": user2_id, + "type": "m.room.member", + }, + { + "content": {"displayname": user1.localpart, "membership": "invite"}, + "sender": user2_id, + "state_key": user1_id, + "type": "m.room.member", + }, + ], + channel.json_body["rooms"][room_id1]["invite_state"], + ) From 70ecd4d8d3646ddb1fb55b37cdf9a07612a59d2f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 19:38:35 -0500 Subject: [PATCH 12/62] Fix lint --- synapse/handlers/sliding_sync.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index c1b0b2153a..7a6ef1a2d9 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -774,24 +774,22 @@ class SlidingSyncHandler: ) ) - # We should return historical messages (before token range) in the - # following cases because we want clients to be able to show a basic - # screen of information: - # - Initial sync (because no `from_token` to limit us anyway) - # - When users `newly_joined` - # - TODO: For an incremental sync where we haven't sent it down this - # connection before - should_limit_timeline_to_token_range = ( - from_token is not None and not newly_joined - ) - timeline_events, new_room_key = await self.store.paginate_room_events( room_id=room_id, # We're going to paginate backwards from the `to_token` from_key=to_token.room_key, to_key=( + # Determine whether we should limit the timeline to the token range. 
+ # + # We should return historical messages (before token range) in the + # following cases because we want clients to be able to show a basic + # screen of information: + # - Initial sync (because no `from_token` to limit us anyway) + # - When users `newly_joined` + # - TODO: For an incremental sync where we haven't sent it down this + # connection before from_token.room_key - if should_limit_timeline_to_token_range + if from_token is not None and not newly_joined else None ), direction=Direction.BACKWARDS, From 71eabe5e63fc2d637785866c6e1f471fe67d0966 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 19:41:41 -0500 Subject: [PATCH 13/62] Make room name optional --- synapse/handlers/sliding_sync.py | 3 +-- synapse/rest/client/sync.py | 4 +++- synapse/types/handlers/__init__.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 7a6ef1a2d9..f2b29ce1d1 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -879,8 +879,7 @@ class SlidingSyncHandler: return SlidingSyncResult.RoomResult( # TODO: Dummy value - # TODO: Make this optional because a computed name doesn't make sense for translated cases - name="TODO", + name=None, # TODO: Dummy value avatar=None, # TODO: Dummy value diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index 434eaa4789..da28c2b3a5 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -990,7 +990,6 @@ class SlidingSyncRestServlet(RestServlet): ) serialized_rooms[room_id] = { - "name": room_result.name, "required_state": serialized_required_state, "timeline": serialized_timeline, "prev_batch": await room_result.prev_batch.to_string(self.store), @@ -1002,6 +1001,9 @@ class SlidingSyncRestServlet(RestServlet): "num_live": room_result.num_live, } + if room_result.name: + serialized_rooms[room_id]["name"] = room_result.name + if room_result.avatar: serialized_rooms[room_id]["avatar"] = room_result.avatar diff --git a/synapse/types/handlers/__init__.py b/synapse/types/handlers/__init__.py index 04b0ab972b..1b544456a6 100644 --- a/synapse/types/handlers/__init__.py +++ b/synapse/types/handlers/__init__.py @@ -186,7 +186,7 @@ class SlidingSyncResult: (with potentially other old events in the timeline). """ - name: str + name: Optional[str] avatar: Optional[str] heroes: Optional[List[EventBase]] initial: bool From 39b4f10533fded08647c198c80e6b185bc8558e0 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 19:55:12 -0500 Subject: [PATCH 14/62] Update comments --- synapse/handlers/sliding_sync.py | 14 +++++++++----- tests/rest/client/test_sync.py | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index f2b29ce1d1..cb5274d495 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -756,10 +756,10 @@ class SlidingSyncHandler: # Assemble the list of timeline events timeline_events: List[EventBase] = [] limited = False - # We want to use `to_token` (vs `from_token`) because we look backwards from the - # `to_token` up to the `timeline_limit` and we might not reach `from_token` - # before we hit the limit. We will update the room stream position once we've - # fetched the events. 
+ # We want to start off using the `to_token` (vs `from_token`) because we look + # backwards from the `to_token` up to the `timeline_limit` and we might not + # reach the `from_token` before we hit the limit. We will update the room stream + # position once we've fetched the events. prev_batch_token = to_token if room_sync_config.timeline_limit > 0: newly_joined = False @@ -803,6 +803,7 @@ class SlidingSyncHandler: # most recent). timeline_events.reverse() + # Make sure we don't expose any events that the client shouldn't see timeline_events = await filter_events_for_client( self.storage_controllers, user.to_string(), @@ -851,11 +852,14 @@ class SlidingSyncHandler: # this more with a binary search (bisect). break + # Update the `prev_batch_token` to point to the position that allows us to + # keep paginating backwards from the oldest event we return in the timeline. prev_batch_token = prev_batch_token.copy_and_replace( StreamKeyType.ROOM, new_room_key ) - # Figure out any stripped state events for invite/knocks + # Figure out any stripped state events for invite/knocks. This allows the + # potential joiner to identify the room. stripped_state: List[JsonDict] = [] if rooms_for_user_membership_at_to_token.membership in { Membership.INVITE, diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index d538716e5a..838ff6e2b4 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1874,7 +1874,7 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): [], channel.json_body["rooms"][room_id1]["timeline"], ) - # We should have some stripped state so the potential joiner can identify the + # We should have some `stripped_state` so the potential joiner can identify the # room (we don't care about the order). self.assertCountEqual( channel.json_body["rooms"][room_id1]["invite_state"], From 9883b0f63f87cf34b50e28390a0fa29d8e014443 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 21:00:26 -0500 Subject: [PATCH 15/62] Add bundled aggregations --- synapse/handlers/sliding_sync.py | 16 +++++++++++++++- synapse/rest/client/sync.py | 5 ++--- synapse/types/handlers/__init__.py | 10 ++++++++-- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index cb5274d495..e418a6e074 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -114,6 +114,7 @@ class SlidingSyncHandler: self.auth_blocking = hs.get_auth_blocking() self.notifier = hs.get_notifier() self.event_sources = hs.get_event_sources() + self.relations_handler = hs.get_relations_handler() self.rooms_to_exclude_globally = hs.config.server.rooms_to_exclude_from_sync async def wait_for_sync_for_user( @@ -881,6 +882,18 @@ class SlidingSyncHandler: stripped_state.append(strip_event(invite_or_knock_event)) + # TODO: Handle timeline gaps (`get_timeline_gaps()`) + + # If the timeline is `limited=True`, the client does not have all events + # necessary to calculate aggregations themselves. 
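# Illustrative aside (not part of the diff): why `limited` forces server-side
# bundling. In a gappy timeline the client missed some reaction events, so
# counting client-side undercounts. A toy model with plain dicts:
from collections import Counter

all_reactions = [
    {"relates_to": "$msg1", "key": "👍"},
    {"relates_to": "$msg1", "key": "👍"},
    {"relates_to": "$msg1", "key": "👍"},
]
client_visible = all_reactions[2:]  # earlier reactions fell into the gap

server_count = Counter(r["key"] for r in all_reactions)["👍"]
client_count = Counter(r["key"] for r in client_visible)["👍"]
assert (server_count, client_count) == (3, 1)  # the client alone would show 1
# The code below attaches server-computed aggregations for this reason.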
+ bundled_aggregations = None + if limited: + bundled_aggregations = ( + await self.relations_handler.get_bundled_aggregations( + timeline_events, user.to_string() + ) + ) + return SlidingSyncResult.RoomResult( # TODO: Dummy value name=None, @@ -895,7 +908,8 @@ class SlidingSyncHandler: initial=True, # TODO: Dummy value required_state=[], - timeline=timeline_events, + timeline_events=timeline_events, + bundled_aggregations=bundled_aggregations, # TODO: Dummy value is_dm=False, stripped_state=stripped_state, diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index da28c2b3a5..4333ee8c2b 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -976,11 +976,10 @@ class SlidingSyncRestServlet(RestServlet): serialized_rooms = {} for room_id, room_result in rooms.items(): serialized_timeline = await self.event_serializer.serialize_events( - room_result.timeline, + room_result.timeline_events, time_now, config=serialize_options, - # TODO - # bundle_aggregations=room.timeline.bundled_aggregations, + bundle_aggregations=room_result.bundled_aggregations, ) serialized_required_state = await self.event_serializer.serialize_events( diff --git a/synapse/types/handlers/__init__.py b/synapse/types/handlers/__init__.py index 1b544456a6..1ba5ea55c1 100644 --- a/synapse/types/handlers/__init__.py +++ b/synapse/types/handlers/__init__.py @@ -31,6 +31,7 @@ else: from pydantic import Extra from synapse.events import EventBase +from synapse.handlers.relations import BundledAggregations from synapse.types import JsonDict, JsonMapping, StreamToken, UserID from synapse.types.rest.client import SlidingSyncBody @@ -159,7 +160,11 @@ class SlidingSyncResult: entirely and NOT send "initial":false as this is wasteful on bandwidth. The absence of this flag means 'false'. required_state: The current state of the room - timeline: Latest events in the room. The last event is the most recent + timeline: Latest events in the room. The last event is the most recent. + bundled_aggregations: A mapping of event ID to the bundled aggregations for + the timeline events above. This allows clients to show accurate reaction + counts (or edits, threads), even if some of the reaction events were skipped + over in a gappy sync. is_dm: Flag to specify whether the room is a direct-message room (most likely between two people). stripped_state: Stripped state events (for rooms where the usre is @@ -191,7 +196,8 @@ class SlidingSyncResult: heroes: Optional[List[EventBase]] initial: bool required_state: List[EventBase] - timeline: List[EventBase] + timeline_events: List[EventBase] + bundled_aggregations: Optional[Dict[str, BundledAggregations]] is_dm: bool stripped_state: Optional[List[JsonDict]] prev_batch: StreamToken From 1c06153a0d3c24039a70b0c770947874bc05c246 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 21:22:40 -0500 Subject: [PATCH 16/62] Determine limited before filtering --- synapse/handlers/sliding_sync.py | 27 ++++++++++++++++----------- tests/rest/client/test_sync.py | 8 ++++++++ 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index e418a6e074..fe369949c5 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -804,17 +804,9 @@ class SlidingSyncHandler: # most recent). 
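# Illustrative aside (not part of the diff): the fetch-one-extra pattern this
# handler relies on, with ints standing in for stream-ordered events;
# `paginate_backwards` is a hypothetical stand-in for `paginate_room_events`.
from typing import List

def paginate_backwards(events: List[int], from_pos: int, limit: int) -> List[int]:
    # Newest first, at or before `from_pos`, up to `limit` events.
    return sorted((e for e in events if e <= from_pos), reverse=True)[:limit]

timeline_limit = 2
# Ask for one more than the limit so we can tell whether we saturated it.
page = paginate_backwards([1, 2, 3, 4, 5], from_pos=5, limit=timeline_limit + 1)
limited = len(page) > timeline_limit
page = page[:timeline_limit]  # drop the probe event
page.reverse()  # chronological order, so the last event is the most recent
assert limited and page == [4, 5]
# The real code below does the same reverse/trim on actual `EventBase`s.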
timeline_events.reverse() - # Make sure we don't expose any events that the client shouldn't see - timeline_events = await filter_events_for_client( - self.storage_controllers, - user.to_string(), - timeline_events, - is_peeking=rooms_for_user_membership_at_to_token.membership - != Membership.JOIN, - filter_send_to_client=True, - ) - - # Determine our `limited` status + # Determine our `limited` status based on the timeline. We do this before + # filtering the events so we can accurately determine if there is more to + # paginate even if we filter out some/all events. if len(timeline_events) > room_sync_config.timeline_limit: limited = True # Get rid of that extra "+ 1" event because we only used it to determine @@ -825,6 +817,19 @@ class SlidingSyncHandler: stream=timeline_events[0].internal_metadata.stream_ordering - 1 ) + # TODO: Does `newly_joined` affect `limited`? It does in sync v2 but I fail + # to understand why. + + # Make sure we don't expose any events that the client shouldn't see + timeline_events = await filter_events_for_client( + self.storage_controllers, + user.to_string(), + timeline_events, + is_peeking=rooms_for_user_membership_at_to_token.membership + != Membership.JOIN, + filter_send_to_client=True, + ) + # Determine how many "live" events we have (events within the given token range). # # This is mostly useful to determine whether a given @mention event should diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 838ff6e2b4..df85c94bd5 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1874,6 +1874,13 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): [], channel.json_body["rooms"][room_id1]["timeline"], ) + # Even though we don't get any timeline events because they are filtered out, + # there is still more to paginate + self.assertEqual( + channel.json_body["rooms"][room_id1]["limited"], + True, + channel.json_body["rooms"][room_id1], + ) # We should have some `stripped_state` so the potential joiner can identify the # room (we don't care about the order). self.assertCountEqual( @@ -1906,3 +1913,4 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): ], channel.json_body["rooms"][room_id1]["invite_state"], ) + From c81f3006a5e768e0e3f099dd7e001a7f1768b2c6 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 19 Jun 2024 12:54:39 -0500 Subject: [PATCH 17/62] Add better support for leave/ban --- synapse/handlers/sliding_sync.py | 48 ++-- synapse/storage/databases/main/stream.py | 20 ++ tests/rest/client/test_sync.py | 350 ++++++++++++++++++++++- tests/rest/client/utils.py | 4 +- 4 files changed, 399 insertions(+), 23 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index fe369949c5..0d2f4dbfff 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -775,24 +775,36 @@ class SlidingSyncHandler: ) ) + # We're going to paginate backwards from the `to_token` + from_bound = to_token.room_key + # People shouldn't see past their leave/ban event + if rooms_for_user_membership_at_to_token.membership in ( + Membership.LEAVE, + Membership.BAN, + ): + from_bound = ( + rooms_for_user_membership_at_to_token.event_pos.to_room_stream_token() + ) + + # Determine whether we should limit the timeline to the token range. 
+ # + # We should return historical messages (before token range) in the + # following cases because we want clients to be able to show a basic + # screen of information: + # - Initial sync (because no `from_token` to limit us anyway) + # - When users `newly_joined` + # - TODO: For an incremental sync where we haven't sent it down this + # connection before + to_bound = ( + from_token.room_key + if from_token is not None and not newly_joined + else None + ) + timeline_events, new_room_key = await self.store.paginate_room_events( room_id=room_id, - # We're going to paginate backwards from the `to_token` - from_key=to_token.room_key, - to_key=( - # Determine whether we should limit the timeline to the token range. - # - # We should return historical messages (before token range) in the - # following cases because we want clients to be able to show a basic - # screen of information: - # - Initial sync (because no `from_token` to limit us anyway) - # - When users `newly_joined` - # - TODO: For an incremental sync where we haven't sent it down this - # connection before - from_token.room_key - if from_token is not None and not newly_joined - else None - ), + from_key=from_bound, + to_key=to_bound, direction=Direction.BACKWARDS, # We add one so we can determine if there are enough events to saturate # the limit or not (see `limited`) @@ -867,10 +879,10 @@ class SlidingSyncHandler: # Figure out any stripped state events for invite/knocks. This allows the # potential joiner to identify the room. stripped_state: List[JsonDict] = [] - if rooms_for_user_membership_at_to_token.membership in { + if rooms_for_user_membership_at_to_token.membership in ( Membership.INVITE, Membership.KNOCK, - }: + ): invite_or_knock_event = await self.store.get_event( rooms_for_user_membership_at_to_token.event_id ) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index ff0d723684..c21e69ecda 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -1551,6 +1551,9 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ) -> Tuple[List[EventBase], RoomStreamToken]: """Returns list of events before or after a given token. + When Direction.FORWARDS: from_key < x <= to_key + When Direction.BACKWARDS: from_key >= x > to_key + Args: room_id from_key: The token used to stream from @@ -1567,6 +1570,23 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): and `to_key`). """ + # We can bail early if we're looking forwards, and our `to_key` is already + # before our `from_key`. + if ( + direction == Direction.FORWARDS + and to_key is not None + and to_key.is_before_or_eq(from_key) + ): + return [], from_key + # Or vice-versa, if we're looking backwards and our `from_key` is already before + # our `to_key`. 
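# Illustrative aside (not part of the diff): the empty-range bail-out above
# and below, modeled with plain ints standing in for `RoomStreamToken`
# positions (`a.is_before_or_eq(b)` becomes `a <= b`); `range_is_empty` is a
# hypothetical helper.
def range_is_empty(from_key: int, to_key: int, backwards: bool) -> bool:
    if not backwards:
        # Forwards selects from_key < x <= to_key: empty once to_key <= from_key.
        return to_key <= from_key
    # Backwards selects to_key < x <= from_key: empty once from_key <= to_key.
    return from_key <= to_key

assert range_is_empty(from_key=7, to_key=7, backwards=False)  # nothing newer
assert range_is_empty(from_key=3, to_key=5, backwards=True)  # already past to_key
assert not range_is_empty(from_key=3, to_key=5, backwards=False)
# The `elif` below is the backwards case with real tokens.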
+ elif ( + direction == Direction.BACKWARDS + and to_key is not None + and from_key.is_before_or_eq(to_key) + ): + return [], from_key + rows, token = await self.db_pool.runInteraction( "paginate_room_events", self._paginate_room_events_txn, diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index df85c94bd5..32542a64e8 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -31,6 +31,7 @@ from synapse.api.constants import ( AccountDataTypes, EventContentFields, EventTypes, + HistoryVisibility, ReceiptTypes, RelationTypes, ) @@ -1831,10 +1832,11 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): channel.json_body["rooms"][room_id1], ) - def test_rooms_invite_sync(self) -> None: + def test_rooms_invite_shared_history_initial_sync(self) -> None: """ Test that `rooms` we are invited to have some stripped `invite_state` and that - we can't see any timeline events because we haven't joined the room yet. + we can't see any timeline events because the history visiblity is `shared` and + we haven't joined the room yet. """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -1844,6 +1846,16 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): user2 = UserID.from_string(user2_id) room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + # Ensure we're testing with a room with `shared` history visibility which means + # history visible until you actually join the room. + history_visibility_response = self.helper.get_state( + room_id1, EventTypes.RoomHistoryVisibility, tok=user2_tok + ) + self.assertEqual( + history_visibility_response.get("history_visibility"), + HistoryVisibility.SHARED, + ) + self.helper.send(room_id1, "activity before1", tok=user2_tok) self.helper.send(room_id1, "activity before2", tok=user2_tok) self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) @@ -1868,12 +1880,21 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.code, 200, channel.json_body) # Should not see anything (except maybe the invite event) because we haven't - # joined yet (`filter_events_for_client(...)` is doing the work here) + # joined yet (history visibility is `shared`) (`filter_events_for_client(...)` + # is doing the work here) self.assertEqual( channel.json_body["rooms"][room_id1]["timeline"], [], channel.json_body["rooms"][room_id1]["timeline"], ) + # No "live" events in a initial sync (no `from_token` to define the "live" + # range) and no events returned in the timeline anyway so nothing could be + # "live". + self.assertEqual( + channel.json_body["rooms"][room_id1]["num_live"], + 0, + channel.json_body["rooms"][room_id1], + ) # Even though we don't get any timeline events because they are filtered out, # there is still more to paginate self.assertEqual( @@ -1914,3 +1935,326 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): channel.json_body["rooms"][room_id1]["invite_state"], ) + + def test_rooms_invite_world_readable_history_initial_sync(self) -> None: + """ + Test that `rooms` we are invited to have some stripped `invite_state` and that + we can't see any timeline events because the history visiblity is `shared` and + we haven't joined the room yet. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user1 = UserID.from_string(user1_id) + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + user2 = UserID.from_string(user2_id) + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, + extra_content={ + "preset": "public_chat", + "initial_state": [ + { + "content": {"history_visibility": HistoryVisibility.WORLD_READABLE}, + "state_key": "", + "type": EventTypes.RoomHistoryVisibility, + } + ], + },) + # Ensure we're testing with a room with `world_readable` history visibility + # which means events are visible to anyone even without membership. + history_visibility_response = self.helper.get_state( + room_id1, EventTypes.RoomHistoryVisibility, tok=user2_tok + ) + self.assertEqual( + history_visibility_response.get("history_visibility"), + HistoryVisibility.WORLD_READABLE, + ) + + self.helper.send(room_id1, "activity before1", tok=user2_tok) + event_response2 = self.helper.send(room_id1, "activity before2", tok=user2_tok) + use1_invite_response = self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + event_response3 = self.helper.send(room_id1, "activity after3", tok=user2_tok) + event_response4 = self.helper.send(room_id1, "activity after4", tok=user2_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint, + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + # Large enough to see the latest events and before the invite + "timeline_limit": 4, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # Should see the last 4 events in the room + self.assertEqual( + [ + event["event_id"] + for event in channel.json_body["rooms"][room_id1]["timeline"] + ], + [ + event_response2["event_id"], + use1_invite_response["event_id"], + event_response3["event_id"], + event_response4["event_id"], + ], + channel.json_body["rooms"][room_id1]["timeline"], + ) + # No "live" events in a initial sync (no `from_token` to define the "live" + # range) + self.assertEqual( + channel.json_body["rooms"][room_id1]["num_live"], + 0, + channel.json_body["rooms"][room_id1], + ) + # There is still more to paginate + self.assertEqual( + channel.json_body["rooms"][room_id1]["limited"], + True, + channel.json_body["rooms"][room_id1], + ) + # We should have some `stripped_state` so the potential joiner can identify the + # room (we don't care about the order). + self.assertCountEqual( + channel.json_body["rooms"][room_id1]["invite_state"], + [ + { + "content": {"creator": user2_id, "room_version": "10"}, + "sender": user2_id, + "state_key": "", + "type": "m.room.create", + }, + { + "content": {"join_rule": "public"}, + "sender": user2_id, + "state_key": "", + "type": "m.room.join_rules", + }, + { + "content": {"displayname": user2.localpart, "membership": "join"}, + "sender": user2_id, + "state_key": user2_id, + "type": "m.room.member", + }, + { + "content": {"displayname": user1.localpart, "membership": "invite"}, + "sender": user2_id, + "state_key": user1_id, + "type": "m.room.member", + }, + ], + channel.json_body["rooms"][room_id1]["invite_state"], + ) + + def test_rooms_ban_initial_sync(self) -> None: + """ + Test that `rooms` we are banned from in an intial sync only allows us to see + timeline events up to the ban event. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.send(room_id1, "activity before1", tok=user2_tok) + self.helper.send(room_id1, "activity before2", tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + + event_response3 = self.helper.send(room_id1, "activity after3", tok=user2_tok) + event_response4 = self.helper.send(room_id1, "activity after4", tok=user2_tok) + user1_ban_response = self.helper.ban( + room_id1, src=user2_id, targ=user1_id, tok=user2_tok + ) + + self.helper.send(room_id1, "activity after5", tok=user2_tok) + self.helper.send(room_id1, "activity after6", tok=user2_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint, + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 3, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # We should see events before the ban but not after + self.assertEqual( + [ + event["event_id"] + for event in channel.json_body["rooms"][room_id1]["timeline"] + ], + [ + event_response3["event_id"], + event_response4["event_id"], + user1_ban_response["event_id"], + ], + channel.json_body["rooms"][room_id1]["timeline"], + ) + # No "live" events in a initial sync (no `from_token` to define the "live" + # range) + self.assertEqual( + channel.json_body["rooms"][room_id1]["num_live"], + 0, + channel.json_body["rooms"][room_id1], + ) + # There are more events to paginate to + self.assertEqual( + channel.json_body["rooms"][room_id1]["limited"], + True, + channel.json_body["rooms"][room_id1], + ) + + def test_rooms_ban_incremental_sync1(self) -> None: + """ + Test that `rooms` we are banned from during the next incremental sync only + allows us to see timeline events up to the ban event. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.send(room_id1, "activity before1", tok=user2_tok) + self.helper.send(room_id1, "activity before2", tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + + from_token = self.event_sources.get_current_token() + + event_response3 = self.helper.send(room_id1, "activity after3", tok=user2_tok) + event_response4 = self.helper.send(room_id1, "activity after4", tok=user2_tok) + # The ban is within the token range (between the `from_token` and the sliding + # sync request) + user1_ban_response = self.helper.ban( + room_id1, src=user2_id, targ=user1_id, tok=user2_tok + ) + + self.helper.send(room_id1, "activity after5", tok=user2_tok) + self.helper.send(room_id1, "activity after6", tok=user2_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint + + f"?pos={self.get_success( + from_token.to_string(self.store) + )}", + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 4, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # We should see events before the ban but not after + self.assertEqual( + [ + event["event_id"] + for event in channel.json_body["rooms"][room_id1]["timeline"] + ], + [ + event_response3["event_id"], + event_response4["event_id"], + user1_ban_response["event_id"], + ], + channel.json_body["rooms"][room_id1]["timeline"], + ) + # All live events in the incremental sync + self.assertEqual( + channel.json_body["rooms"][room_id1]["num_live"], + 3, + channel.json_body["rooms"][room_id1], + ) + # There aren't anymore events to paginate to in this range + self.assertEqual( + channel.json_body["rooms"][room_id1]["limited"], + False, + channel.json_body["rooms"][room_id1], + ) + + def test_rooms_ban_incremental_sync2(self) -> None: + """ + Test that `rooms` we are banned from before the incremental sync doesn't return + any events in the timeline. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.send(room_id1, "activity before1", tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + + self.helper.send(room_id1, "activity after2", tok=user2_tok) + # The ban is before we get our `from_token` + self.helper.ban(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + + self.helper.send(room_id1, "activity after3", tok=user2_tok) + + from_token = self.event_sources.get_current_token() + + self.helper.send(room_id1, "activity after4", tok=user2_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint + + f"?pos={self.get_success( + from_token.to_string(self.store) + )}", + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 4, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # Nothing to see for this banned user in the room in the token range + self.assertEqual( + channel.json_body["rooms"][room_id1]["timeline"], + [], + channel.json_body["rooms"][room_id1]["timeline"], + ) + # No events returned in the timeline so nothing is "live" + self.assertEqual( + channel.json_body["rooms"][room_id1]["num_live"], + 0, + channel.json_body["rooms"][room_id1], + ) + # There aren't anymore events to paginate to in this range + self.assertEqual( + channel.json_body["rooms"][room_id1]["limited"], + False, + channel.json_body["rooms"][room_id1], + ) diff --git a/tests/rest/client/utils.py b/tests/rest/client/utils.py index f0ba40a1f1..e43140720d 100644 --- a/tests/rest/client/utils.py +++ b/tests/rest/client/utils.py @@ -261,9 +261,9 @@ class RestHelper: targ: str, expect_code: int = HTTPStatus.OK, tok: Optional[str] = None, - ) -> None: + ) -> JsonDict: """A convenience helper: `change_membership` with `membership` preset to "ban".""" - self.change_membership( + return self.change_membership( room=room, src=src, targ=targ, From d801db0d96ef53e1eaa42c7540f744a56de90b59 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 19 Jun 2024 13:24:01 -0500 Subject: [PATCH 18/62] Fix lints --- tests/rest/client/test_sync.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 32542a64e8..6db6f855ba 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1935,7 +1935,6 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): channel.json_body["rooms"][room_id1]["invite_state"], ) - def test_rooms_invite_world_readable_history_initial_sync(self) -> None: """ Test that `rooms` we are invited to have some stripped `invite_state` and that @@ -1949,17 +1948,22 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): user2_tok = self.login(user2_id, "pass") user2 = UserID.from_string(user2_id) - room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, + room_id1 = self.helper.create_room_as( + user2_id, + tok=user2_tok, extra_content={ "preset": "public_chat", "initial_state": [ { - "content": {"history_visibility": HistoryVisibility.WORLD_READABLE}, + "content": { + "history_visibility": HistoryVisibility.WORLD_READABLE + }, "state_key": "", "type": EventTypes.RoomHistoryVisibility, } ], - },) + }, + ) # Ensure we're testing with a room with 
`world_readable` history visibility # which means events are visible to anyone even without membership. history_visibility_response = self.helper.get_state( @@ -1972,7 +1976,9 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): self.helper.send(room_id1, "activity before1", tok=user2_tok) event_response2 = self.helper.send(room_id1, "activity before2", tok=user2_tok) - use1_invite_response = self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + use1_invite_response = self.helper.invite( + room_id1, src=user2_id, targ=user1_id, tok=user2_tok + ) event_response3 = self.helper.send(room_id1, "activity after3", tok=user2_tok) event_response4 = self.helper.send(room_id1, "activity after4", tok=user2_tok) From 884b44801253c6b97ae07f958744c8443649153e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 19 Jun 2024 13:50:28 -0500 Subject: [PATCH 19/62] Update some wording --- synapse/handlers/sliding_sync.py | 6 +++--- tests/rest/client/test_sync.py | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 0d2f4dbfff..3e49054e43 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -264,7 +264,7 @@ class SlidingSyncHandler: ) ) - # Update the relevant room map + # Take the superset of the `RoomSyncConfig` for each room for room_id in sliced_room_ids: if relevant_room_map.get(room_id) is not None: # Take the highest timeline limit @@ -739,7 +739,7 @@ class SlidingSyncHandler: to_token: StreamToken, ) -> SlidingSyncResult.RoomResult: """ - Fetch room data for a room. + Fetch room data for the sync response. We fetch data according to the token range (> `from_token` and <= `to_token`). @@ -760,7 +760,7 @@ class SlidingSyncHandler: # We want to start off using the `to_token` (vs `from_token`) because we look # backwards from the `to_token` up to the `timeline_limit` and we might not # reach the `from_token` before we hit the limit. We will update the room stream - # position once we've fetched the events. + # position once we've fetched the events to point to the earliest event fetched. prev_batch_token = to_token if room_sync_config.timeline_limit > 0: newly_joined = False diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 3213059a78..a55804c96c 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1607,7 +1607,7 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): ) # With no `from_token` (initial sync), it's all historical since there is no - # "current" range + # "live" range self.assertEqual( channel.json_body["rooms"][room_id1]["num_live"], 0, @@ -1674,7 +1674,7 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): def test_rooms_incremental_sync(self) -> None: """ - Test that `rooms` data during an incremental sync after an initial sync. + Test `rooms` data during an incremental sync after an initial sync. """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -1889,7 +1889,7 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): [], channel.json_body["rooms"][room_id1]["timeline"], ) - # No "live" events in a initial sync (no `from_token` to define the "live" + # No "live" events in an initial sync (no `from_token` to define the "live" # range) and no events returned in the timeline anyway so nothing could be # "live". 
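# Illustrative aside (not part of the diff): how `num_live` falls out of the
# "live" range. An event is live iff its stream position is after the
# `from_token`. Ints stand in for stream positions; `count_live` is a
# hypothetical stand-in for the handler's counting loop.
from typing import List

def count_live(timeline_positions: List[int], from_pos: int) -> int:
    num_live = 0
    for pos in reversed(timeline_positions):  # walk from the newest event
        if pos > from_pos:
            num_live += 1
        else:
            break  # timeline is ordered, so everything older is historical too
    return num_live

assert count_live([2, 4, 6, 8], from_pos=5) == 2  # 6 and 8 are live
assert count_live([2, 4], from_pos=9) == 0  # initial-sync-like: nothing live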
self.assertEqual( @@ -2016,7 +2016,7 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): ], channel.json_body["rooms"][room_id1]["timeline"], ) - # No "live" events in a initial sync (no `from_token` to define the "live" + # No "live" events in an initial sync (no `from_token` to define the "live" # range) self.assertEqual( channel.json_body["rooms"][room_id1]["num_live"], @@ -2116,7 +2116,7 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): ], channel.json_body["rooms"][room_id1]["timeline"], ) - # No "live" events in a initial sync (no `from_token` to define the "live" + # No "live" events in an initial sync (no `from_token` to define the "live" # range) self.assertEqual( channel.json_body["rooms"][room_id1]["num_live"], @@ -2206,7 +2206,7 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): def test_rooms_ban_incremental_sync2(self) -> None: """ - Test that `rooms` we are banned from before the incremental sync doesn't return + Test that `rooms` we are banned from before the incremental sync don't return any events in the timeline. """ user1_id = self.register_user("user1", "pass") From 0eb029472e5410b780156f12db13434b003f42ae Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 20 Jun 2024 14:34:10 -0500 Subject: [PATCH 20/62] Remove unused `IncludeOldRooms` class --- synapse/types/rest/client/__init__.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/synapse/types/rest/client/__init__.py b/synapse/types/rest/client/__init__.py index 25fbd772f6..5d453769b5 100644 --- a/synapse/types/rest/client/__init__.py +++ b/synapse/types/rest/client/__init__.py @@ -154,10 +154,6 @@ class SlidingSyncBody(RequestBodyModel): (Max 1000 messages) """ - class IncludeOldRooms(RequestBodyModel): - timeline_limit: StrictInt - required_state: List[Tuple[StrictStr, StrictStr]] - required_state: List[Tuple[StrictStr, StrictStr]] # mypy workaround via https://github.com/pydantic/pydantic/issues/156#issuecomment-1130883884 if TYPE_CHECKING: From 87fac19fdebd070b09a7a7daae7217ccaa2f2d1e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 24 Jun 2024 10:15:15 -0500 Subject: [PATCH 21/62] Fix lints See https://github.com/element-hq/synapse/pull/17320#discussion_r1647701997 ``` synapse/federation/federation_server.py:677: error: Cannot determine type of "_join_rate_per_room_limiter" [has-type] synapse/federation/federation_server.py:720: error: Cannot determine type of "_join_rate_per_room_limiter" [has-type] ``` --- synapse/types/handlers/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/types/handlers/__init__.py b/synapse/types/handlers/__init__.py index 1ba5ea55c1..8e097d8b48 100644 --- a/synapse/types/handlers/__init__.py +++ b/synapse/types/handlers/__init__.py @@ -31,10 +31,12 @@ else: from pydantic import Extra from synapse.events import EventBase -from synapse.handlers.relations import BundledAggregations from synapse.types import JsonDict, JsonMapping, StreamToken, UserID from synapse.types.rest.client import SlidingSyncBody +if TYPE_CHECKING: + from synapse.handlers.relations import BundledAggregations + class ShutdownRoomParams(TypedDict): """ @@ -197,7 +199,7 @@ class SlidingSyncResult: initial: bool required_state: List[EventBase] timeline_events: List[EventBase] - bundled_aggregations: Optional[Dict[str, BundledAggregations]] + bundled_aggregations: Optional[Dict[str, "BundledAggregations"]] is_dm: bool stripped_state: Optional[List[JsonDict]] prev_batch: StreamToken From 0e71a2f2d1231603d4643f9402dbd7b4f4df226b Mon Sep 17 
00:00:00 2001 From: Eric Eastwood Date: Mon, 24 Jun 2024 15:56:27 -0500 Subject: [PATCH 22/62] Add TODO for filtering call invites in public rooms --- synapse/handlers/sliding_sync.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 3e49054e43..a6e84cb976 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -841,6 +841,8 @@ class SlidingSyncHandler: != Membership.JOIN, filter_send_to_client=True, ) + # TODO: Filter out `EventTypes.CallInvite` in public rooms, + # see https://github.com/element-hq/synapse/pull/16908#discussion_r1651598029 # Determine how many "live" events we have (events within the given token range). # From 21ca02c5ad2b030f3a3d76526690b23f40ef9412 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 24 Jun 2024 16:08:58 -0500 Subject: [PATCH 23/62] `newly_joined` vs `limited` already being tracked in a discussion See https://github.com/element-hq/synapse/pull/17320#discussion_r1646579623 if anything comes out of it. --- synapse/handlers/sliding_sync.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index a6e84cb976..4d73134e7f 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -829,9 +829,6 @@ class SlidingSyncHandler: stream=timeline_events[0].internal_metadata.stream_ordering - 1 ) - # TODO: Does `newly_joined` affect `limited`? It does in sync v2 but I fail - # to understand why. - # Make sure we don't expose any events that the client shouldn't see timeline_events = await filter_events_for_client( self.storage_controllers, From 35683119890e06bb65bca24e303154acb4f62a1b Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 24 Jun 2024 19:08:18 -0500 Subject: [PATCH 24/62] Fix spelling typo --- synapse/handlers/sliding_sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 4d73134e7f..d5390e8945 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -535,7 +535,7 @@ class SlidingSyncHandler: # 2) ----------------------------------------------------- # We fix-up newly_left rooms after the first fixup because it may have removed - # some left rooms that we can figure out our newly_left in the following code + # some left rooms that we can figure out are newly_left in the following code # 2) Fetch membership changes that fall in the range from `from_token` up to `to_token` membership_change_events_in_from_to_range = [] From 7aea406c22066f061cf537ed25d0dbb00a107308 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 11:18:27 -0500 Subject: [PATCH 25/62] Just stripped_state for invite rooms --- synapse/handlers/sliding_sync.py | 27 ++-- synapse/rest/client/sync.py | 57 ++++++--- synapse/types/handlers/__init__.py | 15 ++- tests/rest/client/test_sync.py | 192 ++++++++++++++++++++++------- 4 files changed, 210 insertions(+), 81 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index d5390e8945..991d32356e 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -755,14 +755,23 @@ class SlidingSyncHandler: """ # Assemble the list of timeline events - timeline_events: List[EventBase] = [] - limited = False - # We want to start off using the `to_token` (vs `from_token`) because we look - # backwards from the `to_token` up to the `timeline_limit` 
and we might not - # reach the `from_token` before we hit the limit. We will update the room stream - # position once we've fetched the events to point to the earliest event fetched. - prev_batch_token = to_token - if room_sync_config.timeline_limit > 0: + timeline_events: Optional[List[EventBase]] = None + limited: Optional[bool] = None + prev_batch_token: Optional[StreamToken] = None + num_live: Optional[int] = None + if ( + room_sync_config.timeline_limit > 0 + # No timeline for invite/knock rooms (just `stripped_state`) + and rooms_for_user_membership_at_to_token.membership + not in (Membership.INVITE, Membership.KNOCK) + ): + limited = False + # We want to start off using the `to_token` (vs `from_token`) because we look + # backwards from the `to_token` up to the `timeline_limit` and we might not + # reach the `from_token` before we hit the limit. We will update the room stream + # position once we've fetched the events to point to the earliest event fetched. + prev_batch_token = to_token + newly_joined = False if ( # We can only determine new-ness if we have a `from_token` to define our range @@ -903,7 +912,7 @@ class SlidingSyncHandler: # If the timeline is `limited=True`, the client does not have all events # necessary to calculate aggregations themselves. bundled_aggregations = None - if limited: + if limited and timeline_events is not None: bundled_aggregations = ( await self.relations_handler.get_bundled_aggregations( timeline_events, user.to_string() diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index b60af6356a..1d955a2e89 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -973,31 +973,13 @@ class SlidingSyncRestServlet(RestServlet): requester=requester, ) - serialized_rooms = {} + serialized_rooms: Dict[str, JsonDict] = {} for room_id, room_result in rooms.items(): - serialized_timeline = await self.event_serializer.serialize_events( - room_result.timeline_events, - time_now, - config=serialize_options, - bundle_aggregations=room_result.bundled_aggregations, - ) - - serialized_required_state = await self.event_serializer.serialize_events( - room_result.required_state, - time_now, - config=serialize_options, - ) - serialized_rooms[room_id] = { - "required_state": serialized_required_state, - "timeline": serialized_timeline, - "prev_batch": await room_result.prev_batch.to_string(self.store), - "limited": room_result.limited, "joined_count": room_result.joined_count, "invited_count": room_result.invited_count, "notification_count": room_result.notification_count, "highlight_count": room_result.highlight_count, - "num_live": room_result.num_live, } if room_result.name: @@ -1014,12 +996,47 @@ class SlidingSyncRestServlet(RestServlet): if room_result.initial: serialized_rooms[room_id]["initial"] = room_result.initial + # This will omitted for invite/knock rooms with `stripped_state` + if room_result.required_state is not None: + serialized_required_state = ( + await self.event_serializer.serialize_events( + room_result.required_state, + time_now, + config=serialize_options, + ) + ) + serialized_rooms[room_id]["required_state"] = serialized_required_state + + # This will omitted for invite/knock rooms with `stripped_state` + if room_result.timeline_events is not None: + serialized_timeline = await self.event_serializer.serialize_events( + room_result.timeline_events, + time_now, + config=serialize_options, + bundle_aggregations=room_result.bundled_aggregations, + ) + serialized_rooms[room_id]["timeline"] = serialized_timeline + + # 
This will be omitted for invite/knock rooms with `stripped_state`
+            if room_result.limited is not None:
+                serialized_rooms[room_id]["limited"] = room_result.limited
+
+            # This will be omitted for invite/knock rooms with `stripped_state`
+            if room_result.prev_batch is not None:
+                serialized_rooms[room_id]["prev_batch"] = (
+                    await room_result.prev_batch.to_string(self.store)
+                )
+
+            # This will be omitted for invite/knock rooms with `stripped_state`
+            if room_result.num_live is not None:
+                serialized_rooms[room_id]["num_live"] = room_result.num_live
+
             # Field should be absent on non-DM rooms
             if room_result.is_dm:
                 serialized_rooms[room_id]["is_dm"] = room_result.is_dm
 
             # Stripped state only applies to invite/knock rooms
-            if room_result.stripped_state:
+            if room_result.stripped_state is not None:
                 # TODO: `knocked_state` but that isn't specced yet.
                 #
                 # TODO: Instead of adding `knocked_state`, it would be good to rename
diff --git a/synapse/types/handlers/__init__.py b/synapse/types/handlers/__init__.py
index 8e097d8b48..d50d02bfc6 100644
--- a/synapse/types/handlers/__init__.py
+++ b/synapse/types/handlers/__init__.py
@@ -197,18 +197,23 @@ class SlidingSyncResult:
         avatar: Optional[str]
         heroes: Optional[List[EventBase]]
         initial: bool
-        required_state: List[EventBase]
-        timeline_events: List[EventBase]
+        # Only optional because it won't be included for invite/knock rooms with `stripped_state`
+        required_state: Optional[List[EventBase]]
+        # Only optional because it won't be included for invite/knock rooms with `stripped_state`
+        timeline_events: Optional[List[EventBase]]
         bundled_aggregations: Optional[Dict[str, "BundledAggregations"]]
         is_dm: bool
         stripped_state: Optional[List[JsonDict]]
-        prev_batch: StreamToken
-        limited: bool
+        # Only optional because it won't be included for invite/knock rooms with `stripped_state`
+        prev_batch: Optional[StreamToken]
+        # Only optional because it won't be included for invite/knock rooms with `stripped_state`
+        limited: Optional[bool]
         joined_count: int
         invited_count: int
         notification_count: int
         highlight_count: int
-        num_live: int
+        # Only optional because it won't be included for invite/knock rooms with `stripped_state`
+        num_live: Optional[int]
 
     @attr.s(slots=True, frozen=True, auto_attribs=True)
     class SlidingWindowList:
diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py
index a55804c96c..ad6b29b412 100644
--- a/tests/rest/client/test_sync.py
+++ b/tests/rest/client/test_sync.py
@@ -1881,27 +1881,134 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase):
         )
         self.assertEqual(channel.code, 200, channel.json_body)
 
-        # Should not see anything (except maybe the invite event) because we haven't
-        # joined yet (history visibility is `shared`) (`filter_events_for_client(...)`
-        # is doing the work here)
-        self.assertEqual(
-            channel.json_body["rooms"][room_id1]["timeline"],
-            [],
-            channel.json_body["rooms"][room_id1]["timeline"],
-        )
-        # No "live" events in an initial sync (no `from_token` to define the "live"
-        # range) and no events returned in the timeline anyway so nothing could be
-        # "live".
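# Illustrative aside (not part of the diff): the omit-when-`None` shape of
# the serializer above, with plain dicts in place of `RoomResult`;
# `serialize_room` is a hypothetical stand-in for the servlet's encoding loop.
from typing import Any, Dict

def serialize_room(room_result: Dict[str, Any]) -> Dict[str, Any]:
    serialized: Dict[str, Any] = {
        "joined_count": room_result["joined_count"],
        "invited_count": room_result["invited_count"],
    }
    # Timeline-related fields are omitted entirely (not sent as null) for
    # invite/knock rooms, which only carry `stripped_state`.
    for field in ("timeline", "limited", "prev_batch", "num_live"):
        if room_result.get(field) is not None:
            serialized[field] = room_result[field]
    return serialized

invite_room = {"joined_count": 1, "invited_count": 1, "timeline": None}
assert "timeline" not in serialize_room(invite_room)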
- self.assertEqual( - channel.json_body["rooms"][room_id1]["num_live"], - 0, + # `timeline` is omitted for `invite` rooms with `stripped_state` + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("timeline"), channel.json_body["rooms"][room_id1], ) - # Even though we don't get any timeline events because they are filtered out, - # there is still more to paginate + # `num_live` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("num_live"), + channel.json_body["rooms"][room_id1], + ) + # `limited` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("limited"), + channel.json_body["rooms"][room_id1], + ) + # `prev_batch` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("prev_batch"), + channel.json_body["rooms"][room_id1], + ) + # We should have some `stripped_state` so the potential joiner can identify the + # room (we don't care about the order). + self.assertCountEqual( + channel.json_body["rooms"][room_id1]["invite_state"], + [ + { + "content": {"creator": user2_id, "room_version": "10"}, + "sender": user2_id, + "state_key": "", + "type": "m.room.create", + }, + { + "content": {"join_rule": "public"}, + "sender": user2_id, + "state_key": "", + "type": "m.room.join_rules", + }, + { + "content": {"displayname": user2.localpart, "membership": "join"}, + "sender": user2_id, + "state_key": user2_id, + "type": "m.room.member", + }, + { + "content": {"displayname": user1.localpart, "membership": "invite"}, + "sender": user2_id, + "state_key": user1_id, + "type": "m.room.member", + }, + ], + channel.json_body["rooms"][room_id1]["invite_state"], + ) + + def test_rooms_invite_shared_history_incremental_sync(self) -> None: + """ + Test that `rooms` we are invited to have some stripped `invite_state` + + This is an `invite` room so we should only have `stripped_state` (no timeline) + but we also shouldn't see any timeline events because the history visiblity is + `shared` and we haven't joined the room yet. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user1 = UserID.from_string(user1_id) + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + user2 = UserID.from_string(user2_id) + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + # Ensure we're testing with a room with `shared` history visibility which means + # history visible until you actually join the room. 
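# Illustrative aside (not part of the diff): a rough model of how history
# visibility gates what an invited-but-not-joined user sees. The real logic
# lives in `filter_events_for_client(...)` and is considerably subtler
# (for example, `invited` only applies from the invite event onwards).
def invited_user_can_see_timeline(history_visibility: str) -> bool:
    # `world_readable`: readable by anyone, membership or not.
    # `invited`: readable once invited; `shared`/`joined`: only after joining.
    return history_visibility in ("world_readable", "invited")

assert not invited_user_can_see_timeline("shared")  # hence nothing visible here
assert invited_user_can_see_timeline("world_readable")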
+ history_visibility_response = self.helper.get_state( + room_id1, EventTypes.RoomHistoryVisibility, tok=user2_tok + ) self.assertEqual( - channel.json_body["rooms"][room_id1]["limited"], - True, + history_visibility_response.get("history_visibility"), + HistoryVisibility.SHARED, + ) + + self.helper.send(room_id1, "activity before invite1", tok=user2_tok) + self.helper.send(room_id1, "activity before invite2", tok=user2_tok) + self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + self.helper.send(room_id1, "activity after invite3", tok=user2_tok) + self.helper.send(room_id1, "activity after invite4", tok=user2_tok) + + from_token = self.event_sources.get_current_token() + + self.helper.send(room_id1, "activity after token5", tok=user2_tok) + self.helper.send(room_id1, "activity after toekn6", tok=user2_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint + + f"?pos={self.get_success( + from_token.to_string(self.store) + )}", + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 3, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # `timeline` is omitted for `invite` rooms with `stripped_state` + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("timeline"), + channel.json_body["rooms"][room_id1], + ) + # `num_live` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("num_live"), + channel.json_body["rooms"][room_id1], + ) + # `limited` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("limited"), + channel.json_body["rooms"][room_id1], + ) + # `prev_batch` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("prev_batch"), channel.json_body["rooms"][room_id1], ) # We should have some `stripped_state` so the potential joiner can identify the @@ -1977,12 +2084,10 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): ) self.helper.send(room_id1, "activity before1", tok=user2_tok) - event_response2 = self.helper.send(room_id1, "activity before2", tok=user2_tok) - use1_invite_response = self.helper.invite( - room_id1, src=user2_id, targ=user1_id, tok=user2_tok - ) - event_response3 = self.helper.send(room_id1, "activity after3", tok=user2_tok) - event_response4 = self.helper.send(room_id1, "activity after4", tok=user2_tok) + self.helper.send(room_id1, "activity before2", tok=user2_tok) + self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + self.helper.send(room_id1, "activity after3", tok=user2_tok) + self.helper.send(room_id1, "activity after4", tok=user2_tok) # Make the Sliding Sync request channel = self.make_request( @@ -2002,31 +2107,24 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): ) self.assertEqual(channel.code, 200, channel.json_body) - # Should see the last 4 events in the room - self.assertEqual( - [ - event["event_id"] - for event in channel.json_body["rooms"][room_id1]["timeline"] - ], - [ - event_response2["event_id"], - use1_invite_response["event_id"], - event_response3["event_id"], - event_response4["event_id"], - ], - channel.json_body["rooms"][room_id1]["timeline"], - ) - # No "live" events in an initial sync (no `from_token` to define the "live" - # range) - self.assertEqual( - 
channel.json_body["rooms"][room_id1]["num_live"], - 0, + # `timeline` is omitted for `invite` rooms with `stripped_state` + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("timeline"), channel.json_body["rooms"][room_id1], ) - # There is still more to paginate - self.assertEqual( - channel.json_body["rooms"][room_id1]["limited"], - True, + # `num_live` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("num_live"), + channel.json_body["rooms"][room_id1], + ) + # `limited` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("limited"), + channel.json_body["rooms"][room_id1], + ) + # `prev_batch` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("prev_batch"), channel.json_body["rooms"][room_id1], ) # We should have some `stripped_state` so the potential joiner can identify the From e3e431fab4ba821b62558ebdffb5bbad2fcc6da3 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 12:35:48 -0500 Subject: [PATCH 26/62] Finish up stripped_state for invite rooms See https://github.com/element-hq/synapse/pull/17320#discussion_r1646581077 --- synapse/handlers/sliding_sync.py | 27 ++--- synapse/types/handlers/__init__.py | 1 + tests/rest/client/test_sync.py | 156 +++++++++++++++++++++++++++-- 3 files changed, 162 insertions(+), 22 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 991d32356e..e781080470 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -26,6 +26,7 @@ from immutabledict import immutabledict from synapse.api.constants import AccountDataTypes, Direction, EventTypes, Membership from synapse.events import EventBase from synapse.events.utils import strip_event +from synapse.handlers.relations import BundledAggregations from synapse.storage.roommember import RoomsForUser from synapse.types import ( JsonDict, @@ -756,6 +757,7 @@ class SlidingSyncHandler: # Assemble the list of timeline events timeline_events: Optional[List[EventBase]] = None + bundled_aggregations: Optional[Dict[str, BundledAggregations]] = None limited: Optional[bool] = None prev_batch_token: Optional[StreamToken] = None num_live: Optional[int] = None @@ -848,7 +850,9 @@ class SlidingSyncHandler: filter_send_to_client=True, ) # TODO: Filter out `EventTypes.CallInvite` in public rooms, - # see https://github.com/element-hq/synapse/pull/16908#discussion_r1651598029 + # see https://github.com/element-hq/synapse/issues/17359 + + # TODO: Handle timeline gaps (`get_timeline_gaps()`) # Determine how many "live" events we have (events within the given token range). # @@ -878,6 +882,15 @@ class SlidingSyncHandler: # this more with a binary search (bisect). break + # If the timeline is `limited=True`, the client does not have all events + # necessary to calculate aggregations themselves. + if limited: + bundled_aggregations = ( + await self.relations_handler.get_bundled_aggregations( + timeline_events, user.to_string() + ) + ) + # Update the `prev_batch_token` to point to the position that allows us to # keep paginating backwards from the oldest event we return in the timeline. 
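# Illustrative aside (not part of the diff): `StreamToken.copy_and_replace`
# swaps a single stream's position while leaving the token's other components
# untouched. A toy immutable token (`ToyStreamToken` is hypothetical):
from dataclasses import dataclass, replace

@dataclass(frozen=True)
class ToyStreamToken:
    room_key: int
    receipt_key: int

    def copy_and_replace(self, key: str, new_value: int) -> "ToyStreamToken":
        return replace(self, **{key: new_value})

token = ToyStreamToken(room_key=42, receipt_key=7)
prev_batch = token.copy_and_replace("room_key", 17)  # oldest returned event
assert prev_batch == ToyStreamToken(room_key=17, receipt_key=7)
# Below, the real token is rebased onto `new_room_key` the same way.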
prev_batch_token = prev_batch_token.copy_and_replace(
@@ -907,18 +920,6 @@ class SlidingSyncHandler:
 
                     stripped_state.append(strip_event(invite_or_knock_event))
 
-        # TODO: Handle timeline gaps (`get_timeline_gaps()`)
-
-        # If the timeline is `limited=True`, the client does not have all events
-        # necessary to calculate aggregations themselves.
-        bundled_aggregations = None
-        if limited and timeline_events is not None:
-            bundled_aggregations = (
-                await self.relations_handler.get_bundled_aggregations(
-                    timeline_events, user.to_string()
-                )
-            )
-
         return SlidingSyncResult.RoomResult(
             # TODO: Dummy value
             name=None,
diff --git a/synapse/types/handlers/__init__.py b/synapse/types/handlers/__init__.py
index d50d02bfc6..3cd3c8fb0f 100644
--- a/synapse/types/handlers/__init__.py
+++ b/synapse/types/handlers/__init__.py
@@ -203,6 +203,7 @@ class SlidingSyncResult:
         timeline_events: Optional[List[EventBase]]
         bundled_aggregations: Optional[Dict[str, "BundledAggregations"]]
         is_dm: bool
+        # Optional because it's only relevant to invite/knock rooms
         stripped_state: Optional[List[JsonDict]]
         # Only optional because it won't be included for invite/knock rooms with `stripped_state`
         prev_batch: Optional[StreamToken]
diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py
index ad6b29b412..ba7cae8645 100644
--- a/tests/rest/client/test_sync.py
+++ b/tests/rest/client/test_sync.py
@@ -1836,9 +1836,12 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase):
 
     def test_rooms_invite_shared_history_initial_sync(self) -> None:
         """
-        Test that `rooms` we are invited to have some stripped `invite_state` and that
-        we can't see any timeline events because the history visiblity is `shared` and
-        we haven't joined the room yet.
+        Test that `rooms` we are invited to have some stripped `invite_state` during an
+        initial sync.
+
+        This is an `invite` room so we should only have `stripped_state` (no `timeline`)
+        but we also shouldn't see any timeline events because the history visibility is
+        `shared` and we haven't joined the room yet.
         """
         user1_id = self.register_user("user1", "pass")
         user1_tok = self.login(user1_id, "pass")
@@ -1936,9 +1939,10 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase):
 
     def test_rooms_invite_shared_history_incremental_sync(self) -> None:
         """
-        Test that `rooms` we are invited to have some stripped `invite_state`
-
-        This is an `invite` room so we should only have `stripped_state` (no timeline)
+        Test that `rooms` we are invited to have some stripped `invite_state` during an
+        incremental sync.
+
+        This is an `invite` room so we should only have `stripped_state` (no `timeline`)
         but we also shouldn't see any timeline events because the history visibility is
         `shared` and we haven't joined the room yet.
         """
@@ -2046,9 +2050,14 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase):
 
     def test_rooms_invite_world_readable_history_initial_sync(self) -> None:
         """
-        Test that `rooms` we are invited to have some stripped `invite_state` and that
-        we can't see any timeline events because the history visiblity is `shared` and
-        we haven't joined the room yet.
+        Test that `rooms` we are invited to have some stripped `invite_state` during an
+        initial sync.
+
+        This is an `invite` room so we should only have `stripped_state` (no `timeline`)
+        but depending on the semantics we decide, we could potentially see some
+        historical events before/after the `from_token` because the history is
+        `world_readable`. 
Same situation for events after the `from_token` if the + history visibility was set to `invited`. """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -2160,6 +2169,135 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): channel.json_body["rooms"][room_id1]["invite_state"], ) + def test_rooms_invite_world_readable_history_incremental_sync(self) -> None: + """ + Test that `rooms` we are invited to have some stripped `invite_state` during an + incremental sync. + + This is an `invite` room so we should only have `stripped_state` (no `timeline`) + but depending on the semantics we decide, we could potentially see some + historical events before/after the `from_token` because the history is + `world_readable`. Same situation for events after the `from_token` if the + history visibility was set to `invited`. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user1 = UserID.from_string(user1_id) + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + user2 = UserID.from_string(user2_id) + + room_id1 = self.helper.create_room_as( + user2_id, + tok=user2_tok, + extra_content={ + "preset": "public_chat", + "initial_state": [ + { + "content": { + "history_visibility": HistoryVisibility.WORLD_READABLE + }, + "state_key": "", + "type": EventTypes.RoomHistoryVisibility, + } + ], + }, + ) + # Ensure we're testing with a room with `world_readable` history visibility + # which means events are visible to anyone even without membership. + history_visibility_response = self.helper.get_state( + room_id1, EventTypes.RoomHistoryVisibility, tok=user2_tok + ) + self.assertEqual( + history_visibility_response.get("history_visibility"), + HistoryVisibility.WORLD_READABLE, + ) + + self.helper.send(room_id1, "activity before invite1", tok=user2_tok) + self.helper.send(room_id1, "activity before invite2", tok=user2_tok) + self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + self.helper.send(room_id1, "activity after invite3", tok=user2_tok) + self.helper.send(room_id1, "activity after invite4", tok=user2_tok) + + from_token = self.event_sources.get_current_token() + + self.helper.send(room_id1, "activity after token5", tok=user2_tok) + self.helper.send(room_id1, "activity after toekn6", tok=user2_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint + + f"?pos={self.get_success( + from_token.to_string(self.store) + )}", + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + # Large enough to see the latest events and before the invite + "timeline_limit": 4, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # `timeline` is omitted for `invite` rooms with `stripped_state` + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("timeline"), + channel.json_body["rooms"][room_id1], + ) + # `num_live` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("num_live"), + channel.json_body["rooms"][room_id1], + ) + # `limited` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("limited"), + channel.json_body["rooms"][room_id1], + ) + # `prev_batch` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + 
channel.json_body["rooms"][room_id1].get("prev_batch"), + channel.json_body["rooms"][room_id1], + ) + # We should have some `stripped_state` so the potential joiner can identify the + # room (we don't care about the order). + self.assertCountEqual( + channel.json_body["rooms"][room_id1]["invite_state"], + [ + { + "content": {"creator": user2_id, "room_version": "10"}, + "sender": user2_id, + "state_key": "", + "type": "m.room.create", + }, + { + "content": {"join_rule": "public"}, + "sender": user2_id, + "state_key": "", + "type": "m.room.join_rules", + }, + { + "content": {"displayname": user2.localpart, "membership": "join"}, + "sender": user2_id, + "state_key": user2_id, + "type": "m.room.member", + }, + { + "content": {"displayname": user1.localpart, "membership": "invite"}, + "sender": user2_id, + "state_key": user1_id, + "type": "m.room.member", + }, + ], + channel.json_body["rooms"][room_id1]["invite_state"], + ) + def test_rooms_ban_initial_sync(self) -> None: """ Test that `rooms` we are banned from in an intial sync only allows us to see From 303d834b78a7c93e390da3f426754cafff07c20f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 12:38:09 -0500 Subject: [PATCH 27/62] Add tracking discussion for not optional in the future --- synapse/handlers/sliding_sync.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index e781080470..0538fddf84 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -756,6 +756,11 @@ class SlidingSyncHandler: """ # Assemble the list of timeline events + # + # It would be nice to make the `rooms` response more uniform regardless of + # membership. Currently, we have to make all of these optional because + # `invite`/`knock` rooms only have `stripped_state`. 
See + # https://github.com/matrix-org/matrix-spec-proposals/pull/3575#discussion_r1653045932 timeline_events: Optional[List[EventBase]] = None bundled_aggregations: Optional[Dict[str, BundledAggregations]] = None limited: Optional[bool] = None From 4c2213144258cef2b2ac7960f290649a076d1927 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 15:27:43 -0500 Subject: [PATCH 28/62] Start testing for the correct room membership (failing) --- tests/handlers/test_sliding_sync.py | 477 +++++++++++++++++++++++++--- 1 file changed, 432 insertions(+), 45 deletions(-) diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index 0358239c7f..df262400e4 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -63,6 +63,7 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): self.sliding_sync_handler = self.hs.get_sliding_sync_handler() self.store = self.hs.get_datastores().main self.event_sources = hs.get_event_sources() + self.storage_controllers = hs.get_storage_controllers() def test_no_rooms(self) -> None: """ @@ -90,10 +91,13 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") before_room_token = self.event_sources.get_current_token() - room_id = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) + room_id = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response = self.helper.join(room_id, user1_id, tok=user1_tok) after_room_token = self.event_sources.get_current_token() @@ -106,6 +110,12 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): ) self.assertEqual(room_id_results.keys(), {room_id}) + # It should be pointing to the join event (latest membership event in the + # from/to range) + self.assertEqual( + room_id_results[room_id].event_id, + join_response["event_id"], + ) def test_get_already_joined_room(self) -> None: """ @@ -113,8 +123,11 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") - room_id = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) + room_id = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response = self.helper.join(room_id, user1_id, tok=user1_tok) after_room_token = self.event_sources.get_current_token() @@ -127,6 +140,12 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): ) self.assertEqual(room_id_results.keys(), {room_id}) + # It should be pointing to the join event (latest membership event in the + # from/to range) + self.assertEqual( + room_id_results[room_id].event_id, + join_response["event_id"], + ) def test_get_invited_banned_knocked_room(self) -> None: """ @@ -142,14 +161,18 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # Setup the invited room (user2 invites user1 to the room) invited_room_id = self.helper.create_room_as(user2_id, tok=user2_tok) - self.helper.invite(invited_room_id, targ=user1_id, tok=user2_tok) + invite_response = self.helper.invite( + invited_room_id, targ=user1_id, tok=user2_tok + ) # Setup the ban room (user2 bans user1 from the room) ban_room_id = self.helper.create_room_as( user2_id, tok=user2_tok, is_public=True ) self.helper.join(ban_room_id, user1_id, tok=user1_tok) - 
self.helper.ban(ban_room_id, src=user2_id, targ=user1_id, tok=user2_tok) + ban_response = self.helper.ban( + ban_room_id, src=user2_id, targ=user1_id, tok=user2_tok + ) # Setup the knock room (user1 knocks on the room) knock_room_id = self.helper.create_room_as( @@ -162,13 +185,19 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): tok=user2_tok, ) # User1 knocks on the room - channel = self.make_request( + knock_channel = self.make_request( "POST", "/_matrix/client/r0/knock/%s" % (knock_room_id,), b"{}", user1_tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(knock_channel.code, 200, knock_channel.result) + knock_room_membership_state_event = self.get_success( + self.storage_controllers.state.get_current_state_event( + knock_room_id, EventTypes.Member, user1_id + ) + ) + assert knock_room_membership_state_event is not None after_room_token = self.event_sources.get_current_token() @@ -189,6 +218,20 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): knock_room_id, }, ) + # It should be pointing to the the respective membership event (latest + # membership event in the from/to range) + self.assertEqual( + room_id_results[invited_room_id].event_id, + invite_response["event_id"], + ) + self.assertEqual( + room_id_results[ban_room_id].event_id, + ban_response["event_id"], + ) + self.assertEqual( + room_id_results[knock_room_id].event_id, + knock_room_membership_state_event.event_id, + ) def test_get_kicked_room(self) -> None: """ @@ -206,7 +249,7 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): ) self.helper.join(kick_room_id, user1_id, tok=user1_tok) # Kick user1 from the room - self.helper.change_membership( + kick_response = self.helper.change_membership( room=kick_room_id, src=user2_id, targ=user1_id, @@ -229,6 +272,11 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # The kicked room should show up self.assertEqual(room_id_results.keys(), {kick_room_id}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[kick_room_id].event_id, + kick_response["event_id"], + ) def test_forgotten_rooms(self) -> None: """ @@ -329,7 +377,7 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # Leave during the from_token/to_token range (newly_left) room_id2 = self.helper.create_room_as(user1_id, tok=user1_tok) - self.helper.leave(room_id2, user1_id, tok=user1_tok) + leave_response = self.helper.leave(room_id2, user1_id, tok=user1_tok) after_room2_token = self.event_sources.get_current_token() @@ -343,6 +391,11 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # Only the newly_left room should show up self.assertEqual(room_id_results.keys(), {room_id2}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id2].event_id, + leave_response["event_id"], + ) def test_no_joins_after_to_token(self) -> None: """ @@ -351,16 +404,19 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") before_room1_token = self.event_sources.get_current_token() - room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok) + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) after_room1_token = self.event_sources.get_current_token() - # Room join after 
after our `to_token` shouldn't show up - room_id2 = self.helper.create_room_as(user1_id, tok=user1_tok) - _ = room_id2 + # Room join after our `to_token` shouldn't show up + room_id2 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.join(room_id2, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -371,6 +427,11 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): ) self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response1["event_id"], + ) def test_join_during_range_and_left_room_after_to_token(self) -> None: """ @@ -380,15 +441,18 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") before_room1_token = self.event_sources.get_current_token() - room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok) + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) after_room1_token = self.event_sources.get_current_token() # Leave the room after we already have our tokens - self.helper.leave(room_id1, user1_id, tok=user1_tok) + leave_response = self.helper.leave(room_id1, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -401,6 +465,18 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # We should still see the room because we were joined during the # from_token/to_token time period. 
self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response": join_response["event_id"], + "leave_response": leave_response["event_id"], + } + ), + ) def test_join_before_range_and_left_room_after_to_token(self) -> None: """ @@ -410,13 +486,16 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") - room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok) + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) after_room1_token = self.event_sources.get_current_token() # Leave the room after we already have our tokens - self.helper.leave(room_id1, user1_id, tok=user1_tok) + leave_response = self.helper.leave(room_id1, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -428,6 +507,18 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # We should still see the room because we were joined before the `from_token` self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response": join_response["event_id"], + "leave_response": leave_response["event_id"], + } + ), + ) def test_kicked_before_range_and_left_after_to_token(self) -> None: """ @@ -444,9 +535,9 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): kick_room_id = self.helper.create_room_as( user2_id, tok=user2_tok, is_public=True ) - self.helper.join(kick_room_id, user1_id, tok=user1_tok) + join_response1 = self.helper.join(kick_room_id, user1_id, tok=user1_tok) # Kick user1 from the room - self.helper.change_membership( + kick_response = self.helper.change_membership( room=kick_room_id, src=user2_id, targ=user1_id, @@ -463,8 +554,8 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # # We have to join before we can leave (leave -> leave isn't a valid transition # or at least it doesn't work in Synapse, 403 forbidden) - self.helper.join(kick_room_id, user1_id, tok=user1_tok) - self.helper.leave(kick_room_id, user1_id, tok=user1_tok) + join_response2 = self.helper.join(kick_room_id, user1_id, tok=user1_tok) + leave_response = self.helper.leave(kick_room_id, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -476,6 +567,20 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # We shouldn't see the room because it was forgotten self.assertEqual(room_id_results.keys(), {kick_room_id}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[kick_room_id].event_id, + kick_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response1": join_response1["event_id"], + "kick_response": kick_response["event_id"], + "join_response2": join_response2["event_id"], + "leave_response": leave_response["event_id"], + } + ), + ) def 
test_newly_left_during_range_and_join_leave_after_to_token(self) -> None: """ @@ -494,14 +599,14 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # leave and can still re-join. room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) # Join and leave the room during the from/to range - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response1 = self.helper.leave(room_id1, user1_id, tok=user1_tok) after_room1_token = self.event_sources.get_current_token() # Join and leave the room after we already have our tokens - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response2 = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response2 = self.helper.leave(room_id1, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -513,6 +618,20 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # Room should still show up because it's newly_left during the from/to range self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + leave_response1["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response1": join_response1["event_id"], + "leave_response1": leave_response1["event_id"], + "join_response2": join_response2["event_id"], + "leave_response2": leave_response2["event_id"], + } + ), + ) def test_newly_left_during_range_and_join_after_to_token(self) -> None: """ @@ -531,13 +650,13 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # leave and can still re-join. room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) # Join and leave the room during the from/to range - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response1 = self.helper.leave(room_id1, user1_id, tok=user1_tok) after_room1_token = self.event_sources.get_current_token() # Join the room after we already have our tokens - self.helper.join(room_id1, user1_id, tok=user1_tok) + join_response2 = self.helper.join(room_id1, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -549,11 +668,24 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # Room should still show up because it's newly_left during the from/to range self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + leave_response1["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response1": join_response1["event_id"], + "leave_response1": leave_response1["event_id"], + "join_response2": join_response2["event_id"], + } + ), + ) def test_no_from_token(self) -> None: """ Test that if we don't provide a `from_token`, we get all the rooms that we we're - joined to up to the `to_token`. + joined up to the `to_token`. Providing `from_token` only really has the effect that it adds `newly_left` rooms to the response. 
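
        A rough sketch of those semantics (illustrative only, not part of the
        patch; the names mirror the surrounding tests, and
        `get_sync_room_ids_for_user` returns a room ID -> membership map whose
        keys the tests assert on):

            # user1 joined room1 and joined+left room2, all before `to_token`.
            # Without a `from_token` there is no range in which to detect
            # `newly_left` rooms, so only room1 shows up:
            await self.sliding_sync_handler.get_sync_room_ids_for_user(
                UserID.from_string(user1_id), from_token=None, to_token=to_token
            )  # keys -> {room_id1}

            # With a `from_token` from before the leave, room2 is added back
            # as `newly_left`:
            await self.sliding_sync_handler.get_sync_room_ids_for_user(
                UserID.from_string(user1_id), from_token=from_token, to_token=to_token
            )  # keys -> {room_id1, room_id2}
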
@@ -569,7 +701,7 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): room_id2 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) # Join room1 - self.helper.join(room_id1, user1_id, tok=user1_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) # Join and leave the room2 before the `to_token` self.helper.join(room_id2, user1_id, tok=user1_tok) @@ -590,6 +722,11 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # Only rooms we were joined to before the `to_token` should show up self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response1["event_id"], + ) def test_from_token_ahead_of_to_token(self) -> None: """ @@ -609,7 +746,7 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): room_id4 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) # Join room1 before `before_room_token` - self.helper.join(room_id1, user1_id, tok=user1_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) # Join and leave the room2 before `before_room_token` self.helper.join(room_id2, user1_id, tok=user1_tok) @@ -651,6 +788,11 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # There won't be any newly_left rooms because the `from_token` is ahead of the # `to_token` and that range will give no membership changes to check. self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response1["event_id"], + ) def test_leave_before_range_and_join_leave_after_to_token(self) -> None: """ @@ -741,16 +883,16 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # leave and can still re-join. 
room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) # Join, leave, join back to the room before the from/to range - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) - self.helper.join(room_id1, user1_id, tok=user1_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response1 = self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response2 = self.helper.join(room_id1, user1_id, tok=user1_tok) after_room1_token = self.event_sources.get_current_token() # Leave and Join the room multiple times after we already have our tokens - self.helper.leave(room_id1, user1_id, tok=user1_tok) - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) + leave_response2 = self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response3 = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response3 = self.helper.leave(room_id1, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -762,6 +904,22 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # Room should show up because it was newly_left and joined during the from/to range self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response2["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response1": join_response1["event_id"], + "leave_response1": leave_response1["event_id"], + "join_response2": join_response2["event_id"], + "leave_response2": leave_response2["event_id"], + "join_response3": join_response3["event_id"], + "leave_response3": leave_response3["event_id"], + } + ), + ) def test_join_leave_multiple_times_before_range_and_after_to_token( self, @@ -781,16 +939,16 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # leave and can still re-join. 
room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) # Join, leave, join back to the room before the from/to range - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) - self.helper.join(room_id1, user1_id, tok=user1_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response1 = self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response2 = self.helper.join(room_id1, user1_id, tok=user1_tok) after_room1_token = self.event_sources.get_current_token() # Leave and Join the room multiple times after we already have our tokens - self.helper.leave(room_id1, user1_id, tok=user1_tok) - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) + leave_response2 = self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response3 = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response3 = self.helper.leave(room_id1, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -802,6 +960,22 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # Room should show up because we were joined before the from/to range self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response2["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response1": join_response1["event_id"], + "leave_response1": leave_response1["event_id"], + "join_response2": join_response2["event_id"], + "leave_response2": leave_response2["event_id"], + "join_response3": join_response3["event_id"], + "leave_response3": leave_response3["event_id"], + } + ), + ) def test_invite_before_range_and_join_leave_after_to_token( self, @@ -821,13 +995,15 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) # Invited to the room before the token - self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + invite_response = self.helper.invite( + room_id1, src=user2_id, targ=user1_id, tok=user2_tok + ) after_room1_token = self.event_sources.get_current_token() # Join and leave the room after we already have our tokens - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_respsonse = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response = self.helper.leave(room_id1, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -839,6 +1015,217 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # Room should show up because we were invited before the from/to range self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + invite_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "invite_response": invite_response["event_id"], + "join_respsonse": join_respsonse["event_id"], + "leave_response": leave_response["event_id"], + } + ), + ) + + def test_display_name_changes( + self, + ) -> None: + """ + Test that we point to the correct membership event within the from/to range even + if there are multiple `join` 
membership events in a row indicating + `displayname`/`avatar_url` updates. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + # We create the room with user2 so the room isn't left with no members when we + # leave and can still re-join. + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) + join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + # Update the displayname during the token range + displayname_change_during_token_range_response = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname during token range", + }, + tok=user1_tok, + ) + + after_room1_token = self.event_sources.get_current_token() + + # Update the displayname after the token range + displayname_change_after_token_range_response = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname after token range", + }, + tok=user1_tok, + ) + + room_id_results = self.get_success( + self.sliding_sync_handler.get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=before_room1_token, + to_token=after_room1_token, + ) + ) + + # Room should show up because we were joined during the from/to range + self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + displayname_change_during_token_range_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response": join_response["event_id"], + "displayname_change_during_token_range_response": displayname_change_during_token_range_response[ + "event_id" + ], + "displayname_change_after_token_range_response": displayname_change_after_token_range_response[ + "event_id" + ], + } + ), + ) + + def test_display_name_changes_leave_after_token_range( + self, + ) -> None: + """ + Test that we point to the correct membership event within the from/to range even + if there are multiple `join` membership events in a row indicating + `displayname`/`avatar_url` updates and we leave after the `to_token`. + + See condition "1a)" comments in the `get_sync_room_ids_for_user()` method. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + # We create the room with user2 so the room isn't left with no members when we + # leave and can still re-join. 
+ room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) + join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + # Update the displayname during the token range + displayname_change_during_token_range_response = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname during token range", + }, + tok=user1_tok, + ) + + after_room1_token = self.event_sources.get_current_token() + + # Update the displayname after the token range + displayname_change_after_token_range_response = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname after token range", + }, + tok=user1_tok, + ) + + # Leave after the token + self.helper.leave(room_id1, user1_id, tok=user1_tok) + + room_id_results = self.get_success( + self.sliding_sync_handler.get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=before_room1_token, + to_token=after_room1_token, + ) + ) + + # Room should show up because we were joined during the from/to range + self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + displayname_change_during_token_range_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response": join_response["event_id"], + "displayname_change_during_token_range_response": displayname_change_during_token_range_response[ + "event_id" + ], + "displayname_change_after_token_range_response": displayname_change_after_token_range_response[ + "event_id" + ], + } + ), + ) + + def test_display_name_changes_join_after_token_range( + self, + ) -> None: + """ + Test that multiple `join` membership events (after the `to_token`) in a row + indicating `displayname`/`avatar_url` updates doesn't affect the results (we + joined after the token range so it shouldn't show up) + + See condition "1b)" comments in the `get_sync_room_ids_for_user()` method. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + # We create the room with user2 so the room isn't left with no members when we + # leave and can still re-join. 
+        room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True)
+
+        after_room1_token = self.event_sources.get_current_token()
+
+        self.helper.join(room_id1, user1_id, tok=user1_tok)
+        # Update the displayname after the token range
+        self.helper.send_state(
+            room_id1,
+            event_type=EventTypes.Member,
+            state_key=user1_id,
+            body={
+                "membership": Membership.JOIN,
+                "displayname": "displayname after token range",
+            },
+            tok=user1_tok,
+        )
+
+        room_id_results = self.get_success(
+            self.sliding_sync_handler.get_sync_room_ids_for_user(
+                UserID.from_string(user1_id),
+                from_token=before_room1_token,
+                to_token=after_room1_token,
+            )
+        )
+
+        # Room shouldn't show up because we joined after the from/to range
+        self.assertEqual(room_id_results.keys(), set())
 
     def test_multiple_rooms_are_not_confused(
         self,

From 83d6f76606bb7d1eaba9d5e498efc9fa15d13957 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Tue, 25 Jun 2024 15:27:49 -0500
Subject: [PATCH 29/62] Describe `current_state_delta_stream` better

---
 synapse/storage/schema/main/delta/42/current_state_delta.sql | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/synapse/storage/schema/main/delta/42/current_state_delta.sql b/synapse/storage/schema/main/delta/42/current_state_delta.sql
index 876b61e6a5..3d2fd69480 100644
--- a/synapse/storage/schema/main/delta/42/current_state_delta.sql
+++ b/synapse/storage/schema/main/delta/42/current_state_delta.sql
@@ -32,7 +32,10 @@
  * limitations under the License.
  */
 
-
+-- Tracks what the server thinks is the current state of the room as time goes on. It
+-- does not track how state progresses from the beginning of the room. So for example,
+-- when you remotely join a room, the first rows will just be the state when you joined
+-- and progress from there.
 CREATE TABLE current_state_delta_stream (
     stream_id BIGINT NOT NULL,
     room_id TEXT NOT NULL,

From fbd92e1c9da2bc89a555f3fa609bba20a76e4440 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Tue, 25 Jun 2024 20:16:50 -0500
Subject: [PATCH 30/62] Add `get_current_state_delta_membership_changes_for_user(...)` (using `current_state_delta_stream`) (still need to add newly_left rooms back)

---
 synapse/handlers/sliding_sync.py         | 347 ++++++++++++++---------
 synapse/storage/databases/main/stream.py | 151 +++++++++-
 tests/handlers/test_sliding_sync.py      |  73 ++++-
 3 files changed, 426 insertions(+), 145 deletions(-)

diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py
index 0538fddf84..2e24b0c338 100644
--- a/synapse/handlers/sliding_sync.py
+++ b/synapse/handlers/sliding_sync.py
@@ -27,6 +27,7 @@ from synapse.api.constants import AccountDataTypes, Direction, EventTypes, Membe
 from synapse.events import EventBase
 from synapse.events.utils import strip_event
 from synapse.handlers.relations import BundledAggregations
+from synapse.storage.databases.main.stream import CurrentStateDeltaMembership
 from synapse.storage.roommember import RoomsForUser
 from synapse.types import (
     JsonDict,
@@ -369,6 +370,9 @@ class SlidingSyncHandler:
 
         # Our working list of rooms that can show up in the sync response
         sync_room_id_set = {
+            # Note: The `room_for_user` we're assigning here will need to be fixed up
+            # (below) because they are potentially from the current snapshot time
+            # instead of from the time of the `to_token`. 
room_for_user.room_id: room_for_user for room_for_user in room_for_user_list if filter_membership_for_sync( @@ -404,33 +408,10 @@ class SlidingSyncHandler: instance_map=immutabledict(instance_to_max_stream_ordering_map), ) - # Since we fetched the users room list at some point in time after the from/to - # tokens, we need to revert/rewind some membership changes to match the point in - # time of the `to_token`. In particular, we need to make these fixups: - # - # - 1a) Remove rooms that the user joined after the `to_token` - # - 1b) Add back rooms that the user left after the `to_token` - # - 2) Add back newly_left rooms (> `from_token` and <= `to_token`) - # - # Below, we're doing two separate lookups for membership changes. We could - # request everything for both fixups in one range, [`from_token.room_key`, - # `membership_snapshot_token`), but we want to avoid raw `stream_ordering` - # comparison without `instance_name` (which is flawed). We could refactor - # `event.internal_metadata` to include `instance_name` but it might turn out a - # little difficult and a bigger, broader Synapse change than we want to make. - - # 1) ----------------------------------------------------- - - # 1) Fetch membership changes that fall in the range from `to_token` up to - # `membership_snapshot_token` - # - # If our `to_token` is already the same or ahead of the latest room membership - # for the user, we don't need to do any "2)" fix-ups and can just straight-up - # use the room list from the snapshot as a base (nothing has changed) - membership_change_events_after_to_token = [] + current_state_delta_membership_changes_after_to_token = [] if not membership_snapshot_token.is_before_or_eq(to_token.room_key): - membership_change_events_after_to_token = ( - await self.store.get_membership_changes_for_user( + current_state_delta_membership_changes_after_to_token = ( + await self.store.get_current_state_delta_membership_changes_for_user( user_id, from_key=to_token.room_key, to_key=membership_snapshot_token, @@ -438,138 +419,224 @@ class SlidingSyncHandler: ) ) - # 1) Assemble a list of the last membership events in some given ranges. Someone - # could have left and joined multiple times during the given range but we only - # care about end-result so we grab the last one. - last_membership_change_by_room_id_after_to_token: Dict[str, EventBase] = {} - # We also need the first membership event after the `to_token` so we can step + # We need the first membership event after the `to_token` so we can step # backward to the previous membership that would apply to the from/to range. - first_membership_change_by_room_id_after_to_token: Dict[str, EventBase] = {} - for event in membership_change_events_after_to_token: - last_membership_change_by_room_id_after_to_token[event.room_id] = event + first_membership_change_by_room_id_after_to_token: Dict[ + str, CurrentStateDeltaMembership + ] = {} + for membership_change in current_state_delta_membership_changes_after_to_token: # Only set if we haven't already set it first_membership_change_by_room_id_after_to_token.setdefault( - event.room_id, event + membership_change.room_id, membership_change ) - # 1) Fixup + # Since we fetched a snapshot of the users room list at some point in time after + # the from/to tokens, we need to revert/rewind some membership changes to match + # the point in time of the `to_token`. 
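+        # In particular: if the first membership change in a room after the `to_token`
+        # has no `prev_event_id`, the user had no membership in that room at the
+        # `to_token` and we drop the room; otherwise we step back to that
+        # `prev_event_id` to recover the membership that applied at the `to_token`.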
+ prev_event_ids_in_from_to_range = [] for ( - last_membership_change_after_to_token - ) in last_membership_change_by_room_id_after_to_token.values(): - room_id = last_membership_change_after_to_token.room_id - - # We want to find the first membership change after the `to_token` then step - # backward to know the membership in the from/to range. - first_membership_change_after_to_token = ( - first_membership_change_by_room_id_after_to_token.get(room_id) - ) - assert first_membership_change_after_to_token is not None, ( - "If there was a `last_membership_change_after_to_token` that we're iterating over, " - + "then there should be corresponding a first change. For example, even if there " - + "is only one event after the `to_token`, the first and last event will be same event. " - + "This is probably a mistake in assembling the `last_membership_change_by_room_id_after_to_token`" - + "/`first_membership_change_by_room_id_after_to_token` dicts above." - ) - # TODO: Instead of reading from `unsigned`, refactor this to use the - # `current_state_delta_stream` table in the future. Probably a new - # `get_membership_changes_for_user()` function that uses - # `current_state_delta_stream` with a join to `room_memberships`. This would - # help in state reset scenarios since `prev_content` is looking at the - # current branch vs the current room state. This is all just data given to - # the client so no real harm to data integrity, but we'd like to be nice to - # the client. Since the `current_state_delta_stream` table is new, it - # doesn't have all events in it. Since this is Sliding Sync, if we ever need - # to, we can signal the client to throw all of their state away by sending - # "operation: RESET". - prev_content = first_membership_change_after_to_token.unsigned.get( - "prev_content", {} - ) - prev_membership = prev_content.get("membership", None) - prev_sender = first_membership_change_after_to_token.unsigned.get( - "prev_sender", None + room_id, + first_membership_change_after_to_token, + ) in first_membership_change_by_room_id_after_to_token.items(): + # One of these should exist to be a valid row in `current_state_delta_stream` + assert ( + first_membership_change_after_to_token.event_id is not None + or first_membership_change_after_to_token.prev_event_id is not None ) - # Check if the previous membership (membership that applies to the from/to - # range) should be included in our `sync_room_id_set` - should_prev_membership_be_included = ( - prev_membership is not None - and prev_sender is not None - and filter_membership_for_sync( - membership=prev_membership, - user_id=user_id, - sender=prev_sender, + # If the membership change was added after the `to_token`, we need to remove + # it + if first_membership_change_after_to_token.prev_event_id is None: + sync_room_id_set.pop(room_id, None) + # From the first membership event after the `to_token`, we need to step + # backward to the previous membership that would apply to the from/to range. + else: + prev_event_ids_in_from_to_range.append( + first_membership_change_after_to_token.prev_event_id ) - ) - # Check if the last membership (membership that applies to our snapshot) was - # already included in our `sync_room_id_set` - was_last_membership_already_included = filter_membership_for_sync( - membership=last_membership_change_after_to_token.membership, + # Fetch the previous membership events that apply to the from/to range and fixup + # our working list. 
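+        # (a single bulk `get_events` lookup for all of the `prev_event_id`s collected
+        # above, rather than one fetch per room)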
+ prev_events_in_from_to_range = await self.store.get_events( + prev_event_ids_in_from_to_range + ) + for prev_event_in_from_to_range in prev_events_in_from_to_range.values(): + # Update if the membership should be included + if filter_membership_for_sync( + membership=prev_event_in_from_to_range.membership, user_id=user_id, - sender=last_membership_change_after_to_token.sender, - ) - - # 1a) Add back rooms that the user left after the `to_token` - # - # For example, if the last membership event after the `to_token` is a leave - # event, then the room was excluded from `sync_room_id_set` when we first - # crafted it above. We should add these rooms back as long as the user also - # was part of the room before the `to_token`. - if ( - not was_last_membership_already_included - and should_prev_membership_be_included + sender=prev_event_in_from_to_range.sender, ): - sync_room_id_set[room_id] = convert_event_to_rooms_for_user( - last_membership_change_after_to_token + sync_room_id_set[prev_event_in_from_to_range.room_id] = ( + convert_event_to_rooms_for_user(prev_event_in_from_to_range) ) - # 1b) Remove rooms that the user joined (hasn't left) after the `to_token` - # - # For example, if the last membership event after the `to_token` is a "join" - # event, then the room was included `sync_room_id_set` when we first crafted - # it above. We should remove these rooms as long as the user also wasn't - # part of the room before the `to_token`. - elif ( - was_last_membership_already_included - and not should_prev_membership_be_included - ): - del sync_room_id_set[room_id] + # Otherwise, remove it + else: + sync_room_id_set.pop(prev_event_in_from_to_range.room_id, None) - # 2) ----------------------------------------------------- - # We fix-up newly_left rooms after the first fixup because it may have removed - # some left rooms that we can figure out are newly_left in the following code + # TODO: Add back newly_left rooms - # 2) Fetch membership changes that fall in the range from `from_token` up to `to_token` - membership_change_events_in_from_to_range = [] - if from_token: - membership_change_events_in_from_to_range = ( - await self.store.get_membership_changes_for_user( - user_id, - from_key=from_token.room_key, - to_key=to_token.room_key, - excluded_rooms=self.rooms_to_exclude_globally, - ) - ) + # Since we fetched the users room list at some point in time after the from/to + # tokens, we need to revert/rewind some membership changes to match the point in + # time of the `to_token`. In particular, we need to make these fixups: + # + # - 1a) Remove rooms that the user joined after the `to_token` + # - 1b) Add back rooms that the user left after the `to_token` + # - 2) Add back newly_left rooms (> `from_token` and <= `to_token`) - # 2) Assemble a list of the last membership events in some given ranges. Someone - # could have left and joined multiple times during the given range but we only - # care about end-result so we grab the last one. 
- last_membership_change_by_room_id_in_from_to_range: Dict[str, EventBase] = {} - for event in membership_change_events_in_from_to_range: - last_membership_change_by_room_id_in_from_to_range[event.room_id] = event + # # 1) ----------------------------------------------------- - # 2) Fixup - for ( - last_membership_change_in_from_to_range - ) in last_membership_change_by_room_id_in_from_to_range.values(): - room_id = last_membership_change_in_from_to_range.room_id + # # 1) Fetch membership changes that fall in the range from `to_token` up to + # # `membership_snapshot_token` + # # + # # If our `to_token` is already the same or ahead of the latest room membership + # # for the user, we don't need to do any "2)" fix-ups and can just straight-up + # # use the room list from the snapshot as a base (nothing has changed) + # membership_change_events_after_to_token = [] + # if not membership_snapshot_token.is_before_or_eq(to_token.room_key): + # membership_change_events_after_to_token = ( + # await self.store.get_membership_changes_for_user( + # user_id, + # from_key=to_token.room_key, + # to_key=membership_snapshot_token, + # excluded_rooms=self.rooms_to_exclude_globally, + # ) + # ) - # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). We - # include newly_left rooms because the last event that the user should see - # is their own leave event - if last_membership_change_in_from_to_range.membership == Membership.LEAVE: - sync_room_id_set[room_id] = convert_event_to_rooms_for_user( - last_membership_change_in_from_to_range - ) + # # 1) Assemble a list of the last membership events in some given ranges. Someone + # # could have left and joined multiple times during the given range but we only + # # care about end-result so we grab the last one. + # last_membership_change_by_room_id_after_to_token: Dict[str, EventBase] = {} + # # We also need the first membership event after the `to_token` so we can step + # # backward to the previous membership that would apply to the from/to range. + # first_membership_change_by_room_id_after_to_token: Dict[str, EventBase] = {} + # for event in membership_change_events_after_to_token: + # last_membership_change_by_room_id_after_to_token[event.room_id] = event + # # Only set if we haven't already set it + # first_membership_change_by_room_id_after_to_token.setdefault( + # event.room_id, event + # ) + + # # 1) Fixup + # for ( + # last_membership_change_after_to_token + # ) in last_membership_change_by_room_id_after_to_token.values(): + # room_id = last_membership_change_after_to_token.room_id + + # # We want to find the first membership change after the `to_token` then step + # # backward to know the membership in the from/to range. + # first_membership_change_after_to_token = ( + # first_membership_change_by_room_id_after_to_token.get(room_id) + # ) + # assert first_membership_change_after_to_token is not None, ( + # "If there was a `last_membership_change_after_to_token` that we're iterating over, " + # + "then there should be corresponding a first change. For example, even if there " + # + "is only one event after the `to_token`, the first and last event will be same event. " + # + "This is probably a mistake in assembling the `last_membership_change_by_room_id_after_to_token`" + # + "/`first_membership_change_by_room_id_after_to_token` dicts above." + # ) + # # TODO: Instead of reading from `unsigned`, refactor this to use the + # # `current_state_delta_stream` table in the future. 
Probably a new + # # `get_membership_changes_for_user()` function that uses + # # `current_state_delta_stream` with a join to `room_memberships`. This would + # # help in state reset scenarios since `prev_content` is looking at the + # # current branch vs the current room state. This is all just data given to + # # the client so no real harm to data integrity, but we'd like to be nice to + # # the client. Since the `current_state_delta_stream` table is new, it + # # doesn't have all events in it. Since this is Sliding Sync, if we ever need + # # to, we can signal the client to throw all of their state away by sending + # # "operation: RESET". + # prev_content = first_membership_change_after_to_token.unsigned.get( + # "prev_content", {} + # ) + # prev_membership = prev_content.get("membership", None) + # prev_sender = first_membership_change_after_to_token.unsigned.get( + # "prev_sender", None + # ) + + # # Check if the previous membership (membership that applies to the from/to + # # range) should be included in our `sync_room_id_set` + # should_prev_membership_be_included = ( + # prev_membership is not None + # and prev_sender is not None + # and filter_membership_for_sync( + # membership=prev_membership, + # user_id=user_id, + # sender=prev_sender, + # ) + # ) + + # # Check if the last membership (membership that applies to our snapshot) was + # # already included in our `sync_room_id_set` + # was_last_membership_already_included = filter_membership_for_sync( + # membership=last_membership_change_after_to_token.membership, + # user_id=user_id, + # sender=last_membership_change_after_to_token.sender, + # ) + + # # 1a) Add back rooms that the user left after the `to_token` + # # + # # For example, if the last membership event after the `to_token` is a leave + # # event, then the room was excluded from `sync_room_id_set` when we first + # # crafted it above. We should add these rooms back as long as the user also + # # was part of the room before the `to_token`. + # if ( + # not was_last_membership_already_included + # and should_prev_membership_be_included + # ): + # # TODO: Assign the correct membership event at the `to_token` here + # # (currently we're setting it as the last event after the `to_token`) + # sync_room_id_set[room_id] = convert_event_to_rooms_for_user( + # last_membership_change_after_to_token + # ) + # # 1b) Remove rooms that the user joined (hasn't left) after the `to_token` + # # + # # For example, if the last membership event after the `to_token` is a "join" + # # event, then the room was included `sync_room_id_set` when we first crafted + # # it above. We should remove these rooms as long as the user also wasn't + # # part of the room before the `to_token`. 
+ # elif ( + # was_last_membership_already_included + # and not should_prev_membership_be_included + # ): + # del sync_room_id_set[room_id] + + # # 2) ----------------------------------------------------- + # # We fix-up newly_left rooms after the first fixup because it may have removed + # # some left rooms that we can figure out are newly_left in the following code + + # # 2) Fetch membership changes that fall in the range from `from_token` up to `to_token` + # membership_change_events_in_from_to_range = [] + # if from_token: + # membership_change_events_in_from_to_range = ( + # await self.store.get_membership_changes_for_user( + # user_id, + # from_key=from_token.room_key, + # to_key=to_token.room_key, + # excluded_rooms=self.rooms_to_exclude_globally, + # ) + # ) + + # # 2) Assemble a list of the last membership events in some given ranges. Someone + # # could have left and joined multiple times during the given range but we only + # # care about end-result so we grab the last one. + # last_membership_change_by_room_id_in_from_to_range: Dict[str, EventBase] = {} + # for event in membership_change_events_in_from_to_range: + # last_membership_change_by_room_id_in_from_to_range[event.room_id] = event + + # # 2) Fixup + # for ( + # last_membership_change_in_from_to_range + # ) in last_membership_change_by_room_id_in_from_to_range.values(): + # room_id = last_membership_change_in_from_to_range.room_id + + # # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). We + # # include newly_left rooms because the last event that the user should see + # # is their own leave event + # if last_membership_change_in_from_to_range.membership == Membership.LEAVE: + # sync_room_id_set[room_id] = convert_event_to_rooms_for_user( + # last_membership_change_in_from_to_range + # ) return sync_room_id_set diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index c21e69ecda..f5de23080d 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -44,6 +44,7 @@ what sort order was used: import logging from typing import ( TYPE_CHECKING, + AbstractSet, Any, Collection, Dict, @@ -62,7 +63,7 @@ from typing_extensions import Literal from twisted.internet import defer -from synapse.api.constants import Direction +from synapse.api.constants import Direction, EventTypes from synapse.api.filtering import Filter from synapse.events import EventBase from synapse.logging.context import make_deferred_yieldable, run_in_background @@ -111,6 +112,24 @@ class _EventsAround: end: RoomStreamToken +@attr.s(slots=True, frozen=True, auto_attribs=True) +class CurrentStateDeltaMembership: + """ + Attributes: + event_id: The "current" membership event ID in this room. May be `None` if the + server is no longer in the room or a state reset happened. + prev_event_id: The previous membership event in this room that was replaced by + the "current" one. May be `None` if there was no previous membership event. + room_id: The room ID of the membership event. + """ + + event_id: Optional[str] + prev_event_id: Optional[str] + room_id: str + # Could be useful but we're not using it yet. 
+ # event_pos: PersistedEventPosition + + def generate_pagination_where_clause( direction: Direction, column_names: Tuple[str, str], @@ -390,6 +409,42 @@ def _filter_results( return True +def _filter_results_by_stream( + lower_token: Optional[RoomStreamToken], + upper_token: Optional[RoomStreamToken], + instance_name: str, + stream_ordering: int, +) -> bool: + """ + Note: This function only works with "live" tokens with `stream_ordering` only. + + Returns True if the event persisted by the given instance at the given + topological/stream_ordering falls between the two tokens (taking a None + token to mean unbounded). + + Used to filter results from fetching events in the DB against the given + tokens. This is necessary to handle the case where the tokens include + position maps, which we handle by fetching more than necessary from the DB + and then filtering (rather than attempting to construct a complicated SQL + query). + """ + if lower_token: + assert lower_token.topological is None + + # If these are live tokens we compare the stream ordering against the + # writers stream position. + if stream_ordering <= lower_token.get_stream_pos_for_instance(instance_name): + return False + + if upper_token: + assert upper_token.topological is None + + if upper_token.get_stream_pos_for_instance(instance_name) < stream_ordering: + return False + + return True + + def filter_to_clause(event_filter: Optional[Filter]) -> Tuple[str, List[str]]: # NB: This may create SQL clauses that don't optimise well (and we don't # have indices on all possible clauses). E.g. it may create @@ -731,6 +786,94 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): return ret, key + async def get_current_state_delta_membership_changes_for_user( + self, + user_id: str, + from_key: RoomStreamToken, + to_key: RoomStreamToken, + excluded_rooms: Optional[List[str]] = None, + ) -> List[CurrentStateDeltaMembership]: + """ + TODO + + Note: This function only works with "live" tokens with `stream_ordering` only. + + All such events whose stream ordering `s` lies in the range `from_key < s <= + to_key` are returned. Events are sorted by `stream_ordering` ascending. + """ + # Start by ruling out cases where a DB query is not necessary. + if from_key == to_key: + return [] + + if from_key: + has_changed = self._membership_stream_cache.has_entity_changed( + user_id, int(from_key.stream) + ) + if not has_changed: + return [] + + def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: + # To handle tokens with a non-empty instance_map we fetch more + # results than necessary and then filter down + min_from_id = from_key.stream + max_to_id = to_key.get_max_stream_pos() + + args: List[Any] = [EventTypes.Member, user_id, min_from_id, max_to_id] + + # TODO: It would be good to assert that the `to_token` is >= + # the first row in `current_state_delta_stream` for the rooms we're + # interested in. Otherwise, we will end up with empty results and not know + # it. + + # Note: There is no index for `(type, state_key)` in + # `current_state_delta_stream`. We also can't just add an index for + # `event_id` and join the `room_memberships` table by `event_id` because it + # may be `null` in `current_state_delta_stream` so nothing will match (it's + # `null` when the server is no longer in the room or a state reset happened + # and it was unset). + sql = """ + SELECT s.event_id, s.prev_event_id, s.room_id, s.instance_name, s.stream_id + FROM current_state_delta_stream AS s + WHERE s.type = ? AND s.state_key = ? 
+ AND s.stream_id > ? AND s.stream_id <= ?
+ ORDER BY s.stream_id ASC
+ """
+
+ txn.execute(sql, args)
+
+ return [
+ CurrentStateDeltaMembership(
+ event_id=event_id,
+ prev_event_id=prev_event_id,
+ room_id=room_id,
+ # event_pos=PersistedEventPosition(
+ # instance_name=instance_name,
+ # stream=stream_ordering,
+ # ),
+ )
+ for event_id, prev_event_id, room_id, instance_name, stream_ordering in txn
+ if _filter_results_by_stream(
+ from_key,
+ to_key,
+ instance_name,
+ stream_ordering,
+ )
+ ]
+
+ current_state_delta_membership_changes = await self.db_pool.runInteraction(
+ "get_current_state_delta_membership_changes_for_user", f
+ )
+
+ rooms_to_exclude: AbstractSet[str] = set()
+ if excluded_rooms is not None:
+ rooms_to_exclude = set(excluded_rooms)
+
+ return [
+ membership_change
+ for membership_change in current_state_delta_membership_changes
+ if membership_change.room_id not in rooms_to_exclude
+ ]
+
 @cancellable
 async def get_membership_changes_for_user(
 self,
@@ -766,10 +909,10 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
 ignore_room_clause = ""
 if excluded_rooms is not None and len(excluded_rooms) > 0:
- ignore_room_clause = "AND e.room_id NOT IN (%s)" % ",".join(
- "?" for _ in excluded_rooms
+ ignore_room_clause, ignore_room_args = make_in_list_sql_clause(
+ txn.database_engine, "e.room_id", excluded_rooms, negative=True
 )
- args = args + excluded_rooms
+ args += ignore_room_args
 
 sql = """
 SELECT m.event_id, instance_name, topological_ordering, stream_ordering
diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py
index df262400e4..694fd17a02 100644
--- a/tests/handlers/test_sliding_sync.py
+++ b/tests/handlers/test_sliding_sync.py
@@ -1029,7 +1029,7 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase):
 ),
 )
 
- def test_display_name_changes(
+ def test_display_name_changes_in_token_range(
 self,
 ) -> None:
 """
@@ -1102,6 +1102,77 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase):
 ),
 )
 
+ def test_display_name_changes_before_and_after_token_range(
+ self,
+ ) -> None:
+ """
+ Test that we point to the correct membership event even though there are no
+ membership events in the from/to range but there are `displayname`/`avatar_url`
+ changes before/after the token range.
+ """
+ user1_id = self.register_user("user1", "pass")
+ user1_tok = self.login(user1_id, "pass")
+ user2_id = self.register_user("user2", "pass")
+ user2_tok = self.login(user2_id, "pass")
+
+ # We create the room with user2 so the room isn't left with no members when we
+ # leave and can still re-join.
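The scenario this new test builds can be pictured as a short timeline (the stream positions here are illustrative only, not real stream orderings):

    # join(user1)                          pos 3  (before the token range)
    # member event: displayname change #1  pos 4  (before the token range)
    # ---- after_room1_token (used as both from_token and to_token) ----
    # member event: displayname change #2  pos 5  (after the token range)
    #
    # Expected: `get_sync_room_ids_for_user(...)` points `room_id1` at
    # displayname change #1, the membership state in effect at the token,
    # rather than at the original join or at the change made afterwards.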
+ room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) + join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + # Update the displayname before the token range + displayname_change_before_token_range_response = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname during token range", + }, + tok=user1_tok, + ) + + after_room1_token = self.event_sources.get_current_token() + + # Update the displayname after the token range + displayname_change_after_token_range_response = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname after token range", + }, + tok=user1_tok, + ) + + room_id_results = self.get_success( + self.sliding_sync_handler.get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=after_room1_token, + to_token=after_room1_token, + ) + ) + + # Room should show up because we were joined before the from/to range + self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + displayname_change_before_token_range_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response": join_response["event_id"], + "displayname_change_before_token_range_response": displayname_change_before_token_range_response[ + "event_id" + ], + "displayname_change_after_token_range_response": displayname_change_after_token_range_response[ + "event_id" + ], + } + ), + ) + def test_display_name_changes_leave_after_token_range( self, ) -> None: From 6c791a88b34b5646324a22584d5f84d99501ff34 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 21:07:19 -0500 Subject: [PATCH 31/62] WIP: Add back `newly_left` --- synapse/handlers/sliding_sync.py | 113 +++++++++++++++++------ synapse/storage/databases/main/stream.py | 18 +++- 2 files changed, 101 insertions(+), 30 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 2e24b0c338..5603fdeb38 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -375,11 +375,6 @@ class SlidingSyncHandler: # instead from the time of the `to_token`. room_for_user.room_id: room_for_user for room_for_user in room_for_user_list - if filter_membership_for_sync( - membership=room_for_user.membership, - user_id=user_id, - sender=room_for_user.sender, - ) } # Get the `RoomStreamToken` that represents the spot we queried up to when we got @@ -408,6 +403,23 @@ class SlidingSyncHandler: instance_map=immutabledict(instance_to_max_stream_ordering_map), ) + # Since we fetched the users room list at some point in time after the from/to + # tokens, we need to revert/rewind some membership changes to match the point in + # time of the `to_token`. 
In particular, we need to make these fixups: + # + # - 1a) Remove rooms that the user joined after the `to_token` + # - 1b) Add back rooms that the user left after the `to_token` + # - 1c) Update room membership events to the point in time of the `to_token` + # - 2) Add back newly_left rooms (> `from_token` and <= `to_token`) + + # 1) ----------------------------------------------------- + + # 1) Fetch membership changes that fall in the range from `to_token` up to + # `membership_snapshot_token` + # + # If our `to_token` is already the same or ahead of the latest room membership + # for the user, we don't need to do any "2)" fix-ups and can just straight-up + # use the room list from the snapshot as a base (nothing has changed) current_state_delta_membership_changes_after_to_token = [] if not membership_snapshot_token.is_before_or_eq(to_token.room_key): current_state_delta_membership_changes_after_to_token = ( @@ -419,8 +431,9 @@ class SlidingSyncHandler: ) ) - # We need the first membership event after the `to_token` so we can step - # backward to the previous membership that would apply to the from/to range. + # 1) Assemble a list of the first membership event after the `to_token` so we can + # step backward to the previous membership that would apply to the from/to + # range. first_membership_change_by_room_id_after_to_token: Dict[ str, CurrentStateDeltaMembership ] = {} @@ -430,6 +443,8 @@ class SlidingSyncHandler: membership_change.room_id, membership_change ) + # 1) Fixup part 1 + # # Since we fetched a snapshot of the users room list at some point in time after # the from/to tokens, we need to revert/rewind some membership changes to match # the point in time of the `to_token`. @@ -444,37 +459,81 @@ class SlidingSyncHandler: or first_membership_change_after_to_token.prev_event_id is not None ) - # If the membership change was added after the `to_token`, we need to remove - # it + # 1a) Remove rooms that the user joined after the `to_token` if first_membership_change_after_to_token.prev_event_id is None: sync_room_id_set.pop(room_id, None) - # From the first membership event after the `to_token`, we need to step - # backward to the previous membership that would apply to the from/to range. + # 1b) 1c) From the first membership event after the `to_token`, step backward to the + # previous membership that would apply to the from/to range. else: prev_event_ids_in_from_to_range.append( first_membership_change_after_to_token.prev_event_id ) - # Fetch the previous membership events that apply to the from/to range and fixup - # our working list. + # 1) Fixup part 2 + # + # 1b) 1c) Fetch the previous membership events that apply to the from/to range + # and fixup our working list. 
prev_events_in_from_to_range = await self.store.get_events( prev_event_ids_in_from_to_range ) for prev_event_in_from_to_range in prev_events_in_from_to_range.values(): - # Update if the membership should be included - if filter_membership_for_sync( - membership=prev_event_in_from_to_range.membership, - user_id=user_id, - sender=prev_event_in_from_to_range.sender, - ): - sync_room_id_set[prev_event_in_from_to_range.room_id] = ( - convert_event_to_rooms_for_user(prev_event_in_from_to_range) - ) - # Otherwise, remove it - else: - sync_room_id_set.pop(prev_event_in_from_to_range.room_id, None) + # 1b) 1c) Update the membership with what we found + sync_room_id_set[prev_event_in_from_to_range.room_id] = ( + convert_event_to_rooms_for_user(prev_event_in_from_to_range) + ) - # TODO: Add back newly_left rooms + filtered_sync_room_id_set = { + room_id: room_for_user + for room_id, room_for_user in sync_room_id_set.items() + if filter_membership_for_sync( + membership=room_for_user.membership, + user_id=user_id, + sender=room_for_user.sender, + ) + } + + # 2) ----------------------------------------------------- + # We fix-up newly_left rooms after the first fixup because it may have removed + # some left rooms that we can figure out are newly_left in the following code + + # 2) Fetch membership changes that fall in the range from `from_token` up to `to_token` + current_state_delta_membership_changes_in_from_to_range = [] + if from_token: + current_state_delta_membership_changes_in_from_to_range = ( + await self.store.get_current_state_delta_membership_changes_for_user( + user_id, + from_key=from_token.room_key, + to_key=to_token.room_key, + excluded_rooms=self.rooms_to_exclude_globally, + ) + ) + + # 2) Assemble a list of the last membership events in some given ranges. Someone + # could have left and joined multiple times during the given range but we only + # care about end-result so we grab the last one. + last_membership_change_by_room_id_in_from_to_range: Dict[ + str, CurrentStateDeltaMembership + ] = {} + for ( + membership_change + ) in current_state_delta_membership_changes_in_from_to_range: + last_membership_change_by_room_id_in_from_to_range[ + membership_change.room_id + ] = membership_change + + # 2) Fixup + for ( + last_membership_change_in_from_to_range + ) in last_membership_change_by_room_id_in_from_to_range.values(): + room_id = last_membership_change_in_from_to_range.room_id + + # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). 
We + # include newly_left rooms because the last event that the user should see + # is their own leave event + if last_membership_change_in_from_to_range.membership == Membership.LEAVE: + filtered_sync_room_id_set[room_id] = convert_event_to_rooms_for_user( + last_membership_change_in_from_to_range + ) # Since we fetched the users room list at some point in time after the from/to # tokens, we need to revert/rewind some membership changes to match the point in @@ -638,7 +697,7 @@ class SlidingSyncHandler: # last_membership_change_in_from_to_range # ) - return sync_room_id_set + return filtered_sync_room_id_set async def filter_rooms( self, diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index f5de23080d..595245e70e 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -63,7 +63,7 @@ from typing_extensions import Literal from twisted.internet import defer -from synapse.api.constants import Direction, EventTypes +from synapse.api.constants import Direction, EventTypes, Membership from synapse.api.filtering import Filter from synapse.events import EventBase from synapse.logging.context import make_deferred_yieldable, run_in_background @@ -126,6 +126,7 @@ class CurrentStateDeltaMembership: event_id: Optional[str] prev_event_id: Optional[str] room_id: str + membership: str # Could be useful but we're not using it yet. # event_pos: PersistedEventPosition @@ -832,7 +833,13 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): # `null` when the server is no longer in the room or a state reset happened # and it was unset). sql = """ - SELECT s.event_id, s.prev_event_id, s.room_id, s.instance_name, s.stream_id + SELECT + s.event_id, + s.prev_event_id, + s.room_id, + s.instance_name, + s.stream_id, + m.membership FROM current_state_delta_stream AS s WHERE s.type = ? AND s.state_key = ? AND s.stream_id > ? AND s.stream_id <= ? @@ -846,12 +853,17 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): event_id=event_id, prev_event_id=prev_event_id, room_id=room_id, + # We can assume that the membership is `LEAVE` as a default. This + # will happen when `current_state_delta_stream.event_id` is null + # because it was unset due to a state reset or the server is no + # longer in the room (everyone on our local server left). 
+ membership=membership if membership else Membership.LEAVE, # event_pos=PersistedEventPosition( # instance_name=instance_name, # stream=stream_ordering, # ), ) - for event_id, prev_event_id, room_id, instance_name, stream_ordering in txn + for event_id, prev_event_id, room_id, instance_name, stream_ordering, membership in txn if _filter_results_by_stream( from_key, to_key, From 27d74b023e1a5679b4fbe6a5b4f6efaada8ec3b0 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 22:20:27 -0500 Subject: [PATCH 32/62] Iterate --- synapse/handlers/sliding_sync.py | 33 ++++--- synapse/storage/databases/main/stream.py | 115 +++++++++-------------- 2 files changed, 68 insertions(+), 80 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 5603fdeb38..dbbbbc66bf 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -443,22 +443,16 @@ class SlidingSyncHandler: membership_change.room_id, membership_change ) - # 1) Fixup part 1 + # 1) Fixup # # Since we fetched a snapshot of the users room list at some point in time after # the from/to tokens, we need to revert/rewind some membership changes to match # the point in time of the `to_token`. - prev_event_ids_in_from_to_range = [] + prev_event_ids_in_from_to_range: List[str] = [] for ( room_id, first_membership_change_after_to_token, ) in first_membership_change_by_room_id_after_to_token.items(): - # One of these should exist to be a valid row in `current_state_delta_stream` - assert ( - first_membership_change_after_to_token.event_id is not None - or first_membership_change_after_to_token.prev_event_id is not None - ) - # 1a) Remove rooms that the user joined after the `to_token` if first_membership_change_after_to_token.prev_event_id is None: sync_room_id_set.pop(room_id, None) @@ -469,7 +463,7 @@ class SlidingSyncHandler: first_membership_change_after_to_token.prev_event_id ) - # 1) Fixup part 2 + # 1) Fixup (more) # # 1b) 1c) Fetch the previous membership events that apply to the from/to range # and fixup our working list. @@ -522,18 +516,33 @@ class SlidingSyncHandler: ] = membership_change # 2) Fixup + last_membership_event_ids_to_include_in_from_to_range: List[str] = [] for ( last_membership_change_in_from_to_range ) in last_membership_change_by_room_id_in_from_to_range.values(): room_id = last_membership_change_in_from_to_range.room_id + sync_room_id_set[room_id] + # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). 
We
+ # include newly_left rooms because the last event that the user should see
+ # is their own leave event
+ if last_membership_change_in_from_to_range.membership == Membership.LEAVE:
+ # Save the look-up if we already have the `leave` event
+ if sync_room_id_set[room_id].event_id == last_membership_change_in_from_to_range.prev_event_id:
+ filtered_sync_room_id_set[room_id] = sync_room_id_set[room_id]
+ else:
+ last_membership_event_ids_to_include_in_from_to_range.append(last_membership_change_in_from_to_range.event_id)
+
+ # TODO
+ # last_membership_events_to_include_in_from_to_range = await self.store.get_events(
+ # last_membership_event_ids_to_include_in_from_to_range
+ # )
+ # for prev_event_in_from_to_range in prev_events_in_from_to_range.values():
+ # # 1b) 1c) Update the membership with what we found
+ # sync_room_id_set[prev_event_in_from_to_range.room_id] = (
+ # convert_event_to_rooms_for_user(prev_event_in_from_to_range)
+ # )
 
 # Since we fetched the users room list at some point in time after the from/to
 # tokens, we need to revert/rewind some membership changes to match the point in
diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py
index 595245e70e..ed571b0de7 100644
--- a/synapse/storage/databases/main/stream.py
+++ b/synapse/storage/databases/main/stream.py
@@ -63,7 +63,7 @@ from typing_extensions import Literal
 
 from twisted.internet import defer
 
-from synapse.api.constants import Direction, EventTypes, Membership
+from synapse.api.constants import Direction
 from synapse.api.filtering import Filter
 from synapse.events import EventBase
 from synapse.logging.context import make_deferred_yieldable, run_in_background
@@ -116,14 +116,13 @@ class _EventsAround:
 class CurrentStateDeltaMembership:
 """
 Attributes:
- event_id: The "current" membership event ID in this room. May be `None` if the
- server is no longer in the room or a state reset happened.
+ event_id: The "current" membership event ID in this room.
 prev_event_id: The previous membership event in this room that was replaced by
 the "current" one. May be `None` if there was no previous membership event.
 room_id: The room ID of the membership event.
 """
 
- event_id: Optional[str]
+ event_id: str
 prev_event_id: Optional[str]
 room_id: str
 membership: str
@@ -410,42 +409,6 @@ def _filter_results(
 return True
 
 
-def _filter_results_by_stream(
- lower_token: Optional[RoomStreamToken],
- upper_token: Optional[RoomStreamToken],
- instance_name: str,
- stream_ordering: int,
-) -> bool:
- """
- Note: This function only works with "live" tokens with `stream_ordering` only.
-
- Returns True if the event persisted by the given instance at the given
- topological/stream_ordering falls between the two tokens (taking a None
- token to mean unbounded).
-
- Used to filter results from fetching events in the DB against the given
- tokens. This is necessary to handle the case where the tokens include
- position maps, which we handle by fetching more than necessary from the DB
- and then filtering (rather than attempting to construct a complicated SQL
- query).
- """
- if lower_token:
- assert lower_token.topological is None
-
- # If these are live tokens we compare the stream ordering against the
- # writers stream position.
- if stream_ordering <= lower_token.get_stream_pos_for_instance(instance_name): - return False - - if upper_token: - assert upper_token.topological is None - - if upper_token.get_stream_pos_for_instance(instance_name) < stream_ordering: - return False - - return True - - def filter_to_clause(event_filter: Optional[Filter]) -> Tuple[str, List[str]]: # NB: This may create SQL clauses that don't optimise well (and we don't # have indices on all possible clauses). E.g. it may create @@ -819,58 +782,74 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): min_from_id = from_key.stream max_to_id = to_key.get_max_stream_pos() - args: List[Any] = [EventTypes.Member, user_id, min_from_id, max_to_id] + args: List[Any] = [user_id, min_from_id, max_to_id] # TODO: It would be good to assert that the `to_token` is >= # the first row in `current_state_delta_stream` for the rooms we're # interested in. Otherwise, we will end up with empty results and not know # it. - # Note: There is no index for `(type, state_key)` in - # `current_state_delta_stream`. We also can't just add an index for - # `event_id` and join the `room_memberships` table by `event_id` because it - # may be `null` in `current_state_delta_stream` so nothing will match (it's - # `null` when the server is no longer in the room or a state reset happened - # and it was unset). + # We have to look-up events by `stream_ordering` because + # `current_state_delta_stream.event_id` can be `null` if the server is no + # longer in the room or a state reset happened and it was unset. + # `stream_ordering` is unique across the Synapse instance so this should + # work fine. sql = """ SELECT - s.event_id, + e.event_id, s.prev_event_id, s.room_id, s.instance_name, s.stream_id, + e.topological_ordering, m.membership FROM current_state_delta_stream AS s - WHERE s.type = ? AND s.state_key = ? + INNER JOIN events AS e ON e.stream_ordering = s.stream_id + INNER JOIN room_memberships AS m ON m.event_id = e.event_id + WHERE m.user_id = ? AND s.stream_id > ? AND s.stream_id <= ? ORDER BY s.stream_id ASC """ txn.execute(sql, args) - return [ - CurrentStateDeltaMembership( - event_id=event_id, - prev_event_id=prev_event_id, - room_id=room_id, - # We can assume that the membership is `LEAVE` as a default. This - # will happen when `current_state_delta_stream.event_id` is null - # because it was unset due to a state reset or the server is no - # longer in the room (everyone on our local server left). 
- membership=membership if membership else Membership.LEAVE, - # event_pos=PersistedEventPosition( - # instance_name=instance_name, - # stream=stream_ordering, - # ), - ) - for event_id, prev_event_id, room_id, instance_name, stream_ordering, membership in txn - if _filter_results_by_stream( + membership_changes: List[CurrentStateDeltaMembership] = [] + for ( + event_id, + prev_event_id, + room_id, + instance_name, + stream_ordering, + topological_ordering, + membership, + ) in txn: + assert event_id is not None + # `prev_event_id` can be `None` + assert room_id is not None + assert instance_name is not None + assert stream_ordering is not None + assert topological_ordering is not None + assert membership is not None + + if _filter_results( from_key, to_key, instance_name, + topological_ordering, stream_ordering, - ) - ] + ): + membership_changes.append( + CurrentStateDeltaMembership( + event_id=event_id, + prev_event_id=prev_event_id, + room_id=room_id, + membership=membership, + # event_pos=PersistedEventPosition( + # instance_name=instance_name, + # stream=stream_ordering, + # ), + ) + ) current_state_delta_membership_changes = await self.db_pool.runInteraction( "get_current_state_delta_membership_changes_for_user", f From fb8fbd489cb920b6d29282e3b2912a311bade162 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 22:37:45 -0500 Subject: [PATCH 33/62] Just fetch full events for `get_current_state_delta_membership_changes_for_user(...)` Makes downstream logic simpler and although we may look-up some events we don't use, the lookup is all done in one go instead of fetching events from event_ids in a couple different places. --- synapse/handlers/sliding_sync.py | 203 +---------------------- synapse/storage/databases/main/stream.py | 54 +++++- 2 files changed, 51 insertions(+), 206 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index dbbbbc66bf..5d63099499 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -448,34 +448,20 @@ class SlidingSyncHandler: # Since we fetched a snapshot of the users room list at some point in time after # the from/to tokens, we need to revert/rewind some membership changes to match # the point in time of the `to_token`. - prev_event_ids_in_from_to_range: List[str] = [] for ( room_id, first_membership_change_after_to_token, ) in first_membership_change_by_room_id_after_to_token.items(): # 1a) Remove rooms that the user joined after the `to_token` - if first_membership_change_after_to_token.prev_event_id is None: + if first_membership_change_after_to_token.prev_event is None: sync_room_id_set.pop(room_id, None) # 1b) 1c) From the first membership event after the `to_token`, step backward to the # previous membership that would apply to the from/to range. else: - prev_event_ids_in_from_to_range.append( - first_membership_change_after_to_token.prev_event_id + sync_room_id_set[room_id] = convert_event_to_rooms_for_user( + first_membership_change_after_to_token.prev_event ) - # 1) Fixup (more) - # - # 1b) 1c) Fetch the previous membership events that apply to the from/to range - # and fixup our working list. 
- prev_events_in_from_to_range = await self.store.get_events( - prev_event_ids_in_from_to_range - ) - for prev_event_in_from_to_range in prev_events_in_from_to_range.values(): - # 1b) 1c) Update the membership with what we found - sync_room_id_set[prev_event_in_from_to_range.room_id] = ( - convert_event_to_rooms_for_user(prev_event_in_from_to_range) - ) - filtered_sync_room_id_set = { room_id: room_for_user for room_id, room_for_user in sync_room_id_set.items() @@ -516,195 +502,18 @@ class SlidingSyncHandler: ] = membership_change # 2) Fixup - last_membership_event_ids_to_include_in_from_to_range: List[str] = [] for ( last_membership_change_in_from_to_range ) in last_membership_change_by_room_id_in_from_to_range.values(): room_id = last_membership_change_in_from_to_range.room_id - sync_room_id_set[room_id] - # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). We # include newly_left rooms because the last event that the user should see # is their own leave event if last_membership_change_in_from_to_range.membership == Membership.LEAVE: - # Save the look-up if we already have the `leave` event - if sync_room_id_set[room_id].event_id == last_membership_change_in_from_to_range.prev_event_id:: - filtered_sync_room_id_set[room_id] = sync_room_id_set[room_id] - else: - last_membership_event_ids_to_include_in_from_to_range.append(last_membership_change_in_from_to_range.event_id) - - # TODO - # last_membership_events_to_include_in_from_to_range = await self.store.get_events( - # last_membership_event_ids_to_include_in_from_to_range - # ) - # for prev_event_in_from_to_range in prev_events_in_from_to_range.values(): - # # 1b) 1c) Update the membership with what we found - # sync_room_id_set[prev_event_in_from_to_range.room_id] = ( - # convert_event_to_rooms_for_user(prev_event_in_from_to_range) - # ) - - # Since we fetched the users room list at some point in time after the from/to - # tokens, we need to revert/rewind some membership changes to match the point in - # time of the `to_token`. In particular, we need to make these fixups: - # - # - 1a) Remove rooms that the user joined after the `to_token` - # - 1b) Add back rooms that the user left after the `to_token` - # - 2) Add back newly_left rooms (> `from_token` and <= `to_token`) - - # # 1) ----------------------------------------------------- - - # # 1) Fetch membership changes that fall in the range from `to_token` up to - # # `membership_snapshot_token` - # # - # # If our `to_token` is already the same or ahead of the latest room membership - # # for the user, we don't need to do any "2)" fix-ups and can just straight-up - # # use the room list from the snapshot as a base (nothing has changed) - # membership_change_events_after_to_token = [] - # if not membership_snapshot_token.is_before_or_eq(to_token.room_key): - # membership_change_events_after_to_token = ( - # await self.store.get_membership_changes_for_user( - # user_id, - # from_key=to_token.room_key, - # to_key=membership_snapshot_token, - # excluded_rooms=self.rooms_to_exclude_globally, - # ) - # ) - - # # 1) Assemble a list of the last membership events in some given ranges. Someone - # # could have left and joined multiple times during the given range but we only - # # care about end-result so we grab the last one. - # last_membership_change_by_room_id_after_to_token: Dict[str, EventBase] = {} - # # We also need the first membership event after the `to_token` so we can step - # # backward to the previous membership that would apply to the from/to range. 
- # first_membership_change_by_room_id_after_to_token: Dict[str, EventBase] = {} - # for event in membership_change_events_after_to_token: - # last_membership_change_by_room_id_after_to_token[event.room_id] = event - # # Only set if we haven't already set it - # first_membership_change_by_room_id_after_to_token.setdefault( - # event.room_id, event - # ) - - # # 1) Fixup - # for ( - # last_membership_change_after_to_token - # ) in last_membership_change_by_room_id_after_to_token.values(): - # room_id = last_membership_change_after_to_token.room_id - - # # We want to find the first membership change after the `to_token` then step - # # backward to know the membership in the from/to range. - # first_membership_change_after_to_token = ( - # first_membership_change_by_room_id_after_to_token.get(room_id) - # ) - # assert first_membership_change_after_to_token is not None, ( - # "If there was a `last_membership_change_after_to_token` that we're iterating over, " - # + "then there should be corresponding a first change. For example, even if there " - # + "is only one event after the `to_token`, the first and last event will be same event. " - # + "This is probably a mistake in assembling the `last_membership_change_by_room_id_after_to_token`" - # + "/`first_membership_change_by_room_id_after_to_token` dicts above." - # ) - # # TODO: Instead of reading from `unsigned`, refactor this to use the - # # `current_state_delta_stream` table in the future. Probably a new - # # `get_membership_changes_for_user()` function that uses - # # `current_state_delta_stream` with a join to `room_memberships`. This would - # # help in state reset scenarios since `prev_content` is looking at the - # # current branch vs the current room state. This is all just data given to - # # the client so no real harm to data integrity, but we'd like to be nice to - # # the client. Since the `current_state_delta_stream` table is new, it - # # doesn't have all events in it. Since this is Sliding Sync, if we ever need - # # to, we can signal the client to throw all of their state away by sending - # # "operation: RESET". - # prev_content = first_membership_change_after_to_token.unsigned.get( - # "prev_content", {} - # ) - # prev_membership = prev_content.get("membership", None) - # prev_sender = first_membership_change_after_to_token.unsigned.get( - # "prev_sender", None - # ) - - # # Check if the previous membership (membership that applies to the from/to - # # range) should be included in our `sync_room_id_set` - # should_prev_membership_be_included = ( - # prev_membership is not None - # and prev_sender is not None - # and filter_membership_for_sync( - # membership=prev_membership, - # user_id=user_id, - # sender=prev_sender, - # ) - # ) - - # # Check if the last membership (membership that applies to our snapshot) was - # # already included in our `sync_room_id_set` - # was_last_membership_already_included = filter_membership_for_sync( - # membership=last_membership_change_after_to_token.membership, - # user_id=user_id, - # sender=last_membership_change_after_to_token.sender, - # ) - - # # 1a) Add back rooms that the user left after the `to_token` - # # - # # For example, if the last membership event after the `to_token` is a leave - # # event, then the room was excluded from `sync_room_id_set` when we first - # # crafted it above. We should add these rooms back as long as the user also - # # was part of the room before the `to_token`. 
- # if ( - # not was_last_membership_already_included - # and should_prev_membership_be_included - # ): - # # TODO: Assign the correct membership event at the `to_token` here - # # (currently we're setting it as the last event after the `to_token`) - # sync_room_id_set[room_id] = convert_event_to_rooms_for_user( - # last_membership_change_after_to_token - # ) - # # 1b) Remove rooms that the user joined (hasn't left) after the `to_token` - # # - # # For example, if the last membership event after the `to_token` is a "join" - # # event, then the room was included `sync_room_id_set` when we first crafted - # # it above. We should remove these rooms as long as the user also wasn't - # # part of the room before the `to_token`. - # elif ( - # was_last_membership_already_included - # and not should_prev_membership_be_included - # ): - # del sync_room_id_set[room_id] - - # # 2) ----------------------------------------------------- - # # We fix-up newly_left rooms after the first fixup because it may have removed - # # some left rooms that we can figure out are newly_left in the following code - - # # 2) Fetch membership changes that fall in the range from `from_token` up to `to_token` - # membership_change_events_in_from_to_range = [] - # if from_token: - # membership_change_events_in_from_to_range = ( - # await self.store.get_membership_changes_for_user( - # user_id, - # from_key=from_token.room_key, - # to_key=to_token.room_key, - # excluded_rooms=self.rooms_to_exclude_globally, - # ) - # ) - - # # 2) Assemble a list of the last membership events in some given ranges. Someone - # # could have left and joined multiple times during the given range but we only - # # care about end-result so we grab the last one. - # last_membership_change_by_room_id_in_from_to_range: Dict[str, EventBase] = {} - # for event in membership_change_events_in_from_to_range: - # last_membership_change_by_room_id_in_from_to_range[event.room_id] = event - - # # 2) Fixup - # for ( - # last_membership_change_in_from_to_range - # ) in last_membership_change_by_room_id_in_from_to_range.values(): - # room_id = last_membership_change_in_from_to_range.room_id - - # # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). We - # # include newly_left rooms because the last event that the user should see - # # is their own leave event - # if last_membership_change_in_from_to_range.membership == Membership.LEAVE: - # sync_room_id_set[room_id] = convert_event_to_rooms_for_user( - # last_membership_change_in_from_to_range - # ) + filtered_sync_room_id_set[room_id] = convert_event_to_rooms_for_user( + last_membership_change_in_from_to_range.event + ) return filtered_sync_room_id_set diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index ed571b0de7..ce135ededc 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -113,21 +113,37 @@ class _EventsAround: @attr.s(slots=True, frozen=True, auto_attribs=True) -class CurrentStateDeltaMembership: +class _CurrentStateDeltaMembershipReturn: """ Attributes: event_id: The "current" membership event ID in this room. prev_event_id: The previous membership event in this room that was replaced by the "current" one. May be `None` if there was no previous membership event. room_id: The room ID of the membership event. + membership: The membership state of the user in the room. """ event_id: str prev_event_id: Optional[str] room_id: str membership: str - # Could be useful but we're not using it yet. 
- # event_pos: PersistedEventPosition + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class CurrentStateDeltaMembership: + """ + Attributes: + event: The "current" membership event in this room. + prev_event: The previous membership event in this room that was replaced by + the "current" one. May be `None` if there was no previous membership event. + room_id: The room ID of the membership event. + membership: The membership state of the user in the room. + """ + + event: EventBase + prev_event: Optional[EventBase] + room_id: str + membership: str def generate_pagination_where_clause( @@ -776,7 +792,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): if not has_changed: return [] - def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: + def f(txn: LoggingTransaction) -> List[_CurrentStateDeltaMembershipReturn]: # To handle tokens with a non-empty instance_map we fetch more # results than necessary and then filter down min_from_id = from_key.stream @@ -813,7 +829,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): txn.execute(sql, args) - membership_changes: List[CurrentStateDeltaMembership] = [] + membership_changes: List[_CurrentStateDeltaMembershipReturn] = [] for ( event_id, prev_event_id, @@ -839,7 +855,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): stream_ordering, ): membership_changes.append( - CurrentStateDeltaMembership( + _CurrentStateDeltaMembershipReturn( event_id=event_id, prev_event_id=prev_event_id, room_id=room_id, @@ -851,17 +867,37 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ) ) - current_state_delta_membership_changes = await self.db_pool.runInteraction( + return membership_changes + + membership_changes = await self.db_pool.runInteraction( "get_current_state_delta_membership_changes_for_user", f ) + # Fetch all events in one go + event_ids = [] + for m in membership_changes: + event_ids.append(m.event_id) + if m.prev_event_id is not None: + event_ids.append(m.prev_event_id) + + events = await self.get_events(event_ids, get_prev_content=False) + rooms_to_exclude: AbstractSet[str] = set() if excluded_rooms is not None: rooms_to_exclude = set(excluded_rooms) return [ - membership_change - for membership_change in current_state_delta_membership_changes + CurrentStateDeltaMembership( + event=events[membership_change.event_id], + prev_event=( + events[membership_change.prev_event_id] + if membership_change.prev_event_id + else None + ), + room_id=membership_change.room_id, + membership=membership_change.membership, + ) + for membership_change in membership_changes if membership_change.room_id not in rooms_to_exclude ] From d91aa0018ca082cc88a3b3bfb7e06d1becb74227 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 22:45:59 -0500 Subject: [PATCH 34/62] Remove extras --- synapse/handlers/sliding_sync.py | 15 +++++---- synapse/storage/databases/main/stream.py | 40 +++++++----------------- 2 files changed, 21 insertions(+), 34 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 5d63099499..fed663ac36 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -427,7 +427,7 @@ class SlidingSyncHandler: user_id, from_key=to_token.room_key, to_key=membership_snapshot_token, - excluded_rooms=self.rooms_to_exclude_globally, + excluded_room_ids=self.rooms_to_exclude_globally, ) ) @@ -440,7 +440,7 @@ class SlidingSyncHandler: for membership_change in current_state_delta_membership_changes_after_to_token: # Only set if 
we haven't already set it first_membership_change_by_room_id_after_to_token.setdefault( - membership_change.room_id, membership_change + membership_change.event.room_id, membership_change ) # 1) Fixup @@ -484,7 +484,7 @@ class SlidingSyncHandler: user_id, from_key=from_token.room_key, to_key=to_token.room_key, - excluded_rooms=self.rooms_to_exclude_globally, + excluded_room_ids=self.rooms_to_exclude_globally, ) ) @@ -498,19 +498,22 @@ class SlidingSyncHandler: membership_change ) in current_state_delta_membership_changes_in_from_to_range: last_membership_change_by_room_id_in_from_to_range[ - membership_change.room_id + membership_change.event.room_id ] = membership_change # 2) Fixup for ( last_membership_change_in_from_to_range ) in last_membership_change_by_room_id_in_from_to_range.values(): - room_id = last_membership_change_in_from_to_range.room_id + room_id = last_membership_change_in_from_to_range.event.room_id # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). We # include newly_left rooms because the last event that the user should see # is their own leave event - if last_membership_change_in_from_to_range.membership == Membership.LEAVE: + if ( + last_membership_change_in_from_to_range.event.membership + == Membership.LEAVE + ): filtered_sync_room_id_set[room_id] = convert_event_to_rooms_for_user( last_membership_change_in_from_to_range.event ) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index ce135ededc..efc0b88797 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -120,13 +120,11 @@ class _CurrentStateDeltaMembershipReturn: prev_event_id: The previous membership event in this room that was replaced by the "current" one. May be `None` if there was no previous membership event. room_id: The room ID of the membership event. - membership: The membership state of the user in the room. """ event_id: str prev_event_id: Optional[str] room_id: str - membership: str @attr.s(slots=True, frozen=True, auto_attribs=True) @@ -136,14 +134,10 @@ class CurrentStateDeltaMembership: event: The "current" membership event in this room. prev_event: The previous membership event in this room that was replaced by the "current" one. May be `None` if there was no previous membership event. - room_id: The room ID of the membership event. - membership: The membership state of the user in the room. 
""" event: EventBase prev_event: Optional[EventBase] - room_id: str - membership: str def generate_pagination_where_clause( @@ -771,7 +765,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): user_id: str, from_key: RoomStreamToken, to_key: RoomStreamToken, - excluded_rooms: Optional[List[str]] = None, + excluded_room_ids: Optional[List[str]] = None, ) -> List[CurrentStateDeltaMembership]: """ TODO @@ -817,8 +811,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): s.room_id, s.instance_name, s.stream_id, - e.topological_ordering, - m.membership + e.topological_ordering FROM current_state_delta_stream AS s INNER JOIN events AS e ON e.stream_ordering = s.stream_id INNER JOIN room_memberships AS m ON m.event_id = e.event_id @@ -837,7 +830,6 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): instance_name, stream_ordering, topological_ordering, - membership, ) in txn: assert event_id is not None # `prev_event_id` can be `None` @@ -845,7 +837,6 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): assert instance_name is not None assert stream_ordering is not None assert topological_ordering is not None - assert membership is not None if _filter_results( from_key, @@ -859,46 +850,39 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): event_id=event_id, prev_event_id=prev_event_id, room_id=room_id, - membership=membership, - # event_pos=PersistedEventPosition( - # instance_name=instance_name, - # stream=stream_ordering, - # ), ) ) return membership_changes - membership_changes = await self.db_pool.runInteraction( + raw_membership_changes = await self.db_pool.runInteraction( "get_current_state_delta_membership_changes_for_user", f ) # Fetch all events in one go event_ids = [] - for m in membership_changes: + for m in raw_membership_changes: event_ids.append(m.event_id) if m.prev_event_id is not None: event_ids.append(m.prev_event_id) events = await self.get_events(event_ids, get_prev_content=False) - rooms_to_exclude: AbstractSet[str] = set() - if excluded_rooms is not None: - rooms_to_exclude = set(excluded_rooms) + room_ids_to_exclude: AbstractSet[str] = set() + if excluded_room_ids is not None: + room_ids_to_exclude = set(excluded_room_ids) return [ CurrentStateDeltaMembership( - event=events[membership_change.event_id], + event=events[raw_membership_change.event_id], prev_event=( - events[membership_change.prev_event_id] - if membership_change.prev_event_id + events[raw_membership_change.prev_event_id] + if raw_membership_change.prev_event_id else None ), - room_id=membership_change.room_id, - membership=membership_change.membership, ) - for membership_change in membership_changes - if membership_change.room_id not in rooms_to_exclude + for raw_membership_change in raw_membership_changes + if raw_membership_change.room_id not in room_ids_to_exclude ] @cancellable From daa7e3691aa73f2d8a81de1823a0a44b54fe838f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 23:01:28 -0500 Subject: [PATCH 35/62] Add docstring --- synapse/storage/databases/main/stream.py | 32 ++++++++++++++++++++---- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index efc0b88797..730e55d135 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -768,12 +768,34 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): excluded_room_ids: Optional[List[str]] = None, ) -> List[CurrentStateDeltaMembership]: """ - TODO + 
Fetch membership events (and the previous event that was replaced by that one) + for a given user. - Note: This function only works with "live" tokens with `stream_ordering` only. + We're looking for membership changes in the token range (> `from_key` and <= + `to_key`). - All such events whose stream ordering `s` lies in the range `from_key < s <= - to_key` are returned. Events are sorted by `stream_ordering` ascending. + Please be mindful to only use this with `from_key` and `to_key` tokens that are + recent enough to be after when the first local user joined the room. Otherwise, + the results may be incomplete or too greedy. For example, if you use a token + range before the first local user joined the room, you will see 0 events since + `current_state_delta_stream` tracks what the server thinks is the current state + of the room as time goes. It does not track how state progresses from the + beginning of the room. So for example, when you remotely join a room, the first + rows will just be the state when you joined and progress from there. + + You can probably reasonably use this with `/sync` because the `to_key` passed in + will be the "current" now token and the range will cover when the user joined + the room. + + Args: + user_id: The user ID to fetch membership events for. + from_key: The point in the stream to sync from (fetching events > this point). + to_key: The token to fetch rooms up to (fetching events <= this point). + excluded_room_ids: Optional list of room IDs to exclude from the results. + + Returns: + All membership changes to the current state in the token range. Events are + sorted by `stream_ordering` ascending. """ # Start by ruling out cases where a DB query is not necessary. if from_key == to_key: @@ -794,7 +816,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): args: List[Any] = [user_id, min_from_id, max_to_id] - # TODO: It would be good to assert that the `to_token` is >= + # TODO: It would be good to assert that the `from_token`/`to_token` is >= # the first row in `current_state_delta_stream` for the rooms we're # interested in. Otherwise, we will end up with empty results and not know # it. From cccbd15e7ece55ec8aab2632fcb7099215b29c86 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 23:40:55 -0500 Subject: [PATCH 36/62] Refactor back to not pulling out full events --- synapse/handlers/sliding_sync.py | 159 +++++++++++++---------- synapse/storage/databases/main/stream.py | 75 +++++------ 2 files changed, 125 insertions(+), 109 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index fed663ac36..c1cfec5000 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -28,7 +28,6 @@ from synapse.events import EventBase from synapse.events.utils import strip_event from synapse.handlers.relations import BundledAggregations from synapse.storage.databases.main.stream import CurrentStateDeltaMembership -from synapse.storage.roommember import RoomsForUser from synapse.types import ( JsonDict, PersistedEventPosition, @@ -48,27 +47,6 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -def convert_event_to_rooms_for_user(event: EventBase) -> RoomsForUser: - """ - Quick helper to convert an event to a `RoomsForUser` object. 
- """ - # These fields should be present for all persisted events - assert event.internal_metadata.stream_ordering is not None - assert event.internal_metadata.instance_name is not None - - return RoomsForUser( - room_id=event.room_id, - sender=event.sender, - membership=event.membership, - event_id=event.event_id, - event_pos=PersistedEventPosition( - event.internal_metadata.instance_name, - event.internal_metadata.stream_ordering, - ), - room_version_id=event.room_version.identifier, - ) - - def filter_membership_for_sync(*, membership: str, user_id: str, sender: str) -> bool: """ Returns True if the membership event should be included in the sync response, @@ -108,6 +86,25 @@ class RoomSyncConfig: required_state: Set[Tuple[str, str]] +@attr.s(slots=True, frozen=True, auto_attribs=True) +class _RoomMembershipForUser: + """ + Attributes: + event_id: The event ID of the membership event + event_pos: The stream position of the membership event + membership: The membership state of the user in the room + sender: The person who sent the membership event + newly_joined: Whether the user newly joined the room during the given token + range + """ + + event_id: str + event_pos: PersistedEventPosition + membership: str + sender: str + newly_joined: bool + + class SlidingSyncHandler: def __init__(self, hs: "HomeServer"): self.clock = hs.get_clock() @@ -302,7 +299,7 @@ class SlidingSyncHandler: user=sync_config.user, room_id=room_id, room_sync_config=room_sync_config, - rooms_for_user_membership_at_to_token=sync_room_map[room_id], + rooms_membership_for_user_at_to_token=sync_room_map[room_id], from_token=from_token, to_token=to_token, ) @@ -321,7 +318,7 @@ class SlidingSyncHandler: user: UserID, to_token: StreamToken, from_token: Optional[StreamToken] = None, - ) -> Dict[str, RoomsForUser]: + ) -> Dict[str, _RoomMembershipForUser]: """ Fetch room IDs that should be listed for this user in the sync response (the full room list that will be filtered, sorted, and sliced). @@ -373,7 +370,13 @@ class SlidingSyncHandler: # Note: The `room_for_user` we're assigning here will need to be fixed up # (below) because they are potentially from the current snapshot time # instead from the time of the `to_token`. - room_for_user.room_id: room_for_user + room_for_user.room_id: _RoomMembershipForUser( + event_id=room_for_user.event_id, + event_pos=room_for_user.event_pos, + membership=room_for_user.membership, + sender=room_for_user.sender, + newly_joined=False, + ) for room_for_user in room_for_user_list } @@ -440,7 +443,7 @@ class SlidingSyncHandler: for membership_change in current_state_delta_membership_changes_after_to_token: # Only set if we haven't already set it first_membership_change_by_room_id_after_to_token.setdefault( - membership_change.event.room_id, membership_change + membership_change.room_id, membership_change ) # 1) Fixup @@ -448,27 +451,59 @@ class SlidingSyncHandler: # Since we fetched a snapshot of the users room list at some point in time after # the from/to tokens, we need to revert/rewind some membership changes to match # the point in time of the `to_token`. 
+ prev_event_ids_in_from_to_range: List[str] = [] for ( room_id, first_membership_change_after_to_token, ) in first_membership_change_by_room_id_after_to_token.items(): # 1a) Remove rooms that the user joined after the `to_token` - if first_membership_change_after_to_token.prev_event is None: + if first_membership_change_after_to_token.prev_event_id is None: sync_room_id_set.pop(room_id, None) # 1b) 1c) From the first membership event after the `to_token`, step backward to the # previous membership that would apply to the from/to range. else: - sync_room_id_set[room_id] = convert_event_to_rooms_for_user( - first_membership_change_after_to_token.prev_event + prev_event_ids_in_from_to_range.append( + first_membership_change_after_to_token.prev_event_id ) + # 1) Fixup (more) + # + # 1b) 1c) Fetch the previous membership events that apply to the from/to range + # and fixup our working list. + prev_events_in_from_to_range = await self.store.get_events( + prev_event_ids_in_from_to_range + ) + for prev_event_in_from_to_range in prev_events_in_from_to_range.values(): + # These fields should be present for all persisted events + assert ( + prev_event_in_from_to_range.internal_metadata.instance_name is not None + ) + assert ( + prev_event_in_from_to_range.internal_metadata.stream_ordering + is not None + ) + + # 1b) 1c) Update the membership with what we found + sync_room_id_set[prev_event_in_from_to_range.room_id] = ( + _RoomMembershipForUser( + event_id=prev_event_in_from_to_range.event_id, + event_pos=PersistedEventPosition( + instance_name=prev_event_in_from_to_range.internal_metadata.instance_name, + stream=prev_event_in_from_to_range.internal_metadata.stream_ordering, + ), + membership=prev_event_in_from_to_range.membership, + sender=prev_event_in_from_to_range.sender, + newly_joined=False, + ) + ) + filtered_sync_room_id_set = { - room_id: room_for_user - for room_id, room_for_user in sync_room_id_set.items() + room_id: room_membership_for_user + for room_id, room_membership_for_user in sync_room_id_set.items() if filter_membership_for_sync( - membership=room_for_user.membership, + membership=room_membership_for_user.membership, user_id=user_id, - sender=room_for_user.sender, + sender=room_membership_for_user.sender, ) } @@ -498,35 +533,38 @@ class SlidingSyncHandler: membership_change ) in current_state_delta_membership_changes_in_from_to_range: last_membership_change_by_room_id_in_from_to_range[ - membership_change.event.room_id + membership_change.room_id ] = membership_change # 2) Fixup for ( last_membership_change_in_from_to_range ) in last_membership_change_by_room_id_in_from_to_range.values(): - room_id = last_membership_change_in_from_to_range.event.room_id + room_id = last_membership_change_in_from_to_range.room_id # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). 
We # include newly_left rooms because the last event that the user should see # is their own leave event - if ( - last_membership_change_in_from_to_range.event.membership - == Membership.LEAVE - ): - filtered_sync_room_id_set[room_id] = convert_event_to_rooms_for_user( - last_membership_change_in_from_to_range.event + if last_membership_change_in_from_to_range.membership == Membership.LEAVE: + filtered_sync_room_id_set[room_id] = _RoomMembershipForUser( + event_id=last_membership_change_in_from_to_range.event_id, + event_pos=last_membership_change_in_from_to_range.event_pos, + membership=last_membership_change_in_from_to_range.membership, + sender=last_membership_change_in_from_to_range.sender, + newly_joined=False, ) + # TODO: Figure out `newly_joined` + return filtered_sync_room_id_set async def filter_rooms( self, user: UserID, - sync_room_map: Dict[str, RoomsForUser], + sync_room_map: Dict[str, _RoomMembershipForUser], filters: SlidingSyncConfig.SlidingSyncList.Filters, to_token: StreamToken, - ) -> Dict[str, RoomsForUser]: + ) -> Dict[str, _RoomMembershipForUser]: """ Filter rooms based on the sync request. @@ -627,9 +665,9 @@ class SlidingSyncHandler: async def sort_rooms( self, - sync_room_map: Dict[str, RoomsForUser], + sync_room_map: Dict[str, _RoomMembershipForUser], to_token: StreamToken, - ) -> List[Tuple[str, RoomsForUser]]: + ) -> List[Tuple[str, _RoomMembershipForUser]]: """ Sort by `stream_ordering` of the last event that the user should see in the room. `stream_ordering` is unique so we get a stable sort. @@ -682,7 +720,7 @@ class SlidingSyncHandler: user: UserID, room_id: str, room_sync_config: RoomSyncConfig, - rooms_for_user_membership_at_to_token: RoomsForUser, + rooms_membership_for_user_at_to_token: _RoomMembershipForUser, from_token: Optional[StreamToken], to_token: StreamToken, ) -> SlidingSyncResult.RoomResult: @@ -696,7 +734,7 @@ class SlidingSyncHandler: room_id: The room ID to fetch data for room_sync_config: Config for what data we should fetch for a room in the sync response. - rooms_for_user_membership_at_to_token: Membership information for the user + rooms_membership_for_user_at_to_token: Membership information for the user in the room at the time of `to_token`. from_token: The point in the stream to sync from. to_token: The point in the stream to sync up to. @@ -716,7 +754,7 @@ class SlidingSyncHandler: if ( room_sync_config.timeline_limit > 0 # No timeline for invite/knock rooms (just `stripped_state`) - and rooms_for_user_membership_at_to_token.membership + and rooms_membership_for_user_at_to_token.membership not in (Membership.INVITE, Membership.KNOCK) ): limited = False @@ -726,27 +764,15 @@ class SlidingSyncHandler: # position once we've fetched the events to point to the earliest event fetched. 
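# (Editor's aside: a hedged sketch of the bound selection implemented just
# below, with simplified integer positions and invented names; the real code
# works with `StreamToken`/`RoomStreamToken` objects.)
from typing import Optional, Tuple

def choose_timeline_bounds(
    membership: str,
    membership_event_pos: int,
    to_token_pos: int,
    from_token_pos: Optional[int],
    newly_joined: bool,
) -> Tuple[int, Optional[int]]:
    """Return (position to paginate backwards from, optional stopping point)."""
    # People shouldn't see past their own leave/ban event.
    upper = membership_event_pos if membership in ("leave", "ban") else to_token_pos
    # Initial syncs and newly joined rooms get historical backlog; otherwise we
    # stop at `from_token` since the client already has everything older.
    lower = from_token_pos if from_token_pos is not None and not newly_joined else None
    return upper, lower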
prev_batch_token = to_token - newly_joined = False - if ( - # We can only determine new-ness if we have a `from_token` to define our range - from_token is not None - and rooms_for_user_membership_at_to_token.membership == Membership.JOIN - ): - newly_joined = ( - rooms_for_user_membership_at_to_token.event_pos.persisted_after( - from_token.room_key - ) - ) - # We're going to paginate backwards from the `to_token` from_bound = to_token.room_key # People shouldn't see past their leave/ban event - if rooms_for_user_membership_at_to_token.membership in ( + if rooms_membership_for_user_at_to_token.membership in ( Membership.LEAVE, Membership.BAN, ): from_bound = ( - rooms_for_user_membership_at_to_token.event_pos.to_room_stream_token() + rooms_membership_for_user_at_to_token.event_pos.to_room_stream_token() ) # Determine whether we should limit the timeline to the token range. @@ -760,7 +786,8 @@ class SlidingSyncHandler: # connection before to_bound = ( from_token.room_key - if from_token is not None and not newly_joined + if from_token is not None + and not rooms_membership_for_user_at_to_token.newly_joined else None ) @@ -797,7 +824,7 @@ class SlidingSyncHandler: self.storage_controllers, user.to_string(), timeline_events, - is_peeking=rooms_for_user_membership_at_to_token.membership + is_peeking=rooms_membership_for_user_at_to_token.membership != Membership.JOIN, filter_send_to_client=True, ) @@ -852,12 +879,12 @@ class SlidingSyncHandler: # Figure out any stripped state events for invite/knocks. This allows the # potential joiner to identify the room. stripped_state: List[JsonDict] = [] - if rooms_for_user_membership_at_to_token.membership in ( + if rooms_membership_for_user_at_to_token.membership in ( Membership.INVITE, Membership.KNOCK, ): invite_or_knock_event = await self.store.get_event( - rooms_for_user_membership_at_to_token.event_id + rooms_membership_for_user_at_to_token.event_id ) stripped_state = [] diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 730e55d135..c5e6537980 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -112,32 +112,25 @@ class _EventsAround: end: RoomStreamToken -@attr.s(slots=True, frozen=True, auto_attribs=True) -class _CurrentStateDeltaMembershipReturn: - """ - Attributes: - event_id: The "current" membership event ID in this room. - prev_event_id: The previous membership event in this room that was replaced by - the "current" one. May be `None` if there was no previous membership event. - room_id: The room ID of the membership event. - """ - - event_id: str - prev_event_id: Optional[str] - room_id: str - - @attr.s(slots=True, frozen=True, auto_attribs=True) class CurrentStateDeltaMembership: """ Attributes: - event: The "current" membership event in this room. - prev_event: The previous membership event in this room that was replaced by + event_id: The "current" membership event ID in this room. + event_pos: The position of the "current" membership event in the event stream. + prev_event_id: The previous membership event in this room that was replaced by the "current" one. May be `None` if there was no previous membership event. + room_id: The room ID of the membership event. 
+ membership: The membership state of the user in the room + sender: The person who sent the membership event """ - event: EventBase - prev_event: Optional[EventBase] + event_id: str + event_pos: PersistedEventPosition + prev_event_id: Optional[str] + room_id: str + membership: str + sender: str def generate_pagination_where_clause( @@ -808,7 +801,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): if not has_changed: return [] - def f(txn: LoggingTransaction) -> List[_CurrentStateDeltaMembershipReturn]: + def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: # To handle tokens with a non-empty instance_map we fetch more # results than necessary and then filter down min_from_id = from_key.stream @@ -833,7 +826,9 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): s.room_id, s.instance_name, s.stream_id, - e.topological_ordering + e.topological_ordering, + m.membership, + e.sender FROM current_state_delta_stream AS s INNER JOIN events AS e ON e.stream_ordering = s.stream_id INNER JOIN room_memberships AS m ON m.event_id = e.event_id @@ -844,7 +839,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): txn.execute(sql, args) - membership_changes: List[_CurrentStateDeltaMembershipReturn] = [] + membership_changes: List[CurrentStateDeltaMembership] = [] for ( event_id, prev_event_id, @@ -852,6 +847,8 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): instance_name, stream_ordering, topological_ordering, + membership, + sender, ) in txn: assert event_id is not None # `prev_event_id` can be `None` @@ -859,6 +856,8 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): assert instance_name is not None assert stream_ordering is not None assert topological_ordering is not None + assert membership is not None + assert sender is not None if _filter_results( from_key, @@ -868,43 +867,33 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): stream_ordering, ): membership_changes.append( - _CurrentStateDeltaMembershipReturn( + CurrentStateDeltaMembership( event_id=event_id, + event_pos=PersistedEventPosition( + instance_name=instance_name, + stream=stream_ordering, + ), prev_event_id=prev_event_id, room_id=room_id, + membership=membership, + sender=sender, ) ) return membership_changes - raw_membership_changes = await self.db_pool.runInteraction( + membership_changes = await self.db_pool.runInteraction( "get_current_state_delta_membership_changes_for_user", f ) - # Fetch all events in one go - event_ids = [] - for m in raw_membership_changes: - event_ids.append(m.event_id) - if m.prev_event_id is not None: - event_ids.append(m.prev_event_id) - - events = await self.get_events(event_ids, get_prev_content=False) - room_ids_to_exclude: AbstractSet[str] = set() if excluded_room_ids is not None: room_ids_to_exclude = set(excluded_room_ids) return [ - CurrentStateDeltaMembership( - event=events[raw_membership_change.event_id], - prev_event=( - events[raw_membership_change.prev_event_id] - if raw_membership_change.prev_event_id - else None - ), - ) - for raw_membership_change in raw_membership_changes - if raw_membership_change.room_id not in room_ids_to_exclude + membership_change + for membership_change in membership_changes + if membership_change.room_id not in room_ids_to_exclude ] @cancellable From 62c6a4e8609f5d563b85f576d0a4d5b764c1f9c2 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 01:10:00 -0500 Subject: [PATCH 37/62] Add `newly_joined` support to `get_sync_room_ids_for_user(...)` --- 
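# (Editor's aside: the `newly_joined` rule this patch implements, as a
# standalone hedged sketch. The boolean inputs are distilled stand-ins for the
# `CurrentStateDeltaMembership` rows the real code inspects below.)
from typing import Optional

def is_newly_joined(
    has_non_join_in_range: bool,
    first_change_prev_event_id: Optional[str],
    membership_just_before_range: Optional[str],
) -> bool:
    """Decide `newly_joined` for a room the user is joined to at `to_token`."""
    if has_non_join_in_range:
        # The user left (or was kicked, etc.) and then joined again, all within
        # the token range.
        return True
    if first_change_prev_event_id is None:
        # The user's first-ever membership in the room happened in the range.
        return True
    # Otherwise it hinges on the membership just before the range: only a
    # non-join -> join transition across the range boundary counts.
    return membership_just_before_range != "join"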
 synapse/handlers/sliding_sync.py    |  82 +++++++++-
 tests/handlers/test_sliding_sync.py | 224 +++++++++++++++++++++++++++-
 2 files changed, 300 insertions(+), 6 deletions(-)

diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py
index c1cfec5000..97b04698b2 100644
--- a/synapse/handlers/sliding_sync.py
+++ b/synapse/handlers/sliding_sync.py
@@ -18,7 +18,8 @@
 #
 #
 import logging
-from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple
+from collections import defaultdict
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple
 
 import attr
 from immutabledict import immutabledict
@@ -104,6 +105,9 @@ class _RoomMembershipForUser:
     sender: str
     newly_joined: bool
 
+    def copy_and_replace(self, **kwds: Any) -> "_RoomMembershipForUser":
+        return attr.evolve(self, **kwds)
+
 
 class SlidingSyncHandler:
     def __init__(self, hs: "HomeServer"):
@@ -414,6 +418,7 @@ class SlidingSyncHandler:
         #   - 1b) Add back rooms that the user left after the `to_token`
         #   - 1c) Update room membership events to the point in time of the `to_token`
         # - 2) Add back newly_left rooms (> `from_token` and <= `to_token`)
+        # - 3) Figure out which rooms are `newly_joined`
 
         # 1) -----------------------------------------------------
 
@@ -529,19 +534,49 @@ class SlidingSyncHandler:
         last_membership_change_by_room_id_in_from_to_range: Dict[
             str, CurrentStateDeltaMembership
         ] = {}
+        # We also want to assemble a list of the first membership events during the token
+        # range so we can step backward to the previous membership that applied before
+        # the token range to see if we have `newly_joined` the room.
+        first_membership_change_by_room_id_in_from_to_range: Dict[
+            str, CurrentStateDeltaMembership
+        ] = {}
+        non_join_event_ids_by_room_id_in_from_to_range: Dict[str, List[str]] = (
+            defaultdict(list)
+        )
         for (
             membership_change
         ) in current_state_delta_membership_changes_in_from_to_range:
-            last_membership_change_by_room_id_in_from_to_range[
-                membership_change.room_id
-            ] = membership_change
+            room_id = membership_change.room_id
+
+            last_membership_change_by_room_id_in_from_to_range[room_id] = (
+                membership_change
+            )
+
+            # Only set if we haven't already set it
+            first_membership_change_by_room_id_in_from_to_range.setdefault(
+                room_id, membership_change
+            )
+
+            if membership_change.membership != Membership.JOIN:
+                non_join_event_ids_by_room_id_in_from_to_range[room_id].append(
+                    membership_change.event_id
+                )
 
         # 2) Fixup
+        #
+        # 3) We also want to assemble a list of possibly newly joined rooms. Someone
+        # could have left and joined multiple times during the given range, but we only
+        # care about whether they are joined at the end of the token range, so we work
+        # with the last membership event in the token range.
+        possibly_newly_joined_room_ids = set()
         for (
             last_membership_change_in_from_to_range
         ) in last_membership_change_by_room_id_in_from_to_range.values():
             room_id = last_membership_change_in_from_to_range.room_id
 
+            if last_membership_change_in_from_to_range.membership == Membership.JOIN:
+                possibly_newly_joined_room_ids.add(room_id)
+
             # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). We
             # include newly_left rooms because the last event that the user should see
             # is their own leave event
             if last_membership_change_in_from_to_range.membership == Membership.LEAVE:
                 filtered_sync_room_id_set[room_id] = _RoomMembershipForUser(
                     event_id=last_membership_change_in_from_to_range.event_id,
                     event_pos=last_membership_change_in_from_to_range.event_pos,
                     membership=last_membership_change_in_from_to_range.membership,
                     sender=last_membership_change_in_from_to_range.sender,
                     newly_joined=False,
                 )
 
-        # TODO: Figure out `newly_joined`
+        # 3) Figure out `newly_joined`
+        prev_event_ids_before_token_range: List[str] = []
+        for possibly_newly_joined_room_id in possibly_newly_joined_room_ids:
+            non_joins_for_room = non_join_event_ids_by_room_id_in_from_to_range[
+                possibly_newly_joined_room_id
+            ]
+            if len(non_joins_for_room) > 0:
+                # We found a `newly_joined` room (we left and joined within the token range)
+                filtered_sync_room_id_set[possibly_newly_joined_room_id] = (
+                    filtered_sync_room_id_set[possibly_newly_joined_room_id]
+                ).copy_and_replace(newly_joined=True)
+            else:
+                prev_event_id = first_membership_change_by_room_id_in_from_to_range[
+                    possibly_newly_joined_room_id
+                ].prev_event_id
+
+                if prev_event_id is None:
+                    # We found a `newly_joined` room (we are joining the room for the
+                    # first time within the token range)
+                    filtered_sync_room_id_set[possibly_newly_joined_room_id] = (
+                        filtered_sync_room_id_set[possibly_newly_joined_room_id]
+                    ).copy_and_replace(newly_joined=True)
+                else:
+                    # Last resort, we need to step back to the previous membership event
+                    # just before the token range to see if we're joined then or not.
+                    prev_event_ids_before_token_range.append(prev_event_id)
+
+        # 3) Figure out `newly_joined` (more)
+        prev_events_before_token_range = await self.store.get_events(
+            prev_event_ids_before_token_range
+        )
+        for prev_event_before_token_range in prev_events_before_token_range.values():
+            if prev_event_before_token_range.membership != Membership.JOIN:
+                # We found a `newly_joined` room (we left before the token range
+                # and joined within the token range)
+                room_id = prev_event_before_token_range.room_id
+                filtered_sync_room_id_set[room_id] = filtered_sync_room_id_set[
+                    room_id
+                ].copy_and_replace(newly_joined=True)
 
         return filtered_sync_room_id_set
 
diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py
index 694fd17a02..c25ca41098 100644
--- a/tests/handlers/test_sliding_sync.py
+++ b/tests/handlers/test_sliding_sync.py
@@ -116,6 +116,9 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase):
             room_id_results[room_id].event_id,
             join_response["event_id"],
         )
+        # We should be considered `newly_joined` because we joined during the token
+        # range
+        self.assertEqual(room_id_results[room_id].newly_joined, True)
 
     def test_get_already_joined_room(self) -> None:
         """
@@ -146,6 +149,8 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase):
             room_id_results[room_id].event_id,
             join_response["event_id"],
         )
+        # We should *NOT* be `newly_joined` because we joined before the token range
+        self.assertEqual(room_id_results[room_id].newly_joined, False)
 
     def test_get_invited_banned_knocked_room(self) -> None:
         """
@@ -232,6 +237,11 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase):
             room_id_results[knock_room_id].event_id,
             knock_room_membership_state_event.event_id,
         )
+        # We should *NOT* be `newly_joined` because we were not joined at the time
+        # of the `to_token`.
+        self.assertEqual(room_id_results[invited_room_id].newly_joined, False)
+        self.assertEqual(room_id_results[ban_room_id].newly_joined, False)
+        self.assertEqual(room_id_results[knock_room_id].newly_joined, False)
 
     def test_get_kicked_room(self) -> None:
         """
@@ -277,6 +287,9 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase):
             room_id_results[kick_room_id].event_id,
             kick_response["event_id"],
         )
+        # We should *NOT* be `newly_joined` because we were not joined at the time
+        # of the `to_token`.
+ self.assertEqual(room_id_results[kick_room_id].newly_joined, False) def test_forgotten_rooms(self) -> None: """ @@ -396,6 +409,8 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): room_id_results[room_id2].event_id, leave_response["event_id"], ) + # We should *NOT* be `newly_joined` because we are instead `newly_left` + self.assertEqual(room_id_results[room_id2].newly_joined, False) def test_no_joins_after_to_token(self) -> None: """ @@ -432,6 +447,8 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): room_id_results[room_id1].event_id, join_response1["event_id"], ) + # We should be `newly_joined` because we joined during the token range + self.assertEqual(room_id_results[room_id1].newly_joined, True) def test_join_during_range_and_left_room_after_to_token(self) -> None: """ @@ -477,6 +494,8 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): } ), ) + # We should be `newly_joined` because we joined during the token range + self.assertEqual(room_id_results[room_id1].newly_joined, True) def test_join_before_range_and_left_room_after_to_token(self) -> None: """ @@ -519,6 +538,8 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): } ), ) + # We should *NOT* be `newly_joined` because we joined before the token range + self.assertEqual(room_id_results[room_id1].newly_joined, False) def test_kicked_before_range_and_left_after_to_token(self) -> None: """ @@ -581,6 +602,8 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): } ), ) + # We should *NOT* be `newly_joined` because we were kicked + self.assertEqual(room_id_results[kick_room_id].newly_joined, False) def test_newly_left_during_range_and_join_leave_after_to_token(self) -> None: """ @@ -632,6 +655,8 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): } ), ) + # We should *NOT* be `newly_joined` because we left during the token range + self.assertEqual(room_id_results[room_id1].newly_joined, False) def test_newly_left_during_range_and_join_after_to_token(self) -> None: """ @@ -681,6 +706,8 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): } ), ) + # We should *NOT* be `newly_joined` because we left during the token range + self.assertEqual(room_id_results[room_id1].newly_joined, False) def test_no_from_token(self) -> None: """ @@ -727,6 +754,9 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): room_id_results[room_id1].event_id, join_response1["event_id"], ) + # We should *NOT* be `newly_joined` because there is no `from_token` to + # define a "live" range to compare against + self.assertEqual(room_id_results[room_id1].newly_joined, False) def test_from_token_ahead_of_to_token(self) -> None: """ @@ -793,6 +823,8 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): room_id_results[room_id1].event_id, join_response1["event_id"], ) + # We should *NOT* be `newly_joined` because we joined `room1` before either of the tokens + self.assertEqual(room_id_results[room_id1].newly_joined, False) def test_leave_before_range_and_join_leave_after_to_token(self) -> None: """ @@ -920,6 +952,8 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): } ), ) + # We should be `newly_joined` because we joined during the token range + self.assertEqual(room_id_results[room_id1].newly_joined, True) def test_join_leave_multiple_times_before_range_and_after_to_token( self, @@ -976,6 +1010,8 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): } ), ) + # We should *NOT* be `newly_joined` because we joined before the token range + self.assertEqual(room_id_results[room_id1].newly_joined, 
False)
 
     def test_invite_before_range_and_join_leave_after_to_token(
         self,
     ) -> None:
@@ -1028,8 +1064,11 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase):
                 }
             ),
         )
+        # We should *NOT* be `newly_joined` because we were only invited before the
+        # token range
+        self.assertEqual(room_id_results[room_id1].newly_joined, False)
 
-    def test_display_name_changes_in_token_range(
+    def test_join_and_display_name_changes_in_token_range(
         self,
     ) -> None:
         """
@@ -1101,6 +1140,68 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase):
                 }
             ),
         )
+        # We should be `newly_joined` because we joined during the token range
+        self.assertEqual(room_id_results[room_id1].newly_joined, True)
+
+    def test_display_name_changes_in_token_range(
+        self,
+    ) -> None:
+        """
+        Test that we point to the correct membership event within the from/to range
+        even if there are `displayname`/`avatar_url` updates.
+        """
+        user1_id = self.register_user("user1", "pass")
+        user1_tok = self.login(user1_id, "pass")
+        user2_id = self.register_user("user2", "pass")
+        user2_tok = self.login(user2_id, "pass")
+
+        # We create the room with user2 so the room isn't left with no members when we
+        # leave and can still re-join.
+        room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True)
+        join_response = self.helper.join(room_id1, user1_id, tok=user1_tok)
+
+        after_room1_token = self.event_sources.get_current_token()
+
+        # Update the displayname during the token range
+        displayname_change_during_token_range_response = self.helper.send_state(
+            room_id1,
+            event_type=EventTypes.Member,
+            state_key=user1_id,
+            body={
+                "membership": Membership.JOIN,
+                "displayname": "displayname during token range",
+            },
+            tok=user1_tok,
+        )
+
+        after_change1_token = self.event_sources.get_current_token()
+
+        room_id_results = self.get_success(
+            self.sliding_sync_handler.get_sync_room_ids_for_user(
+                UserID.from_string(user1_id),
+                from_token=after_room1_token,
+                to_token=after_change1_token,
+            )
+        )
+
+        # Room should show up because we were joined during the from/to range
+        self.assertEqual(room_id_results.keys(), {room_id1})
+        # It should be pointing to the latest membership event in the from/to range
+        self.assertEqual(
+            room_id_results[room_id1].event_id,
+            displayname_change_during_token_range_response["event_id"],
+            "Corresponding map to disambiguate the opaque event IDs: "
+            + str(
+                {
+                    "join_response": join_response["event_id"],
+                    "displayname_change_during_token_range_response": displayname_change_during_token_range_response[
+                        "event_id"
+                    ],
+                }
+            ),
+        )
+        # We should *NOT* be `newly_joined` because we joined before the token range
+        self.assertEqual(room_id_results[room_id1].newly_joined, False)
 
     def test_display_name_changes_before_and_after_token_range(
         self,
@@ -1172,6 +1273,8 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase):
                 }
             ),
         )
+        # We should *NOT* be `newly_joined` because we joined before the token range
+        self.assertEqual(room_id_results[room_id1].newly_joined, False)
 
     def test_display_name_changes_leave_after_token_range(
         self,
@@ -1250,6 +1353,8 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase):
                 }
             ),
         )
+        # We should be `newly_joined` because we joined during the token range
+        self.assertEqual(room_id_results[room_id1].newly_joined, True)
 
     def test_display_name_changes_join_after_token_range(
         self,
@@ -1298,6 +1403,123 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase):
         # Room shouldn't show up because we joined after the from/to range
         self.assertEqual(room_id_results.keys(), set())
 
+    def test_newly_joined_with_leave_join_in_token_range(
+        self,
+    ) -> None:
+        """
+        Test that a leave and rejoin within the token range still counts as
+        `newly_joined`.
+        """
+        user1_id = self.register_user("user1", "pass")
+        user1_tok = self.login(user1_id, "pass")
+        user2_id = self.register_user("user2", "pass")
+        user2_tok = self.login(user2_id, "pass")
+
+        # We create the room with user2 so the room isn't left with no members when we
+        # leave and can still re-join.
+        room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True)
+        self.helper.join(room_id1, user1_id, tok=user1_tok)
+
+        after_room1_token = self.event_sources.get_current_token()
+
+        # Leave and join back during the token range
+        self.helper.leave(room_id1, user1_id, tok=user1_tok)
+        join_response2 = self.helper.join(room_id1, user1_id, tok=user1_tok)
+
+        after_more_changes_token = self.event_sources.get_current_token()
+
+        room_id_results = self.get_success(
+            self.sliding_sync_handler.get_sync_room_ids_for_user(
+                UserID.from_string(user1_id),
+                from_token=after_room1_token,
+                to_token=after_more_changes_token,
+            )
+        )
+
+        # Room should show up because we were joined during the from/to range
+        self.assertEqual(room_id_results.keys(), {room_id1})
+        # It should be pointing to the latest membership event in the from/to range
+        self.assertEqual(
+            room_id_results[room_id1].event_id,
+            join_response2["event_id"],
+        )
+        # We should be considered `newly_joined` because there is a non-join event
+        # (our leave) before our latest join within the token range.
+        self.assertEqual(room_id_results[room_id1].newly_joined, True)
+
+    def test_newly_joined_only_joins_during_token_range(
+        self,
+    ) -> None:
+        """
+        Test that a join and more joins caused by display name changes, all during the
+        token range, still count as `newly_joined`.
+        """
+        user1_id = self.register_user("user1", "pass")
+        user1_tok = self.login(user1_id, "pass")
+        user2_id = self.register_user("user2", "pass")
+        user2_tok = self.login(user2_id, "pass")
+
+        before_room1_token = self.event_sources.get_current_token()
+
+        # We create the room with user2 so the room isn't left with no members when we
+        # leave and can still re-join.
+        room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True)
+        # Join the room during the token range
+        join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok)
+        # Update the displayname during the token range (looks like another join)
+        displayname_change_during_token_range_response1 = self.helper.send_state(
+            room_id1,
+            event_type=EventTypes.Member,
+            state_key=user1_id,
+            body={
+                "membership": Membership.JOIN,
+                "displayname": "displayname during token range",
+            },
+            tok=user1_tok,
+        )
+        # Update the displayname during the token range (looks like another join)
+        displayname_change_during_token_range_response2 = self.helper.send_state(
+            room_id1,
+            event_type=EventTypes.Member,
+            state_key=user1_id,
+            body={
+                "membership": Membership.JOIN,
+                "displayname": "displayname during token range",
+            },
+            tok=user1_tok,
+        )
+
+        after_room1_token = self.event_sources.get_current_token()
+
+        room_id_results = self.get_success(
+            self.sliding_sync_handler.get_sync_room_ids_for_user(
+                UserID.from_string(user1_id),
+                from_token=before_room1_token,
+                to_token=after_room1_token,
+            )
+        )
+
+        # Room should show up because we joined during the from/to range
+        self.assertEqual(room_id_results.keys(), {room_id1})
+        # It should be pointing to the latest membership event in the from/to range
+        self.assertEqual(
+            room_id_results[room_id1].event_id,
+            displayname_change_during_token_range_response2["event_id"],
+            "Corresponding map to disambiguate the opaque event IDs: "
+            + str(
+                {
+                    "join_response1": join_response1["event_id"],
+                    "displayname_change_during_token_range_response1": displayname_change_during_token_range_response1[
+                        "event_id"
+                    ],
+                    "displayname_change_during_token_range_response2": displayname_change_during_token_range_response2[
+                        "event_id"
+                    ],
+                }
+            ),
+        )
+        # We should be `newly_joined` because we first joined during the token range
+        self.assertEqual(room_id_results[room_id1].newly_joined, True)
+
     def test_multiple_rooms_are_not_confused(
         self,
     ) -> None:

From 39259f66fa8ccd13818b8a5681b81fa020a8d4d2 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Wed, 26 Jun 2024 01:16:46 -0500
Subject: [PATCH 38/62] Join both tables with stream_ordering

---
 synapse/storage/databases/main/stream.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py
index c5e6537980..2646dfd9cb 100644
--- a/synapse/storage/databases/main/stream.py
+++ b/synapse/storage/databases/main/stream.py
@@ -831,7 +831,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
                     e.sender
                 FROM current_state_delta_stream AS s
                 INNER JOIN events AS e ON e.stream_ordering = s.stream_id
-                INNER JOIN room_memberships AS m ON m.event_id = e.event_id
+                INNER JOIN room_memberships AS m ON m.event_stream_ordering = s.stream_id
                 WHERE m.user_id = ?
                     AND s.stream_id > ? AND s.stream_id <= ?
ORDER BY s.stream_id ASC From c60aca755b35f9e655b2f2c71367ba5806db64e5 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 02:02:34 -0500 Subject: [PATCH 39/62] Fix clause change --- synapse/storage/databases/main/stream.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 2646dfd9cb..562dc6eacf 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -934,6 +934,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ignore_room_clause, ignore_room_args = make_in_list_sql_clause( txn.database_engine, "e.room_id", excluded_rooms, negative=True ) + ignore_room_clause = f"AND {ignore_room_clause}" args += ignore_room_args sql = """ @@ -948,6 +949,8 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ignore_room_clause, ) + logger.info("get_membership_changes_for_user: %s", sql) + txn.execute(sql, args) rows = [ From 11db1befa2845f89d09be78e32d53b9b4b9bbad4 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 02:05:25 -0500 Subject: [PATCH 40/62] Remove debug log --- synapse/storage/databases/main/stream.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 562dc6eacf..f6be97698e 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -949,8 +949,6 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ignore_room_clause, ) - logger.info("get_membership_changes_for_user: %s", sql) - txn.execute(sql, args) rows = [ From 7395e1042072b3ab9f04898afa3989fda55a0978 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 02:19:31 -0500 Subject: [PATCH 41/62] Fix `builtins.SyntaxError: EOL while scanning string literal (test_sync.py, line 1885)` See https://github.com/element-hq/synapse/actions/runs/9675073109/job/26692003103?pr=17320#step:9:5552 Worked fine locally but failed in CI with Python 3.8 --- tests/rest/client/test_sync.py | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 338149f09a..bd1e7d521b 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1882,9 +1882,7 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): channel = self.make_request( "POST", self.sync_endpoint - + f"?pos={self.get_success( - from_token.to_string(self.store) - )}", + + f"?pos={self.get_success(from_token.to_string(self.store))}", { "lists": { "foo-list": { @@ -2074,9 +2072,7 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): channel = self.make_request( "POST", self.sync_endpoint - + f"?pos={self.get_success( - from_token.to_string(self.store) - )}", + + f"?pos={self.get_success(from_token.to_string(self.store))}", { "lists": { "foo-list": { @@ -2323,9 +2319,7 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): channel = self.make_request( "POST", self.sync_endpoint - + f"?pos={self.get_success( - from_token.to_string(self.store) - )}", + + f"?pos={self.get_success(from_token.to_string(self.store))}", { "lists": { "foo-list": { @@ -2493,9 +2487,7 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): channel = self.make_request( "POST", self.sync_endpoint - + f"?pos={self.get_success( - from_token.to_string(self.store) - )}", + + f"?pos={self.get_success(from_token.to_string(self.store))}", { "lists": { "foo-list": { 
@@ -2563,9 +2555,7 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): channel = self.make_request( "POST", self.sync_endpoint - + f"?pos={self.get_success( - from_token.to_string(self.store) - )}", + + f"?pos={self.get_success(from_token.to_string(self.store))}", { "lists": { "foo-list": { From 2bf39231ede3a9bcad65ad3f1321e788acfdcd15 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 18:40:36 -0500 Subject: [PATCH 42/62] Add some tests for `get_current_state_delta_membership_changes_for_user(...)` --- synapse/storage/databases/main/stream.py | 14 +- tests/storage/test_stream.py | 515 +++++++++++++++++++++++ 2 files changed, 523 insertions(+), 6 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index f6be97698e..e222f36bab 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -63,7 +63,7 @@ from typing_extensions import Literal from twisted.internet import defer -from synapse.api.constants import Direction +from synapse.api.constants import Direction, EventTypes from synapse.api.filtering import Filter from synapse.events import EventBase from synapse.logging.context import make_deferred_yieldable, run_in_background @@ -807,7 +807,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): min_from_id = from_key.stream max_to_id = to_key.get_max_stream_pos() - args: List[Any] = [user_id, min_from_id, max_to_id] + args: List[Any] = [min_from_id, max_to_id, user_id, EventTypes.Member] # TODO: It would be good to assert that the `from_token`/`to_token` is >= # the first row in `current_state_delta_stream` for the rooms we're @@ -824,16 +824,18 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): e.event_id, s.prev_event_id, s.room_id, - s.instance_name, - s.stream_id, + e.instance_name, + e.stream_ordering, e.topological_ordering, m.membership, e.sender FROM current_state_delta_stream AS s INNER JOIN events AS e ON e.stream_ordering = s.stream_id INNER JOIN room_memberships AS m ON m.event_stream_ordering = s.stream_id - WHERE m.user_id = ? - AND s.stream_id > ? AND s.stream_id <= ? + WHERE s.stream_id > ? AND s.stream_id <= ? + AND m.user_id = ? + AND s.state_key = m.user_id + AND s.type = ? 
ORDER BY s.stream_id ASC """ diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index fe1e873e15..64f123987a 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -28,9 +28,12 @@ from twisted.test.proto_helpers import MemoryReactor from synapse.api.constants import Direction, EventTypes, RelationTypes from synapse.api.filtering import Filter +from synapse.api.room_versions import RoomVersions +from synapse.events import make_event_from_dict from synapse.rest import admin from synapse.rest.client import login, room from synapse.server import HomeServer +from synapse.storage.databases.main.stream import CurrentStateDeltaMembership from synapse.types import JsonDict, PersistedEventPosition, RoomStreamToken from synapse.util import Clock @@ -543,3 +546,515 @@ class GetLastEventInRoomBeforeStreamOrderingTestCase(HomeserverTestCase): } ), ) + + +class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): + """ + Test `get_current_state_delta_membership_changes_for_user(...)` + """ + + servlets = [ + admin.register_servlets, + room.register_servlets, + login.register_servlets, + ] + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.store = hs.get_datastores().main + self.event_sources = hs.get_event_sources() + self.state_handler = self.hs.get_state_handler() + persistence = hs.get_storage_controllers().persistence + assert persistence is not None + self.persistence = persistence + + def test_returns_membership_events(self) -> None: + """ + A basic test that a membership event in the token range is returned for the user. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + join_pos = self.get_success( + self.store.get_position_for_event(join_response["event_id"]) + ) + + after_room1_token = self.event_sources.get_current_token() + + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_room1_token.room_key, + to_key=after_room1_token.room_key, + ) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=join_response["event_id"], + event_pos=join_pos, + prev_event_id=None, + room_id=room_id1, + membership="join", + sender=user1_id, + ) + ], + ) + + def test_server_left_after_us_room(self) -> None: + """ + Test that when probing over part of the DAG where the server left the room *after + us*, we still see the join and leave changes. + + This is to make sure we play nicely with this behavior: When the server leaves a + room, it will insert new rows with `event_id = null` into the + `current_state_delta_stream` table for all current state. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + room_id1 = self.helper.create_room_as( + user2_id, + tok=user2_tok, + extra_content={ + "power_level_content_override": { + "users": { + user2_id: 100, + # Allow user1 to send state in the room + user1_id: 100, + } + } + }, + ) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + join_pos1 = self.get_success( + self.store.get_position_for_event(join_response1["event_id"]) + ) + # Make sure random other non-member state that happens to have a state_key + # matching the user ID doesn't mess with things. + self.helper.send_state( + room_id1, + event_type="foobarbazdummy", + state_key=user1_id, + body={"foo": "bar"}, + tok=user1_tok, + ) + # User1 should leave the room first + leave_response1 = self.helper.leave(room_id1, user1_id, tok=user1_tok) + leave_pos1 = self.get_success( + self.store.get_position_for_event(leave_response1["event_id"]) + ) + + # User2 should also leave the room (everyone has left the room which means the + # server is no longer in the room). + self.helper.leave(room_id1, user2_id, tok=user2_tok) + + after_room1_token = self.event_sources.get_current_token() + + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_room1_token.room_key, + to_key=after_room1_token.room_key, + ) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=join_response1["event_id"], + event_pos=join_pos1, + prev_event_id=None, + room_id=room_id1, + membership="join", + sender=user1_id, + ), + CurrentStateDeltaMembership( + event_id=leave_response1["event_id"], + event_pos=leave_pos1, + prev_event_id=join_response1["event_id"], + room_id=room_id1, + membership="leave", + sender=user1_id, + ), + ], + ) + + def test_server_left_room(self) -> None: + """ + Test that when probing over part of the DAG where we leave the room causing the + server to leave the room (because we were the last local user in the room), we + still see the join and leave changes. + + This is to make sure we play nicely with this behavior: When the server leaves a + room, it will insert new rows with `event_id = null` into the + `current_state_delta_stream` table for all current state. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + room_id1 = self.helper.create_room_as( + user2_id, + tok=user2_tok, + extra_content={ + "power_level_content_override": { + "users": { + user2_id: 100, + # Allow user1 to send state in the room + user1_id: 100, + } + } + }, + ) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + join_pos1 = self.get_success( + self.store.get_position_for_event(join_response1["event_id"]) + ) + # Make sure random other non-member state that happens to have a state_key + # matching the user ID doesn't mess with things. + self.helper.send_state( + room_id1, + event_type="foobarbazdummy", + state_key=user1_id, + body={"foo": "bar"}, + tok=user1_tok, + ) + + # User2 should leave the room first. 
+ self.helper.leave(room_id1, user2_id, tok=user2_tok) + + # User1 (the person we're testing with) should also leave the room (everyone has + # left the room which means the server is no longer in the room). + leave_response1 = self.helper.leave(room_id1, user1_id, tok=user1_tok) + leave_pos1 = self.get_success( + self.store.get_position_for_event(leave_response1["event_id"]) + ) + + after_room1_token = self.event_sources.get_current_token() + + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_room1_token.room_key, + to_key=after_room1_token.room_key, + ) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=join_response1["event_id"], + event_pos=join_pos1, + prev_event_id=None, + room_id=room_id1, + membership="join", + sender=user1_id, + ), + CurrentStateDeltaMembership( + event_id=leave_response1["event_id"], + event_pos=leave_pos1, + prev_event_id=join_response1["event_id"], + room_id=room_id1, + membership="leave", + sender=user1_id, + ), + ], + ) + + def test_membership_persisted_in_same_batch(self) -> None: + """ + Test batch of membership events being processed at once. This will result in all + of the memberships being stored in the `current_state_delta_stream` table with + the same `stream_ordering` even though the individual events have different + `stream_ordering`s. + """ + user1_id = self.register_user("user1", "pass") + _user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + user3_id = self.register_user("user3", "pass") + _user3_tok = self.login(user3_id, "pass") + user4_id = self.register_user("user4", "pass") + _user4_tok = self.login(user4_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + # User2 is just the designated person to create the room (we do this across the + # tests to be consistent) + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + + # Persist the user1, user3, and user4 join events in the same batch so they all + # end up in the `current_state_delta_stream` table with the same + # stream_ordering. 
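# (Editor's aside: the failure mode this test guards against, sketched with
# invented stream positions; not Synapse code.)
#
# Each event keeps its own `stream_ordering` in the `events` table, but the
# batch's rows in `current_state_delta_stream` all share one `stream_id`:
event_stream_orderings = {"user1": 101, "user3": 102, "user4": 103}
batch_delta_stream_id = 103  # hypothetically, the whole batch lands at one position
# Probing from user3 (the middle of the batch) catches queries that wrongly
# assume the delta row's position always equals the event's own position:
assert batch_delta_stream_id != event_stream_orderings["user3"]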
+        join_event1 = make_event_from_dict(
+            {
+                "sender": user1_id,
+                "type": EventTypes.Member,
+                "state_key": user1_id,
+                "content": {"membership": "join"},
+                "room_id": room_id1,
+                "depth": 0,
+                "origin_server_ts": 0,
+                "prev_events": [],
+                "auth_events": [],
+            },
+            room_version=RoomVersions.V10,
+        )
+        join_event_context1 = self.get_success(
+            self.state_handler.compute_event_context(join_event1)
+        )
+        join_event3 = make_event_from_dict(
+            {
+                "sender": user3_id,
+                "type": EventTypes.Member,
+                "state_key": user3_id,
+                "content": {"membership": "join"},
+                "room_id": room_id1,
+                "depth": 1,
+                "origin_server_ts": 1,
+                "prev_events": [],
+                "auth_events": [],
+            },
+            room_version=RoomVersions.V10,
+        )
+        join_event_context3 = self.get_success(
+            self.state_handler.compute_event_context(join_event3)
+        )
+        join_event4 = make_event_from_dict(
+            {
+                "sender": user4_id,
+                "type": EventTypes.Member,
+                "state_key": user4_id,
+                "content": {"membership": "join"},
+                "room_id": room_id1,
+                "depth": 2,
+                "origin_server_ts": 2,
+                "prev_events": [],
+                "auth_events": [],
+            },
+            room_version=RoomVersions.V10,
+        )
+        join_event_context4 = self.get_success(
+            self.state_handler.compute_event_context(join_event4)
+        )
+        self.get_success(
+            self.persistence.persist_events(
+                [
+                    (join_event1, join_event_context1),
+                    (join_event3, join_event_context3),
+                    (join_event4, join_event_context4),
+                ]
+            )
+        )
+
+        after_room1_token = self.event_sources.get_current_token()
+
+        # Let's get membership changes from user3's perspective because it was in the
+        # middle of the batch. This way, if rows in `current_state_delta_stream` are
+        # stored with the first or last event's `stream_ordering`, we will still catch
+        # bugs.
+        membership_changes = self.get_success(
+            self.store.get_current_state_delta_membership_changes_for_user(
+                user3_id,
+                from_key=before_room1_token.room_key,
+                to_key=after_room1_token.room_key,
+            )
+        )
+
+        join_pos3 = self.get_success(
+            self.store.get_position_for_event(join_event3.event_id)
+        )
+
+        # Let the whole diff show on failure
+        self.maxDiff = None
+        self.assertEqual(
+            membership_changes,
+            [
+                CurrentStateDeltaMembership(
+                    event_id=join_event3.event_id,
+                    event_pos=join_pos3,
+                    prev_event_id=None,
+                    room_id=room_id1,
+                    membership="join",
+                    sender=user3_id,
+                ),
+            ],
+        )
+
+    # TODO: Test remote join where the first rows will just be the state when you joined
+
+    # TODO: Test state reset where the user gets removed from the room (when there is no
+    # corresponding leave event)
+
+    def test_excluded_room_ids(self) -> None:
+        """
+        Test that the `excluded_room_ids` option excludes changes from the specified
+        rooms.
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + join_pos1 = self.get_success( + self.store.get_position_for_event(join_response1["event_id"]) + ) + + room_id2 = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response2 = self.helper.join(room_id2, user1_id, tok=user1_tok) + join_pos2 = self.get_success( + self.store.get_position_for_event(join_response2["event_id"]) + ) + + after_room1_token = self.event_sources.get_current_token() + + # First test the the room is returned without the `excluded_room_ids` option + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_room1_token.room_key, + to_key=after_room1_token.room_key, + ) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=join_response1["event_id"], + event_pos=join_pos1, + prev_event_id=None, + room_id=room_id1, + membership="join", + sender=user1_id, + ), + CurrentStateDeltaMembership( + event_id=join_response2["event_id"], + event_pos=join_pos2, + prev_event_id=None, + room_id=room_id2, + membership="join", + sender=user1_id, + ), + ], + ) + + # The test that `excluded_room_ids` excludes room2 as expected + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_room1_token.room_key, + to_key=after_room1_token.room_key, + excluded_room_ids=[room_id2], + ) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=join_response1["event_id"], + event_pos=join_pos1, + prev_event_id=None, + room_id=room_id1, + membership="join", + sender=user1_id, + ) + ], + ) + + +# class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase(BaseMultiWorkerStreamTestCase): +# """ +# TODO +# """ + +# servlets = [ +# admin.register_servlets_for_client_rest_resource, +# room.register_servlets, +# login.register_servlets, +# ] + +# def default_config(self) -> dict: +# conf = super().default_config() +# conf["federation_custom_ca_list"] = [get_test_ca_cert_file()] +# return conf + +# def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: +# self.sliding_sync_handler = self.hs.get_sliding_sync_handler() +# self.store = self.hs.get_datastores().main +# self.event_sources = hs.get_event_sources() + + +# def test_sharded_event_persisters(self) -> None: +# """ +# TODO +# """ +# user1_id = self.register_user("user1", "pass") +# user1_tok = self.login(user1_id, "pass") +# user2_id = self.register_user("user2", "pass") +# user2_tok = self.login(user2_id, "pass") + +# remote_hs = self.make_worker_hs("synapse.app.generic_worker") + +# channel = make_request( +# self.reactor, +# self._hs_to_site[hs], +# "GET", +# f"/_matrix/media/r0/download/{target}/{media_id}", +# shorthand=False, +# access_token=self.access_token, +# await_result=False, +# ) + +# remote_hs + +# worker_store2 = worker_hs2.get_datastores().main +# assert isinstance(worker_store2._stream_id_gen, MultiWriterIdGenerator) +# actx = 
worker_store2._stream_id_gen.get_next() + +# self.assertEqual( +# room_id_results.keys(), +# { +# room_id1, +# # room_id2 shouldn't show up because we left before the from/to range +# # and the join event during the range happened while worker2 was stuck. +# # This means that from the perspective of the master, where the +# # `stuck_activity_token` is generated, the stream position for worker2 +# # wasn't advanced to the join yet. Looking at the `instance_map`, the +# # join technically comes after `stuck_activity_token``. +# # +# # room_id2, +# room_id3, +# }, +# ) From ec2d8dc1e3c602dadb4fac289bcd38b211f6b34d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 18:52:24 -0500 Subject: [PATCH 43/62] Create events using helper --- tests/storage/test_stream.py | 76 +++++++++++++----------------------- 1 file changed, 28 insertions(+), 48 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 64f123987a..39cb5a25c5 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -37,6 +37,7 @@ from synapse.storage.databases.main.stream import CurrentStateDeltaMembership from synapse.types import JsonDict, PersistedEventPosition, RoomStreamToken from synapse.util import Clock +from tests.test_utils.event_injection import create_event from tests.unittest import HomeserverTestCase logger = logging.getLogger(__name__) @@ -809,56 +810,35 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): # Persist the user1, user3, and user4 join events in the same batch so they all # end up in the `current_state_delta_stream` table with the same # stream_ordering. - join_event1 = make_event_from_dict( - { - "sender": user1_id, - "type": EventTypes.Member, - "state_key": user1_id, - "content": {"membership": "join"}, - "room_id": room_id1, - "depth": 0, - "origin_server_ts": 0, - "prev_events": [], - "auth_events": [], - }, - room_version=RoomVersions.V10, + join_event1, join_event_context1 = self.get_success( + create_event( + self.hs, + sender=user1_id, + type=EventTypes.Member, + state_key=user1_id, + content={"membership": "join"}, + room_id=room_id1, + ) ) - join_event_context1 = self.get_success( - self.state_handler.compute_event_context(join_event1) + join_event3, join_event_context3 = self.get_success( + create_event( + self.hs, + sender=user3_id, + type=EventTypes.Member, + state_key=user3_id, + content={"membership": "join"}, + room_id=room_id1, + ) ) - join_event3 = make_event_from_dict( - { - "sender": user3_id, - "type": EventTypes.Member, - "state_key": user3_id, - "content": {"membership": "join"}, - "room_id": room_id1, - "depth": 1, - "origin_server_ts": 1, - "prev_events": [], - "auth_events": [], - }, - room_version=RoomVersions.V10, - ) - join_event_context3 = self.get_success( - self.state_handler.compute_event_context(join_event3) - ) - join_event4 = make_event_from_dict( - { - "sender": user4_id, - "type": EventTypes.Member, - "state_key": user4_id, - "content": {"membership": "join"}, - "room_id": room_id1, - "depth": 2, - "origin_server_ts": 2, - "prev_events": [], - "auth_events": [], - }, - room_version=RoomVersions.V10, - ) - join_event_context4 = self.get_success( - self.state_handler.compute_event_context(join_event4) + join_event4, join_event_context4 = self.get_success( + create_event( + self.hs, + sender=user4_id, + type=EventTypes.Member, + state_key=user4_id, + content={"membership": "join"}, + room_id=room_id1, + ) ) self.get_success( self.persistence.persist_events( From 
0b9a903ca12831e431b596daacf127e53ecbd050 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 19:35:18 -0500 Subject: [PATCH 44/62] Add test that remotely joins room --- tests/storage/test_stream.py | 239 ++++++++++++++++++++++++++--------- 1 file changed, 178 insertions(+), 61 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 39cb5a25c5..3b825dbbbe 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -21,24 +21,32 @@ import logging from typing import List, Tuple +from unittest.mock import AsyncMock, patch from immutabledict import immutabledict from twisted.test.proto_helpers import MemoryReactor -from synapse.api.constants import Direction, EventTypes, RelationTypes +from synapse.api.constants import Direction, EventTypes, Membership, RelationTypes from synapse.api.filtering import Filter -from synapse.api.room_versions import RoomVersions -from synapse.events import make_event_from_dict +from synapse.crypto.event_signing import add_hashes_and_signatures +from synapse.events import FrozenEventV3 +from synapse.federation.federation_client import SendJoinResult from synapse.rest import admin from synapse.rest.client import login, room from synapse.server import HomeServer from synapse.storage.databases.main.stream import CurrentStateDeltaMembership -from synapse.types import JsonDict, PersistedEventPosition, RoomStreamToken +from synapse.types import ( + JsonDict, + PersistedEventPosition, + RoomStreamToken, + UserID, + create_requester, +) from synapse.util import Clock from tests.test_utils.event_injection import create_event -from tests.unittest import HomeserverTestCase +from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase logger = logging.getLogger(__name__) @@ -884,8 +892,6 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): ], ) - # TODO: Test remote join where the first rows will just be the state when you joined - # TODO: Test state reset where the user gets removed from the room (when there is no # corresponding leave event) @@ -974,67 +980,178 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): ) -# class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase(BaseMultiWorkerStreamTestCase): -# """ -# TODO -# """ +class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase( + FederatingHomeserverTestCase +): + """ + Test `get_current_state_delta_membership_changes_for_user(...)` when joining remote federated rooms. 
+ """ -# servlets = [ -# admin.register_servlets_for_client_rest_resource, -# room.register_servlets, -# login.register_servlets, -# ] + servlets = [ + admin.register_servlets_for_client_rest_resource, + room.register_servlets, + login.register_servlets, + ] -# def default_config(self) -> dict: -# conf = super().default_config() -# conf["federation_custom_ca_list"] = [get_test_ca_cert_file()] -# return conf + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.sliding_sync_handler = self.hs.get_sliding_sync_handler() + self.store = self.hs.get_datastores().main + self.event_sources = hs.get_event_sources() + self.room_member_handler = hs.get_room_member_handler() -# def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: -# self.sliding_sync_handler = self.hs.get_sliding_sync_handler() -# self.store = self.hs.get_datastores().main -# self.event_sources = hs.get_event_sources() + def test_remote_join(self) -> None: + """ + Test remote join where the first rows will just be the state when you joined + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + intially_unjoined_room_id = f"!example:{self.OTHER_SERVER_NAME}" -# def test_sharded_event_persisters(self) -> None: -# """ -# TODO -# """ -# user1_id = self.register_user("user1", "pass") -# user1_tok = self.login(user1_id, "pass") -# user2_id = self.register_user("user2", "pass") -# user2_tok = self.login(user2_id, "pass") + # Remotely join a room on another homeserver. + # + # To do this we have to mock the responses from the remote homeserver. We also + # patch out a bunch of event checks on our end. + create_event_source = { + "auth_events": [], + "content": { + "creator": f"@creator:{self.OTHER_SERVER_NAME}", + "room_version": self.hs.config.server.default_room_version.identifier, + }, + "depth": 0, + "origin_server_ts": 0, + "prev_events": [], + "room_id": intially_unjoined_room_id, + "sender": f"@creator:{self.OTHER_SERVER_NAME}", + "state_key": "", + "type": EventTypes.Create, + } + self.add_hashes_and_signatures_from_other_server( + create_event_source, + self.hs.config.server.default_room_version, + ) + create_event = FrozenEventV3( + create_event_source, + self.hs.config.server.default_room_version, + {}, + None, + ) + creator_join_event_source = { + "auth_events": [create_event.event_id], + "content": { + "membership": "join", + }, + "depth": 1, + "origin_server_ts": 1, + "prev_events": [], + "room_id": intially_unjoined_room_id, + "sender": f"@creator:{self.OTHER_SERVER_NAME}", + "state_key": f"@creator:{self.OTHER_SERVER_NAME}", + "type": EventTypes.Member, + } + self.add_hashes_and_signatures_from_other_server( + creator_join_event_source, + self.hs.config.server.default_room_version, + ) + creator_join_event = FrozenEventV3( + creator_join_event_source, + self.hs.config.server.default_room_version, + {}, + None, + ) -# remote_hs = self.make_worker_hs("synapse.app.generic_worker") + # Our local user is going to remote join the room + join_event_source = { + "auth_events": [create_event.event_id], + "content": {"membership": "join"}, + "depth": 1, + "origin_server_ts": 100, + "prev_events": [creator_join_event.event_id], + "sender": user1_id, + "state_key": user1_id, + "room_id": intially_unjoined_room_id, + "type": EventTypes.Member, + } + add_hashes_and_signatures( + self.hs.config.server.default_room_version, + join_event_source, + self.hs.hostname, + self.hs.signing_key, + ) + join_event = FrozenEventV3( + 
join_event_source, + self.hs.config.server.default_room_version, + {}, + None, + ) -# channel = make_request( -# self.reactor, -# self._hs_to_site[hs], -# "GET", -# f"/_matrix/media/r0/download/{target}/{media_id}", -# shorthand=False, -# access_token=self.access_token, -# await_result=False, -# ) + mock_make_membership_event = AsyncMock( + return_value=( + self.OTHER_SERVER_NAME, + join_event, + self.hs.config.server.default_room_version, + ) + ) + mock_send_join = AsyncMock( + return_value=SendJoinResult( + join_event, + self.OTHER_SERVER_NAME, + state=[create_event, creator_join_event], + auth_chain=[create_event, creator_join_event], + partial_state=False, + servers_in_room=frozenset(), + ) + ) -# remote_hs + with patch.object( + self.room_member_handler.federation_handler.federation_client, + "make_membership_event", + mock_make_membership_event, + ), patch.object( + self.room_member_handler.federation_handler.federation_client, + "send_join", + mock_send_join, + ), patch( + "synapse.event_auth._is_membership_change_allowed", + return_value=None, + ), patch( + "synapse.handlers.federation_event.check_state_dependent_auth_rules", + return_value=None, + ): + self.get_success( + self.room_member_handler.update_membership( + requester=create_requester(user1_id), + target=UserID.from_string(user1_id), + room_id=intially_unjoined_room_id, + action=Membership.JOIN, + remote_room_hosts=[self.OTHER_SERVER_NAME], + ) + ) -# worker_store2 = worker_hs2.get_datastores().main -# assert isinstance(worker_store2._stream_id_gen, MultiWriterIdGenerator) -# actx = worker_store2._stream_id_gen.get_next() + events_db_dump = self.get_success( + self.store.db_pool.simple_select_list( + table="events", + keyvalues={}, + retcols=[ + "*", + ], + desc="debug dump events", + ) + ) -# self.assertEqual( -# room_id_results.keys(), -# { -# room_id1, -# # room_id2 shouldn't show up because we left before the from/to range -# # and the join event during the range happened while worker2 was stuck. -# # This means that from the perspective of the master, where the -# # `stuck_activity_token` is generated, the stream position for worker2 -# # wasn't advanced to the join yet. Looking at the `instance_map`, the -# # join technically comes after `stuck_activity_token``. 
-# # -# # room_id2, -# room_id3, -# }, -# ) + logger.info("events_db_dump: %s", events_db_dump) + + current_state_delta_stream_db_dump = self.get_success( + self.store.db_pool.simple_select_list( + table="current_state_delta_stream", + keyvalues={}, + retcols=[ + "*", + ], + desc="debug dump current_state_delta_stream", + ) + ) + + logger.info( + "current_state_delta_stream_db_dump: %s", current_state_delta_stream_db_dump + ) From 48d0acfbcda30f956d79cef873fa762f88530341 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 19:52:12 -0500 Subject: [PATCH 45/62] Actually test `get_current_state_delta_membership_changes_for_user(...)` in remote join test --- tests/storage/test_stream.py | 59 ++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 3b825dbbbe..dfca17db64 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -1001,10 +1001,13 @@ class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase( def test_remote_join(self) -> None: """ - Test remote join where the first rows will just be the state when you joined + Test remote join where the first rows in `current_state_delta_stream` will just + be the state when you joined the remote room. """ user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") + _user1_tok = self.login(user1_id, "pass") + + before_join_token = self.event_sources.get_current_token() intially_unjoined_room_id = f"!example:{self.OTHER_SERVER_NAME}" @@ -1128,30 +1131,40 @@ class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase( ) ) - events_db_dump = self.get_success( - self.store.db_pool.simple_select_list( - table="events", - keyvalues={}, - retcols=[ - "*", - ], - desc="debug dump events", + after_join_token = self.event_sources.get_current_token() + + # Get the membership changes for the user at this point, the + # `current_state_delta_stream` table should look like: + # + # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | + # |-----------|------------------------------|-----------------|------------------------------|----------|---------------|----------------| + # | 2 | '!example:other.example.com' | 'm.room.member' | '@user1:test' | '$xxx' | None | 'master' | + # | 2 | '!example:other.example.com' | 'm.room.create' | '' | '$xxx' | None | 'master' | + # | 2 | '!example:other.example.com' | 'm.room.member' | '@creator:other.example.com' | '$xxx' | None | 'master' | + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_join_token.room_key, + to_key=after_join_token.room_key, ) ) - logger.info("events_db_dump: %s", events_db_dump) - - current_state_delta_stream_db_dump = self.get_success( - self.store.db_pool.simple_select_list( - table="current_state_delta_stream", - keyvalues={}, - retcols=[ - "*", - ], - desc="debug dump current_state_delta_stream", - ) + join_pos = self.get_success( + self.store.get_position_for_event(join_event.event_id) ) - logger.info( - "current_state_delta_stream_db_dump: %s", current_state_delta_stream_db_dump + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=join_event.event_id, + event_pos=join_pos, + prev_event_id=None, + room_id=intially_unjoined_room_id, + membership="join", + sender=user1_id, + ), + ], ) From 
2a944ffcef16744ade6b0172fcb98c7eeb281766 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 20:20:17 -0500 Subject: [PATCH 46/62] Add state of the db in each situation --- tests/storage/test_stream.py | 68 ++++++++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 2 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index dfca17db64..2ac88f18ea 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -673,6 +673,29 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): after_room1_token = self.event_sources.get_current_token() + # Get the membership changes for the user. + # + # At this point, the `current_state_delta_stream` table should look like the + # following. When the server leaves a room, it will insert new rows with + # `event_id = null` for all current state. + # + # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | + # |-----------|----------|-----------------------------|----------------|----------|---------------|---------------| + # | 2 | !x:test | 'm.room.create' | '' | $xxx | None | 'master' | + # | 3 | !x:test | 'm.room.member' | '@user2:test' | $aaa | None | 'master' | + # | 4 | !x:test | 'm.room.history_visibility' | '' | $xxx | None | 'master' | + # | 4 | !x:test | 'm.room.join_rules' | '' | $xxx | None | 'master' | + # | 4 | !x:test | 'm.room.power_levels' | '' | $xxx | None | 'master' | + # | 7 | !x:test | 'm.room.member' | '@user1:test' | $ooo | None | 'master' | + # | 8 | !x:test | 'foobarbazdummy' | '@user1:test' | $xxx | None | 'master' | + # | 9 | !x:test | 'm.room.member' | '@user1:test' | $ppp | $ooo | 'master' | + # | 10 | !x:test | 'foobarbazdummy' | '@user1:test' | None | $xxx | 'master' | + # | 10 | !x:test | 'm.room.create' | '' | None | $xxx | 'master' | + # | 10 | !x:test | 'm.room.history_visibility' | '' | None | $xxx | 'master' | + # | 10 | !x:test | 'm.room.join_rules' | '' | None | $xxx | 'master' | + # | 10 | !x:test | 'm.room.member' | '@user1:test' | None | $ppp | 'master' | + # | 10 | !x:test | 'm.room.member' | '@user2:test' | None | $aaa | 'master' | + # | 10 | !x:test | 'm.room.power_levels' | | None | $xxx | 'master' | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( user1_id, @@ -761,6 +784,29 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): after_room1_token = self.event_sources.get_current_token() + # Get the membership changes for the user. + # + # At this point, the `current_state_delta_stream` table should look like the + # following. When the server leaves a room, it will insert new rows with + # `event_id = null` for all current state. 
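+        # (`$xxx`, `$aaa`, `$bbb`, `$ooo` below are stand-ins for the real event
+        # IDs, which differ from run to run.)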
+ # + # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | + # |-----------|-----------|-----------------------------|---------------|----------|---------------|---------------| + # | 2 | '!x:test' | 'm.room.create' | '' | '$xxx' | None | 'master' | + # | 3 | '!x:test' | 'm.room.member' | '@user2:test' | '$aaa' | None | 'master' | + # | 4 | '!x:test' | 'm.room.history_visibility' | '' | '$xxx' | None | 'master' | + # | 4 | '!x:test' | 'm.room.join_rules' | '' | '$xxx' | None | 'master' | + # | 4 | '!x:test' | 'm.room.power_levels' | '' | '$xxx' | None | 'master' | + # | 7 | '!x:test' | 'm.room.member' | '@user1:test' | '$ooo' | None | 'master' | + # | 8 | '!x:test' | 'foobarbazdummy' | '@user1:test' | '$xxx' | None | 'master' | + # | 9 | '!x:test' | 'm.room.member' | '@user2:test' | '$bbb' | '$aaa' | 'master' | + # | 10 | '!x:test' | 'foobarbazdummy' | '@user1:test' | None | '$xxx' | 'master' | + # | 10 | '!x:test' | 'm.room.create' | '' | None | '$xxx' | 'master' | + # | 10 | '!x:test' | 'm.room.history_visibility' | '' | None | '$xxx' | 'master' | + # | 10 | '!x:test' | 'm.room.join_rules' | '' | None | '$xxx' | 'master' | + # | 10 | '!x:test' | 'm.room.member' | '@user1:test' | None | '$ooo' | 'master' | + # | 10 | '!x:test' | 'm.room.member' | '@user2:test' | None | '$bbb' | 'master' | + # | 10 | '!x:test' | 'm.room.power_levels' | '' | None | '$xxx' | 'master' | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( user1_id, @@ -864,6 +910,21 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): # middle of the batch. This way, if rows in` current_state_delta_stream` are # stored with the first or last event's `stream_ordering`, we will still catch # bugs. + # + # At this point, the `current_state_delta_stream` table should look like (notice + # those three memberships at the end with `stream_id=7` because we persisted + # them in the same batch): + # + # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | + # |-----------|-----------|----------------------------|------------------|----------|---------------|---------------| + # | 2 | '!x:test' | 'm.room.create' | '' | '$xxx' | None | 'master' | + # | 3 | '!x:test' | 'm.room.member' | '@user2:test' | '$xxx' | None | 'master' | + # | 4 | '!x:test' | 'm.room.history_visibility'| '' | '$xxx' | None | 'master' | + # | 4 | '!x:test' | 'm.room.join_rules' | '' | '$xxx' | None | 'master' | + # | 4 | '!x:test' | 'm.room.power_levels' | '' | '$xxx' | None | 'master' | + # | 7 | '!x:test' | 'm.room.member' | '@user3:test' | '$xxx' | None | 'master' | + # | 7 | '!x:test' | 'm.room.member' | '@user1:test' | '$xxx' | None | 'master' | + # | 7 | '!x:test' | 'm.room.member' | '@user4:test' | '$xxx' | None | 'master' | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( user3_id, @@ -1133,8 +1194,11 @@ class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase( after_join_token = self.event_sources.get_current_token() - # Get the membership changes for the user at this point, the - # `current_state_delta_stream` table should look like: + # Get the membership changes for the user. + # + # At this point, the `current_state_delta_stream` table should look like the + # following. 
Notice that all of the events are at the same `stream_id` because + # the current state starts out where we remotely joined: # # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | # |-----------|------------------------------|-----------------|------------------------------|----------|---------------|----------------| From 8df39d1baff8cac6aa446c8b71b3a64a8bf29a1e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 20:22:03 -0500 Subject: [PATCH 47/62] Remove redundant `instance_name` column --- tests/storage/test_stream.py | 98 ++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 2ac88f18ea..840f980344 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -679,23 +679,23 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): # following. When the server leaves a room, it will insert new rows with # `event_id = null` for all current state. # - # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | - # |-----------|----------|-----------------------------|----------------|----------|---------------|---------------| - # | 2 | !x:test | 'm.room.create' | '' | $xxx | None | 'master' | - # | 3 | !x:test | 'm.room.member' | '@user2:test' | $aaa | None | 'master' | - # | 4 | !x:test | 'm.room.history_visibility' | '' | $xxx | None | 'master' | - # | 4 | !x:test | 'm.room.join_rules' | '' | $xxx | None | 'master' | - # | 4 | !x:test | 'm.room.power_levels' | '' | $xxx | None | 'master' | - # | 7 | !x:test | 'm.room.member' | '@user1:test' | $ooo | None | 'master' | - # | 8 | !x:test | 'foobarbazdummy' | '@user1:test' | $xxx | None | 'master' | - # | 9 | !x:test | 'm.room.member' | '@user1:test' | $ppp | $ooo | 'master' | - # | 10 | !x:test | 'foobarbazdummy' | '@user1:test' | None | $xxx | 'master' | - # | 10 | !x:test | 'm.room.create' | '' | None | $xxx | 'master' | - # | 10 | !x:test | 'm.room.history_visibility' | '' | None | $xxx | 'master' | - # | 10 | !x:test | 'm.room.join_rules' | '' | None | $xxx | 'master' | - # | 10 | !x:test | 'm.room.member' | '@user1:test' | None | $ppp | 'master' | - # | 10 | !x:test | 'm.room.member' | '@user2:test' | None | $aaa | 'master' | - # | 10 | !x:test | 'm.room.power_levels' | | None | $xxx | 'master' | + # | stream_id | room_id | type | state_key | event_id | prev_event_id | + # |-----------|----------|-----------------------------|----------------|----------|---------------| + # | 2 | !x:test | 'm.room.create' | '' | $xxx | None | + # | 3 | !x:test | 'm.room.member' | '@user2:test' | $aaa | None | + # | 4 | !x:test | 'm.room.history_visibility' | '' | $xxx | None | + # | 4 | !x:test | 'm.room.join_rules' | '' | $xxx | None | + # | 4 | !x:test | 'm.room.power_levels' | '' | $xxx | None | + # | 7 | !x:test | 'm.room.member' | '@user1:test' | $ooo | None | + # | 8 | !x:test | 'foobarbazdummy' | '@user1:test' | $xxx | None | + # | 9 | !x:test | 'm.room.member' | '@user1:test' | $ppp | $ooo | + # | 10 | !x:test | 'foobarbazdummy' | '@user1:test' | None | $xxx | + # | 10 | !x:test | 'm.room.create' | '' | None | $xxx | + # | 10 | !x:test | 'm.room.history_visibility' | '' | None | $xxx | + # | 10 | !x:test | 'm.room.join_rules' | '' | None | $xxx | + # | 10 | !x:test | 'm.room.member' | '@user1:test' | None | $ppp | + # | 10 | !x:test | 'm.room.member' | '@user2:test' | None | $aaa | + # | 10 | !x:test | 
'm.room.power_levels' | | None | $xxx | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( user1_id, @@ -790,23 +790,23 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): # following. When the server leaves a room, it will insert new rows with # `event_id = null` for all current state. # - # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | - # |-----------|-----------|-----------------------------|---------------|----------|---------------|---------------| - # | 2 | '!x:test' | 'm.room.create' | '' | '$xxx' | None | 'master' | - # | 3 | '!x:test' | 'm.room.member' | '@user2:test' | '$aaa' | None | 'master' | - # | 4 | '!x:test' | 'm.room.history_visibility' | '' | '$xxx' | None | 'master' | - # | 4 | '!x:test' | 'm.room.join_rules' | '' | '$xxx' | None | 'master' | - # | 4 | '!x:test' | 'm.room.power_levels' | '' | '$xxx' | None | 'master' | - # | 7 | '!x:test' | 'm.room.member' | '@user1:test' | '$ooo' | None | 'master' | - # | 8 | '!x:test' | 'foobarbazdummy' | '@user1:test' | '$xxx' | None | 'master' | - # | 9 | '!x:test' | 'm.room.member' | '@user2:test' | '$bbb' | '$aaa' | 'master' | - # | 10 | '!x:test' | 'foobarbazdummy' | '@user1:test' | None | '$xxx' | 'master' | - # | 10 | '!x:test' | 'm.room.create' | '' | None | '$xxx' | 'master' | - # | 10 | '!x:test' | 'm.room.history_visibility' | '' | None | '$xxx' | 'master' | - # | 10 | '!x:test' | 'm.room.join_rules' | '' | None | '$xxx' | 'master' | - # | 10 | '!x:test' | 'm.room.member' | '@user1:test' | None | '$ooo' | 'master' | - # | 10 | '!x:test' | 'm.room.member' | '@user2:test' | None | '$bbb' | 'master' | - # | 10 | '!x:test' | 'm.room.power_levels' | '' | None | '$xxx' | 'master' | + # | stream_id | room_id | type | state_key | event_id | prev_event_id | + # |-----------|-----------|-----------------------------|---------------|----------|---------------| + # | 2 | '!x:test' | 'm.room.create' | '' | '$xxx' | None | + # | 3 | '!x:test' | 'm.room.member' | '@user2:test' | '$aaa' | None | + # | 4 | '!x:test' | 'm.room.history_visibility' | '' | '$xxx' | None | + # | 4 | '!x:test' | 'm.room.join_rules' | '' | '$xxx' | None | + # | 4 | '!x:test' | 'm.room.power_levels' | '' | '$xxx' | None | + # | 7 | '!x:test' | 'm.room.member' | '@user1:test' | '$ooo' | None | + # | 8 | '!x:test' | 'foobarbazdummy' | '@user1:test' | '$xxx' | None | + # | 9 | '!x:test' | 'm.room.member' | '@user2:test' | '$bbb' | '$aaa' | + # | 10 | '!x:test' | 'foobarbazdummy' | '@user1:test' | None | '$xxx' | + # | 10 | '!x:test' | 'm.room.create' | '' | None | '$xxx' | + # | 10 | '!x:test' | 'm.room.history_visibility' | '' | None | '$xxx' | + # | 10 | '!x:test' | 'm.room.join_rules' | '' | None | '$xxx' | + # | 10 | '!x:test' | 'm.room.member' | '@user1:test' | None | '$ooo' | + # | 10 | '!x:test' | 'm.room.member' | '@user2:test' | None | '$bbb' | + # | 10 | '!x:test' | 'm.room.power_levels' | '' | None | '$xxx' | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( user1_id, @@ -915,16 +915,16 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): # those three memberships at the end with `stream_id=7` because we persisted # them in the same batch): # - # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | - # |-----------|-----------|----------------------------|------------------|----------|---------------|---------------| - # 
| 2 | '!x:test' | 'm.room.create' | '' | '$xxx' | None | 'master' | - # | 3 | '!x:test' | 'm.room.member' | '@user2:test' | '$xxx' | None | 'master' | - # | 4 | '!x:test' | 'm.room.history_visibility'| '' | '$xxx' | None | 'master' | - # | 4 | '!x:test' | 'm.room.join_rules' | '' | '$xxx' | None | 'master' | - # | 4 | '!x:test' | 'm.room.power_levels' | '' | '$xxx' | None | 'master' | - # | 7 | '!x:test' | 'm.room.member' | '@user3:test' | '$xxx' | None | 'master' | - # | 7 | '!x:test' | 'm.room.member' | '@user1:test' | '$xxx' | None | 'master' | - # | 7 | '!x:test' | 'm.room.member' | '@user4:test' | '$xxx' | None | 'master' | + # | stream_id | room_id | type | state_key | event_id | prev_event_id | + # |-----------|-----------|----------------------------|------------------|----------|---------------| + # | 2 | '!x:test' | 'm.room.create' | '' | '$xxx' | None | + # | 3 | '!x:test' | 'm.room.member' | '@user2:test' | '$xxx' | None | + # | 4 | '!x:test' | 'm.room.history_visibility'| '' | '$xxx' | None | + # | 4 | '!x:test' | 'm.room.join_rules' | '' | '$xxx' | None | + # | 4 | '!x:test' | 'm.room.power_levels' | '' | '$xxx' | None | + # | 7 | '!x:test' | 'm.room.member' | '@user3:test' | '$xxx' | None | + # | 7 | '!x:test' | 'm.room.member' | '@user1:test' | '$xxx' | None | + # | 7 | '!x:test' | 'm.room.member' | '@user4:test' | '$xxx' | None | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( user3_id, @@ -1200,11 +1200,11 @@ class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase( # following. Notice that all of the events are at the same `stream_id` because # the current state starts out where we remotely joined: # - # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | - # |-----------|------------------------------|-----------------|------------------------------|----------|---------------|----------------| - # | 2 | '!example:other.example.com' | 'm.room.member' | '@user1:test' | '$xxx' | None | 'master' | - # | 2 | '!example:other.example.com' | 'm.room.create' | '' | '$xxx' | None | 'master' | - # | 2 | '!example:other.example.com' | 'm.room.member' | '@creator:other.example.com' | '$xxx' | None | 'master' | + # | stream_id | room_id | type | state_key | event_id | prev_event_id | + # |-----------|------------------------------|-----------------|------------------------------|----------|---------------| + # | 2 | '!example:other.example.com' | 'm.room.member' | '@user1:test' | '$xxx' | None | + # | 2 | '!example:other.example.com' | 'm.room.create' | '' | '$xxx' | None | + # | 2 | '!example:other.example.com' | 'm.room.member' | '@creator:other.example.com' | '$xxx' | None | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( user1_id, From b7914e76769ea330cdfa99e18fd7695f8301b02b Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 21:01:54 -0500 Subject: [PATCH 48/62] Add skipped test for state resets --- tests/storage/test_stream.py | 92 ++++++++++++++++++++++++++++++++++-- 1 file changed, 88 insertions(+), 4 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 840f980344..04a0e24154 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -46,7 +46,7 @@ from synapse.types import ( from synapse.util import Clock from tests.test_utils.event_injection import create_event -from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase +from 
tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase, skip_unless logger = logging.getLogger(__name__) @@ -839,6 +839,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): ], ) + @skip_unless(False, "We don't support this yet") def test_membership_persisted_in_same_batch(self) -> None: """ Test batch of membership events being processed at once. This will result in all @@ -948,13 +949,96 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): prev_event_id=None, room_id=room_id1, membership="join", - sender=user1_id, + sender=user3_id, ), ], ) - # TODO: Test state reset where the user gets removed from the room (when there is no - # corresponding leave event) + @skip_unless(False, "We don't support this yet") + def test_state_reset(self) -> None: + """ + Test a state reset scenario where the user gets removed from the room (when + there is no corresponding leave event) + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + join_pos1 = self.get_success( + self.store.get_position_for_event(join_response1["event_id"]) + ) + + before_reset_token = self.event_sources.get_current_token() + + # Send another state event which we will cause the reset at + dummy_state_response = self.helper.send_state( + room_id1, + event_type="foobarbaz", + state_key="", + body={"foo": "bar"}, + tok=user2_tok, + ) + dummy_state_pos = self.get_success( + self.store.get_position_for_event(dummy_state_response["event_id"]) + ) + + # Mock a state reset removing the membership for user1 in the current state + self.get_success( + self.store.db_pool.simple_delete( + table="current_state_events", + keyvalues={ + "room_id": room_id1, + "type": EventTypes.Member, + "state_key": user1_id, + }, + desc="state reset user in current_state_delta_stream", + ) + ) + self.get_success( + self.store.db_pool.simple_insert( + table="current_state_delta_stream", + values={ + "stream_id": dummy_state_pos.stream, + "room_id": room_id1, + "type": EventTypes.Member, + "state_key": user1_id, + "event_id": None, + # FIXME: I'm not sure if a state reset should have a prev_event_id + "prev_event_id": None, + "instance_name": dummy_state_pos.instance_name, + }, + desc="state reset user in current_state_delta_stream", + ) + ) + + after_reset_token = self.event_sources.get_current_token() + + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_reset_token.room_key, + to_key=after_reset_token.room_key, + ) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=TODO, + event_pos=TODO, + prev_event_id=None, + room_id=room_id1, + membership="leave", + sender=user1_id, + ), + ], + ) def test_excluded_room_ids(self) -> None: """ From 7eb1806ee3279f6581996b029f80251f8aaf3d69 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 21:06:05 -0500 Subject: [PATCH 49/62] Fix lints --- tests/storage/test_stream.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 04a0e24154..5b30d7106f 100644 --- 
a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -966,10 +966,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): user2_tok = self.login(user2_id, "pass") room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) - join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) - join_pos1 = self.get_success( - self.store.get_position_for_event(join_response1["event_id"]) - ) + self.helper.join(room_id1, user1_id, tok=user1_tok) before_reset_token = self.event_sources.get_current_token() @@ -1028,16 +1025,19 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): self.maxDiff = None self.assertEqual( membership_changes, - [ - CurrentStateDeltaMembership( - event_id=TODO, - event_pos=TODO, - prev_event_id=None, - room_id=room_id1, - membership="leave", - sender=user1_id, - ), - ], + # TODO: Uncomment the expected membership. We just have a `False` value + # here so the test expectation fails and you look here. + False, + # [ + # CurrentStateDeltaMembership( + # event_id=TODO, + # event_pos=TODO, + # prev_event_id=None, + # room_id=room_id1, + # membership="leave", + # sender=user1_id, + # ), + # ], ) def test_excluded_room_ids(self) -> None: From 935b98c474f030f92bdd28cd69fcf20f3d6045fd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 00:48:17 -0500 Subject: [PATCH 50/62] All `get_current_state_delta_membership_changes_for_user(...)` tests passing --- synapse/storage/databases/main/stream.py | 80 ++++++++++++++++-------- tests/storage/test_stream.py | 39 ++++++------ 2 files changed, 75 insertions(+), 44 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index e222f36bab..9ae1fe6c15 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -63,7 +63,7 @@ from typing_extensions import Literal from twisted.internet import defer -from synapse.api.constants import Direction, EventTypes +from synapse.api.constants import Direction, EventTypes, Membership from synapse.api.filtering import Filter from synapse.events import EventBase from synapse.logging.context import make_deferred_yieldable, run_in_background @@ -125,12 +125,12 @@ class CurrentStateDeltaMembership: sender: The person who sent the membership event """ - event_id: str + event_id: Optional[str] event_pos: PersistedEventPosition prev_event_id: Optional[str] room_id: str membership: str - sender: str + sender: Optional[str] def generate_pagination_where_clause( @@ -819,22 +819,32 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): # longer in the room or a state reset happened and it was unset. # `stream_ordering` is unique across the Synapse instance so this should # work fine. + # + # We `COALESCE` the `instance_name` and `stream_ordering` because we prefer + # the source of truth from the events table. This gives slightly more + # accurate results when available since `current_state_delta_stream` only + # tracks that the current state is at this stream position (not what stream + # position the state event was added) and batches events at the same + # `stream_id` in certain cases. + # + # TODO: We need to add indexes for `current_state_delta_stream.event_id` and + # `current_state_delta_stream.state_key`/`current_state_delta_stream.type` + # for this to be efficient. 
sql = """ SELECT e.event_id, s.prev_event_id, s.room_id, - e.instance_name, - e.stream_ordering, + COALESCE(e.instance_name, s.instance_name), + COALESCE(e.stream_ordering, s.stream_id), e.topological_ordering, m.membership, e.sender FROM current_state_delta_stream AS s - INNER JOIN events AS e ON e.stream_ordering = s.stream_id - INNER JOIN room_memberships AS m ON m.event_stream_ordering = s.stream_id + LEFT JOIN events AS e ON e.event_id = s.event_id + LEFT JOIN room_memberships AS m ON m.event_id = s.event_id WHERE s.stream_id > ? AND s.stream_id <= ? - AND m.user_id = ? - AND s.state_key = m.user_id + AND s.state_key = ? AND s.type = ? ORDER BY s.stream_id ASC """ @@ -842,6 +852,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): txn.execute(sql, args) membership_changes: List[CurrentStateDeltaMembership] = [] + membership_change_map: Dict[str, CurrentStateDeltaMembership] = {} for ( event_id, prev_event_id, @@ -852,36 +863,55 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): membership, sender, ) in txn: - assert event_id is not None - # `prev_event_id` can be `None` assert room_id is not None assert instance_name is not None assert stream_ordering is not None - assert topological_ordering is not None - assert membership is not None - assert sender is not None if _filter_results( from_key, to_key, instance_name, + # TODO: This isn't always filled now topological_ordering, stream_ordering, ): - membership_changes.append( - CurrentStateDeltaMembership( - event_id=event_id, - event_pos=PersistedEventPosition( - instance_name=instance_name, - stream=stream_ordering, - ), - prev_event_id=prev_event_id, - room_id=room_id, - membership=membership, - sender=sender, + # When the server leaves a room, it will insert new rows with + # `event_id = null` for all current state. This means we might + # already have a row for the leave event and then another for the + # same leave where the `event_id=null` but the `prev_event_id` is + # pointing back at the earlier leave event. Since we're assuming the + # `event_id = null` row is a `leave` and we don't want duplicate + # membership changes in our results, let's get rid of those + # (deduplicate) (see `test_server_left_after_us_room`). 
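+                    #
+                    # For example (with made-up event IDs): after our own leave event
+                    # `$leave1` (whose `prev_event_id` is `$join1`), the server leaving
+                    # can add a second row with `event_id=None` and
+                    # `prev_event_id='$leave1'` for the same membership, which we skip.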
+                    if event_id is None:
+                        already_tracked_membership_change = membership_change_map.get(
+                            prev_event_id
+                        )
+                        if (
+                            already_tracked_membership_change is not None
+                            and already_tracked_membership_change.membership
+                            == Membership.LEAVE
+                        ):
+                            continue
+
+                    membership_change = CurrentStateDeltaMembership(
+                        event_id=event_id,
+                        event_pos=PersistedEventPosition(
+                            instance_name=instance_name,
+                            stream=stream_ordering,
+                        ),
+                        prev_event_id=prev_event_id,
+                        room_id=room_id,
+                        membership=(
+                            membership if membership is not None else Membership.LEAVE
+                        ),
+                        sender=sender,
+                    )
+
+                    membership_changes.append(membership_change)
+                    if event_id:
+                        membership_change_map[event_id] = membership_change
+
         return membership_changes
 
     membership_changes = await self.db_pool.runInteraction(
diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py
index 5b30d7106f..ffa763bff2 100644
--- a/tests/storage/test_stream.py
+++ b/tests/storage/test_stream.py
@@ -46,7 +46,7 @@ from synapse.types import (
 from synapse.util import Clock
 
 from tests.test_utils.event_injection import create_event
-from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase, skip_unless
+from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase
 
 logger = logging.getLogger(__name__)
 
@@ -829,17 +829,16 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase):
                     sender=user1_id,
                 ),
                 CurrentStateDeltaMembership(
-                    event_id=leave_response1["event_id"],
+                    event_id=None,  # leave_response1["event_id"],
                     event_pos=leave_pos1,
                     prev_event_id=join_response1["event_id"],
                     room_id=room_id1,
                     membership="leave",
-                    sender=user1_id,
+                    sender=None,  # user1_id,
                 ),
             ],
         )
 
-    @skip_unless(False, "We don't support this yet")
     def test_membership_persisted_in_same_batch(self) -> None:
         """
         Test batch of membership events being processed at once. This will result in all
@@ -954,7 +953,6 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase):
             ],
         )
 
-    @skip_unless(False, "We don't support this yet")
     def test_state_reset(self) -> None:
         """
         Test a state reset scenario where the user gets removed from the room (when
@@ -970,7 +968,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase):
 
         before_reset_token = self.event_sources.get_current_token()
 
-        # Send another state event which we will cause the reset at
+        # Send another state event to make a position for the state reset to happen at
         dummy_state_response = self.helper.send_state(
             room_id1,
             event_type="foobarbaz",
             state_key="",
             body={"foo": "bar"},
             tok=user2_tok,
         )
         dummy_state_pos = self.get_success(
             self.store.get_position_for_event(dummy_state_response["event_id"])
         )
@@ -1011,6 +1009,12 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase):
             )
         )
 
+        # Manually bust the cache since we're just manually messing with the database
+        # and not causing an actual state reset.
+        self.store._membership_stream_cache.entity_has_changed(
+            user1_id, dummy_state_pos.stream
+        )
+
         after_reset_token = self.event_sources.get_current_token()
 
         membership_changes = self.get_success(
@@ -1025,19 +1029,16 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase):
         self.maxDiff = None
         self.assertEqual(
             membership_changes,
-            # TODO: Uncomment the expected membership. We just have a `False` value
-            # here so the test expectation fails and you look here.
- False, - # [ - # CurrentStateDeltaMembership( - # event_id=TODO, - # event_pos=TODO, - # prev_event_id=None, - # room_id=room_id1, - # membership="leave", - # sender=user1_id, - # ), - # ], + [ + CurrentStateDeltaMembership( + event_id=None, + event_pos=dummy_state_pos, + prev_event_id=None, + room_id=room_id1, + membership="leave", + sender=None, # user1_id, + ), + ], ) def test_excluded_room_ids(self) -> None: From f163fcf08a435ea96de334b1f88bd99a0ccbcc25 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 01:20:42 -0500 Subject: [PATCH 51/62] Remove need for topological_ordering --- synapse/storage/databases/main/stream.py | 45 +++++++++++++++++++++--- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 9ae1fe6c15..9e94cb08f6 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -412,6 +412,43 @@ def _filter_results( return True +def _filter_results_by_stream( + lower_token: Optional[RoomStreamToken], + upper_token: Optional[RoomStreamToken], + instance_name: str, + stream_ordering: int, +) -> bool: + """ + This function only works with "live" tokens with `stream_ordering` only. See + `_filter_results(...)` if you want to work with all tokens. + + Returns True if the event persisted by the given instance at the given + stream_ordering falls between the two tokens (taking a None + token to mean unbounded). + + Used to filter results from fetching events in the DB against the given + tokens. This is necessary to handle the case where the tokens include + position maps, which we handle by fetching more than necessary from the DB + and then filtering (rather than attempting to construct a complicated SQL + query). + """ + if lower_token: + assert lower_token.topological is None + + # If these are live tokens we compare the stream ordering against the + # writers stream position. + if stream_ordering <= lower_token.get_stream_pos_for_instance(instance_name): + return False + + if upper_token: + assert upper_token.topological is None + + if upper_token.get_stream_pos_for_instance(instance_name) < stream_ordering: + return False + + return True + + def filter_to_clause(event_filter: Optional[Filter]) -> Tuple[str, List[str]]: # NB: This may create SQL clauses that don't optimise well (and we don't # have indices on all possible clauses). E.g. it may create @@ -764,6 +801,8 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): Fetch membership events (and the previous event that was replaced by that one) for a given user. + Note: This function only works with "live" tokens with `stream_ordering` only. + We're looking for membership changes in the token range (> `from_key` and <= `to_key`). 
@@ -837,7 +876,6 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
                     s.room_id,
                     COALESCE(e.instance_name, s.instance_name),
                     COALESCE(e.stream_ordering, s.stream_id),
-                    e.topological_ordering,
                     m.membership,
                     e.sender
                 FROM current_state_delta_stream AS s
@@ -859,7 +897,6 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
                 room_id,
                 instance_name,
                 stream_ordering,
-                topological_ordering,
                 membership,
                 sender,
             ) in txn:
@@ -867,12 +904,10 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
                 assert instance_name is not None
                 assert stream_ordering is not None
 
-                if _filter_results(
+                if _filter_results_by_stream(
                     from_key,
                     to_key,
                     instance_name,
-                    # TODO: This isn't always filled now
-                    topological_ordering,
                     stream_ordering,
                 ):
                     # When the server leaves a room, it will insert new rows with

From 956f20ef748b6e3caf76f91623e72b9a617ae235 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Thu, 27 Jun 2024 01:24:15 -0500
Subject: [PATCH 52/62] (currently failing) Add test to make sure membership
 changes don't re-appear if the server leaves the room later

---
 tests/storage/test_stream.py | 63 +++++++++++++++++++++++++++++++++---
 1 file changed, 59 insertions(+), 4 deletions(-)

diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py
index ffa763bff2..0082132474 100644
--- a/tests/storage/test_stream.py
+++ b/tests/storage/test_stream.py
@@ -619,7 +619,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase):
             ],
         )
 
-    def test_server_left_after_us_room(self) -> None:
+    def test_server_left_room_after_us(self) -> None:
         """
         Test that when probing over part of the DAG where the server left the room
         *after us*, we still see the join and leave changes.
@@ -652,7 +652,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase):
         join_pos1 = self.get_success(
             self.store.get_position_for_event(join_response1["event_id"])
         )
-        # Make sure random other non-member state that happens to have a state_key
+        # Make sure that random other non-member state that happens to have a `state_key`
         # matching the user ID doesn't mess with things.
         self.helper.send_state(
             room_id1,
@@ -728,7 +728,62 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase):
             ],
         )
 
-    def test_server_left_room(self) -> None:
+    def test_server_left_room_after_us_later(self) -> None:
+        """
+        Test that when the user leaves the room and then, sometime later, everyone
+        else leaves the room (causing the server to leave the room), we shouldn't
+        see any membership changes.
+
+        This is to make sure we play nicely with this behavior: When the server leaves a
+        room, it will insert new rows with `event_id = null` into the
+        `current_state_delta_stream` table for all current state.
+        """
+        user1_id = self.register_user("user1", "pass")
+        user1_tok = self.login(user1_id, "pass")
+        user2_id = self.register_user("user2", "pass")
+        user2_tok = self.login(user2_id, "pass")
+
+        room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok)
+        self.helper.join(room_id1, user1_id, tok=user1_tok)
+        # User1 should leave the room first
+        self.helper.leave(room_id1, user1_id, tok=user1_tok)
+
+        after_user1_leave_token = self.event_sources.get_current_token()
+
+        # User2 should also leave the room (everyone has left the room which means the
+        # server is no longer in the room).
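+        # (This leave is what should trigger the `event_id = null` rows for all
+        # current state, as described in the docstring above.)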
+        self.helper.leave(room_id1, user2_id, tok=user2_tok)
+
+        after_server_leave_token = self.event_sources.get_current_token()
+
+        # Join another room as user1 just to advance the stream_ordering and bust
+        # `_membership_stream_cache`
+        room_id2 = self.helper.create_room_as(user2_id, tok=user2_tok)
+        self.helper.join(room_id2, user1_id, tok=user1_tok)
+
+        # Get the membership changes for the user.
+        #
+        # At this point, the `current_state_delta_stream` table should look like the
+        # following. When the server leaves a room, it will insert new rows with
+        # `event_id = null` for all current state.
+        #
+        # TODO: Add DB rows to better see what's going on.
+        membership_changes = self.get_success(
+            self.store.get_current_state_delta_membership_changes_for_user(
+                user1_id,
+                from_key=after_user1_leave_token.room_key,
+                to_key=after_server_leave_token.room_key,
+            )
+        )
+
+        # Let the whole diff show on failure
+        self.maxDiff = None
+        self.assertEqual(
+            membership_changes,
+            [],
+        )
+
+    def test_we_cause_server_left_room(self) -> None:
         """
         Test that when probing over part of the DAG where we leave the room causing the
         server to leave the room (because we were the last local user in the room), we
         still see the join and leave changes.
@@ -762,7 +817,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase):
         join_pos1 = self.get_success(
             self.store.get_position_for_event(join_response1["event_id"])
         )
-        # Make sure random other non-member state that happens to have a state_key
+        # Make sure that random other non-member state that happens to have a `state_key`
         # matching the user ID doesn't mess with things.
         self.helper.send_state(
             room_id1,

From 830e09d2defc6ae742dce30bdc822dcaf9a74092 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Thu, 27 Jun 2024 10:13:02 -0500
Subject: [PATCH 53/62] Grab `prev_membership` to see whether the server left
 the room (fixes tests)

See https://github.com/element-hq/synapse/pull/17320#discussion_r1657170493

`prev_membership` helps determine whether we should include the
`event_id=null` row because we can check whether we have already left.

 - When we leave the room causing the server to leave the room, the
   `prev_event_id` will be our join event
 - When the server leaves the room after us, the `prev_event_id` will be the
   leave event
 - In the state reset case, `prev_event_id` will be our join event

---
 synapse/storage/databases/main/stream.py | 20 ++++++--------------
 tests/storage/test_stream.py             |  7 +++----
 2 files changed, 9 insertions(+), 18 deletions(-)

diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py
index 9e94cb08f6..d94b9366ab 100644
--- a/synapse/storage/databases/main/stream.py
+++ b/synapse/storage/databases/main/stream.py
@@ -877,10 +877,12 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
                     COALESCE(e.instance_name, s.instance_name),
                     COALESCE(e.stream_ordering, s.stream_id),
                     m.membership,
-                    e.sender
+                    e.sender,
+                    m_prev.membership AS prev_membership
                 FROM current_state_delta_stream AS s
                 LEFT JOIN events AS e ON e.event_id = s.event_id
                 LEFT JOIN room_memberships AS m ON m.event_id = s.event_id
+                LEFT JOIN room_memberships AS m_prev ON s.prev_event_id = m_prev.event_id
                 WHERE s.stream_id > ? AND s.stream_id <= ?
                     AND s.state_key = ?
                     AND s.type = ?
@@ -890,7 +892,6 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): txn.execute(sql, args) membership_changes: List[CurrentStateDeltaMembership] = [] - membership_change_map: Dict[str, CurrentStateDeltaMembership] = {} for ( event_id, prev_event_id, @@ -899,6 +900,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): stream_ordering, membership, sender, + prev_membership, ) in txn: assert room_id is not None assert instance_name is not None @@ -918,16 +920,8 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): # `event_id = null` row is a `leave` and we don't want duplicate # membership changes in our results, let's get rid of those # (deduplicate) (see `test_server_left_after_us_room`). - if event_id is None: - already_tracked_membership_change = membership_change_map.get( - prev_event_id - ) - if ( - already_tracked_membership_change is not None - and already_tracked_membership_change.membership - == Membership.LEAVE - ): - continue + if event_id is None and prev_membership == Membership.LEAVE: + continue membership_change = CurrentStateDeltaMembership( event_id=event_id, @@ -944,8 +938,6 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ) membership_changes.append(membership_change) - if event_id: - membership_change_map[event_id] = membership_change return membership_changes diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 0082132474..1342794d37 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -1019,7 +1019,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): user2_tok = self.login(user2_id, "pass") room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) - self.helper.join(room_id1, user1_id, tok=user1_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) before_reset_token = self.event_sources.get_current_token() @@ -1056,8 +1056,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): "type": EventTypes.Member, "state_key": user1_id, "event_id": None, - # FIXME: I'm not sure if a state reset should have a prev_event_id - "prev_event_id": None, + "prev_event_id": join_response1["event_id"], "instance_name": dummy_state_pos.instance_name, }, desc="state reset user in current_state_delta_stream", @@ -1088,7 +1087,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): CurrentStateDeltaMembership( event_id=None, event_pos=dummy_state_pos, - prev_event_id=None, + prev_event_id=join_response1["event_id"], room_id=room_id1, membership="leave", sender=None, # user1_id, From 15fcead2a5df17ee10278f1c0cdd16dbba26c76d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 11:33:41 -0500 Subject: [PATCH 54/62] Slight clean-up --- synapse/storage/databases/main/stream.py | 12 +++++------- tests/storage/test_stream.py | 6 +++--- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index d94b9366ab..ab592dcf15 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -846,7 +846,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): min_from_id = from_key.stream max_to_id = to_key.get_max_stream_pos() - args: List[Any] = [min_from_id, max_to_id, user_id, EventTypes.Member] + args: List[Any] = [min_from_id, max_to_id, EventTypes.Member, user_id] # TODO: It would be good to assert that the `from_token`/`to_token` is >= # the first 
row in `current_state_delta_stream` for the rooms we're @@ -874,7 +874,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): e.event_id, s.prev_event_id, s.room_id, - COALESCE(e.instance_name, s.instance_name), + s.instance_name, COALESCE(e.stream_ordering, s.stream_id), m.membership, e.sender, @@ -884,8 +884,8 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): LEFT JOIN room_memberships AS m ON m.event_id = s.event_id LEFT JOIN room_memberships AS m_prev ON s.prev_event_id = m_prev.event_id WHERE s.stream_id > ? AND s.stream_id <= ? - AND s.state_key = ? AND s.type = ? + AND s.state_key = ? ORDER BY s.stream_id ASC """ @@ -916,10 +916,8 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): # `event_id = null` for all current state. This means we might # already have a row for the leave event and then another for the # same leave where the `event_id=null` but the `prev_event_id` is - # pointing back at the earlier leave event. Since we're assuming the - # `event_id = null` row is a `leave` and we don't want duplicate - # membership changes in our results, let's get rid of those - # (deduplicate) (see `test_server_left_after_us_room`). + # pointing back at the earlier leave event. We don't want to report + # the leave, if we already have a leave event. if event_id is None and prev_membership == Membership.LEAVE: continue diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 1342794d37..5a054d7f2e 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -785,9 +785,9 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): def test_we_cause_server_left_room(self) -> None: """ - Test that when probing over part of the DAG where we leave the room causing the - server to leave the room (because we were the last local user in the room), we - still see the join and leave changes. + Test that when probing over part of the DAG where the user leaves the room + causing the server to leave the room (because we were the last local user in the + room), we still see the join and leave changes. This is to make sure we play nicely with this behavior: When the server leaves a room, it will insert new rows with `event_id = null` into the From 81c06bec20d2f6732100672853a140a6e19ff67d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 11:50:18 -0500 Subject: [PATCH 55/62] Detect state resets --- synapse/storage/databases/main/stream.py | 51 +++++++++++++++++------- tests/storage/test_stream.py | 15 ++++++- 2 files changed, 50 insertions(+), 16 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index ab592dcf15..19dba00a0f 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -123,6 +123,8 @@ class CurrentStateDeltaMembership: room_id: The room ID of the membership event. membership: The membership state of the user in the room sender: The person who sent the membership event + state_reset: Whether the membership in the room was changed without a + corresponding event (state reset). 
""" event_id: Optional[str] @@ -131,6 +133,7 @@ class CurrentStateDeltaMembership: room_id: str membership: str sender: Optional[str] + state_reset: bool def generate_pagination_where_clause( @@ -846,7 +849,15 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): min_from_id = from_key.stream max_to_id = to_key.get_max_stream_pos() - args: List[Any] = [min_from_id, max_to_id, EventTypes.Member, user_id] + args: List[Any] = [ + EventTypes.Member, + user_id, + user_id, + min_from_id, + max_to_id, + EventTypes.Member, + user_id, + ] # TODO: It would be good to assert that the `from_token`/`to_token` is >= # the first row in `current_state_delta_stream` for the rooms we're @@ -859,30 +870,35 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): # `stream_ordering` is unique across the Synapse instance so this should # work fine. # - # We `COALESCE` the `instance_name` and `stream_ordering` because we prefer - # the source of truth from the events table. This gives slightly more - # accurate results when available since `current_state_delta_stream` only - # tracks that the current state is at this stream position (not what stream - # position the state event was added) and batches events at the same - # `stream_id` in certain cases. + # We `COALESCE` the `stream_ordering` because we prefer the source of truth + # from the `events` table. This gives slightly more accurate results when + # available since `current_state_delta_stream` only tracks that the current + # state is at this stream position (not what stream position the state event + # was added) and uses the *minimum* stream position for batches of events. # - # TODO: We need to add indexes for `current_state_delta_stream.event_id` and - # `current_state_delta_stream.state_key`/`current_state_delta_stream.type` - # for this to be efficient. + # The extra `LEFT JOIN` by stream position are only needed to tell a state + # reset from the server leaving the room. Both cases have `event_id = null` + # but if we can find a corresponding event at that stream position, then we + # know it was just the server leaving the room. sql = """ SELECT - e.event_id, + COALESCE(e.event_id, e_by_stream.event_id) AS event_id, s.prev_event_id, s.room_id, s.instance_name, - COALESCE(e.stream_ordering, s.stream_id), - m.membership, - e.sender, + COALESCE(e.stream_ordering, e_by_stream.stream_ordering, s.stream_id) AS stream_ordering, + COALESCE(m.membership, m_by_stream.membership) AS membership, + COALESCE(e.sender, e_by_stream.sender) AS sender, m_prev.membership AS prev_membership FROM current_state_delta_stream AS s LEFT JOIN events AS e ON e.event_id = s.event_id LEFT JOIN room_memberships AS m ON m.event_id = s.event_id LEFT JOIN room_memberships AS m_prev ON s.prev_event_id = m_prev.event_id + LEFT JOIN events AS e_by_stream ON e_by_stream.stream_ordering = s.stream_id + AND e_by_stream.type = ? + AND e_by_stream.state_key = ? + LEFT JOIN room_memberships AS m_by_stream ON m_by_stream.event_stream_ordering = s.stream_id + AND m_by_stream.user_id = ? WHERE s.stream_id > ? AND s.stream_id <= ? AND s.type = ? AND s.state_key = ? @@ -921,6 +937,12 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): if event_id is None and prev_membership == Membership.LEAVE: continue + # We can detect a state reset if there was a membership change + # without a corresponding event. 
+ state_reset = False + if event_id is None and membership != prev_membership: + state_reset = True + membership_change = CurrentStateDeltaMembership( event_id=event_id, event_pos=PersistedEventPosition( @@ -933,6 +955,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): membership if membership is not None else Membership.LEAVE ), sender=sender, + state_reset=state_reset, ) membership_changes.append(membership_change) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 5a054d7f2e..acb2f0e429 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -615,6 +615,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="join", sender=user1_id, + state_reset=False, ) ], ) @@ -716,6 +717,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="join", sender=user1_id, + state_reset=False, ), CurrentStateDeltaMembership( event_id=leave_response1["event_id"], @@ -724,6 +726,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="leave", sender=user1_id, + state_reset=False, ), ], ) @@ -882,14 +885,16 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="join", sender=user1_id, + state_reset=False, ), CurrentStateDeltaMembership( - event_id=None, # leave_response1["event_id"], + event_id=leave_response1["event_id"], event_pos=leave_pos1, prev_event_id=join_response1["event_id"], room_id=room_id1, membership="leave", - sender=None, # user1_id, + sender=user1_id, + state_reset=False, ), ], ) @@ -1004,6 +1009,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="join", sender=user3_id, + state_reset=False, ), ], ) @@ -1091,6 +1097,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="leave", sender=None, # user1_id, + state_reset=True, ), ], ) @@ -1141,6 +1148,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="join", sender=user1_id, + state_reset=False, ), CurrentStateDeltaMembership( event_id=join_response2["event_id"], @@ -1149,6 +1157,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id2, membership="join", sender=user1_id, + state_reset=False, ), ], ) @@ -1175,6 +1184,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="join", sender=user1_id, + state_reset=False, ) ], ) @@ -1368,6 +1378,7 @@ class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase( room_id=intially_unjoined_room_id, membership="join", sender=user1_id, + state_reset=False, ), ], ) From eb159c11cd7bcc0a72983da46a728282fdbed8e7 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 14:38:55 -0500 Subject: [PATCH 56/62] Don't worry about `state_reset` for now See: - Why no `COALESCE` https://github.com/element-hq/synapse/pull/17320#discussion_r1657435662 - Don't worry about `state_reset` for now, https://github.com/element-hq/synapse/pull/17320#discussion_r1657562645 --- synapse/storage/databases/main/stream.py | 53 ++++++----------------- tests/storage/test_stream.py | 55 +++++++++++------------- 2 files changed, 36 insertions(+), 72 deletions(-) diff --git a/synapse/storage/databases/main/stream.py 
b/synapse/storage/databases/main/stream.py index 19dba00a0f..c128eb5d5b 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -123,8 +123,6 @@ class CurrentStateDeltaMembership: room_id: The room ID of the membership event. membership: The membership state of the user in the room sender: The person who sent the membership event - state_reset: Whether the membership in the room was changed without a - corresponding event (state reset). """ event_id: Optional[str] @@ -133,7 +131,6 @@ class CurrentStateDeltaMembership: room_id: str membership: str sender: Optional[str] - state_reset: bool def generate_pagination_where_clause( @@ -849,56 +846,37 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): min_from_id = from_key.stream max_to_id = to_key.get_max_stream_pos() - args: List[Any] = [ - EventTypes.Member, - user_id, - user_id, - min_from_id, - max_to_id, - EventTypes.Member, - user_id, - ] + args: List[Any] = [min_from_id, max_to_id, EventTypes.Member, user_id] # TODO: It would be good to assert that the `from_token`/`to_token` is >= # the first row in `current_state_delta_stream` for the rooms we're # interested in. Otherwise, we will end up with empty results and not know # it. - # We have to look-up events by `stream_ordering` because - # `current_state_delta_stream.event_id` can be `null` if the server is no - # longer in the room or a state reset happened and it was unset. - # `stream_ordering` is unique across the Synapse instance so this should - # work fine. + # We could `COALESCE(e.stream_ordering, s.stream_id)` to get more accurate + # stream positioning when available but given our usages, we can avoid the + # complexity. Between two (valid) stream tokens, we will still get all of + # the state changes. Since those events are persisted in a batch, valid + # tokens will either be before or after the batch of events. # - # We `COALESCE` the `stream_ordering` because we prefer the source of truth - # from the `events` table. This gives slightly more accurate results when - # available since `current_state_delta_stream` only tracks that the current + # `stream_ordering` from the `events` table is more accurate when available + # since the `current_state_delta_stream` table only tracks that the current # state is at this stream position (not what stream position the state event # was added) and uses the *minimum* stream position for batches of events. - # - # The extra `LEFT JOIN` by stream position are only needed to tell a state - # reset from the server leaving the room. Both cases have `event_id = null` - # but if we can find a corresponding event at that stream position, then we - # know it was just the server leaving the room. sql = """ SELECT - COALESCE(e.event_id, e_by_stream.event_id) AS event_id, + e.event_id, s.prev_event_id, s.room_id, s.instance_name, - COALESCE(e.stream_ordering, e_by_stream.stream_ordering, s.stream_id) AS stream_ordering, - COALESCE(m.membership, m_by_stream.membership) AS membership, - COALESCE(e.sender, e_by_stream.sender) AS sender, + s.stream_id, + m.membership, + e.sender, m_prev.membership AS prev_membership FROM current_state_delta_stream AS s LEFT JOIN events AS e ON e.event_id = s.event_id LEFT JOIN room_memberships AS m ON m.event_id = s.event_id LEFT JOIN room_memberships AS m_prev ON s.prev_event_id = m_prev.event_id - LEFT JOIN events AS e_by_stream ON e_by_stream.stream_ordering = s.stream_id - AND e_by_stream.type = ? - AND e_by_stream.state_key = ? 
- LEFT JOIN room_memberships AS m_by_stream ON m_by_stream.event_stream_ordering = s.stream_id - AND m_by_stream.user_id = ? WHERE s.stream_id > ? AND s.stream_id <= ? AND s.type = ? AND s.state_key = ? @@ -937,12 +915,6 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): if event_id is None and prev_membership == Membership.LEAVE: continue - # We can detect a state reset if there was a membership change - # without a corresponding event. - state_reset = False - if event_id is None and membership != prev_membership: - state_reset = True - membership_change = CurrentStateDeltaMembership( event_id=event_id, event_pos=PersistedEventPosition( @@ -955,7 +927,6 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): membership if membership is not None else Membership.LEAVE ), sender=sender, - state_reset=state_reset, ) membership_changes.append(membership_change) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index acb2f0e429..4f8f919a24 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -615,7 +615,6 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="join", sender=user1_id, - state_reset=False, ) ], ) @@ -717,7 +716,6 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="join", sender=user1_id, - state_reset=False, ), CurrentStateDeltaMembership( event_id=leave_response1["event_id"], @@ -726,7 +724,6 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="leave", sender=user1_id, - state_reset=False, ), ], ) @@ -885,16 +882,14 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="join", sender=user1_id, - state_reset=False, ), CurrentStateDeltaMembership( - event_id=leave_response1["event_id"], + event_id=None, # leave_response1["event_id"], event_pos=leave_pos1, prev_event_id=join_response1["event_id"], room_id=room_id1, membership="leave", - sender=user1_id, - state_reset=False, + sender=None, # user1_id, ), ], ) @@ -924,16 +919,6 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): # Persist the user1, user3, and user4 join events in the same batch so they all # end up in the `current_state_delta_stream` table with the same # stream_ordering. - join_event1, join_event_context1 = self.get_success( - create_event( - self.hs, - sender=user1_id, - type=EventTypes.Member, - state_key=user1_id, - content={"membership": "join"}, - room_id=room_id1, - ) - ) join_event3, join_event_context3 = self.get_success( create_event( self.hs, @@ -944,6 +929,19 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, ) ) + # We want to put user1 in the middle of the batch. This way, regardless of the + # implementation that inserts rows into current_state_delta_stream` (whether it + # be minimum/maximum of stream position of the batch), we will still catch bugs. 
+ join_event1, join_event_context1 = self.get_success( + create_event( + self.hs, + sender=user1_id, + type=EventTypes.Member, + state_key=user1_id, + content={"membership": "join"}, + room_id=room_id1, + ) + ) join_event4, join_event_context4 = self.get_success( create_event( self.hs, @@ -957,8 +955,8 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): self.get_success( self.persistence.persist_events( [ - (join_event1, join_event_context1), (join_event3, join_event_context3), + (join_event1, join_event_context1), (join_event4, join_event_context4), ] ) @@ -966,10 +964,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): after_room1_token = self.event_sources.get_current_token() - # Let's get membership changes from user3's perspective because it was in the - # middle of the batch. This way, if rows in` current_state_delta_stream` are - # stored with the first or last event's `stream_ordering`, we will still catch - # bugs. + # Get the membership changes for the user. # # At this point, the `current_state_delta_stream` table should look like (notice # those three memberships at the end with `stream_id=7` because we persisted @@ -987,7 +982,7 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): # | 7 | '!x:test' | 'm.room.member' | '@user4:test' | '$xxx' | None | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( - user3_id, + user1_id, from_key=before_room1_token.room_key, to_key=after_room1_token.room_key, ) @@ -1003,13 +998,16 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): membership_changes, [ CurrentStateDeltaMembership( - event_id=join_event3.event_id, + event_id=join_event1.event_id, + # Ideally, this would be `join_pos1` (to match the `event_id`) but + # when events are persisted in a batch, they are all stored in the + # `current_state_delta_stream` table with the minimum + # `stream_ordering` from the batch. 
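+                    # Here that minimum comes from `join_event3`, which was
+                    # persisted first in the batch above, hence `join_pos3`.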
event_pos=join_pos3, prev_event_id=None, room_id=room_id1, membership="join", - sender=user3_id, - state_reset=False, + sender=user1_id, ), ], ) @@ -1097,7 +1095,6 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="leave", sender=None, # user1_id, - state_reset=True, ), ], ) @@ -1148,7 +1145,6 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="join", sender=user1_id, - state_reset=False, ), CurrentStateDeltaMembership( event_id=join_response2["event_id"], @@ -1157,7 +1153,6 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id2, membership="join", sender=user1_id, - state_reset=False, ), ], ) @@ -1184,7 +1179,6 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): room_id=room_id1, membership="join", sender=user1_id, - state_reset=False, ) ], ) @@ -1378,7 +1372,6 @@ class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase( room_id=intially_unjoined_room_id, membership="join", sender=user1_id, - state_reset=False, ), ], ) From ba56350642d33332d5ab3f3a94005e408cb9f433 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 15:31:18 -0500 Subject: [PATCH 57/62] Passing current tests --- synapse/handlers/sliding_sync.py | 44 +++++++++++++++++++---------- tests/handlers/test_sliding_sync.py | 9 ++++-- 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 3ce10d3ea7..b327e340ff 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -18,7 +18,6 @@ # # import logging -from collections import defaultdict from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple import attr @@ -48,7 +47,9 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -def filter_membership_for_sync(*, membership: str, user_id: str, sender: str) -> bool: +def filter_membership_for_sync( + *, membership: str, user_id: str, sender: Optional[str] +) -> bool: """ Returns True if the membership event should be included in the sync response, otherwise False. @@ -65,6 +66,11 @@ def filter_membership_for_sync(*, membership: str, user_id: str, sender: str) -> # # This logic includes kicks (leave events where the sender is not the same user) and # can be read as "anything that isn't a leave or a leave with a different sender". + # + # When `sender=None` and `membership=Membership.LEAVE`, it means that a state reset + # happened that removed the user from the room, or the user was the last person + # locally to leave the room which caused the server to leave the room. In both + # cases, TODO return membership != Membership.LEAVE or sender != user_id @@ -99,10 +105,10 @@ class _RoomMembershipForUser: range """ - event_id: str + event_id: Optional[str] event_pos: PersistedEventPosition membership: str - sender: str + sender: Optional[str] newly_joined: bool def copy_and_replace(self, **kwds: Any) -> "_RoomMembershipForUser": @@ -540,9 +546,11 @@ class SlidingSyncHandler: first_membership_change_by_room_id_in_from_to_range: Dict[ str, CurrentStateDeltaMembership ] = {} - non_join_event_ids_by_room_id_in_from_to_range: Dict[str, List[str]] = ( - defaultdict(list) - ) + # Keep track if the room has a non-join event in the token range so we can later + # tell if it was a `newly_joined` room. 
If the last membership event in the
+        # token range is a join and there is also some non-join in the range, we know
+        # the room is `newly_joined`.
+        has_non_join_event_by_room_id_in_from_to_range: Dict[str, bool] = {}
         for (
             membership_change
         ) in current_state_delta_membership_changes_in_from_to_range:
@@ -551,16 +559,13 @@ class SlidingSyncHandler:
                 last_membership_change_by_room_id_in_from_to_range[room_id] = (
                     membership_change
                 )
-
                 # Only set if we haven't already set it
                 first_membership_change_by_room_id_in_from_to_range.setdefault(
                     room_id, membership_change
                 )

                 if membership_change.membership != Membership.JOIN:
-                    non_join_event_ids_by_room_id_in_from_to_range[room_id].append(
-                        membership_change.event_id
-                    )
+                    has_non_join_event_by_room_id_in_from_to_range[room_id] = True

         # 2) Fixup
         #
@@ -574,6 +579,7 @@ class SlidingSyncHandler:
         ) in last_membership_change_by_room_id_in_from_to_range.values():
             room_id = last_membership_change_in_from_to_range.room_id

+            # 3) (handled in "3) Figure out `newly_joined`" below)
             if last_membership_change_in_from_to_range.membership == Membership.JOIN:
                 possibly_newly_joined_room_ids.add(room_id)

@@ -592,10 +598,14 @@ class SlidingSyncHandler:
         # 3) Figure out `newly_joined`
         prev_event_ids_before_token_range: List[str] = []
         for possibly_newly_joined_room_id in possibly_newly_joined_room_ids:
-            non_joins_for_room = non_join_event_ids_by_room_id_in_from_to_range[
-                possibly_newly_joined_room_id
-            ]
-            if len(non_joins_for_room) > 0:
+            has_non_join_in_from_to_range = (
+                has_non_join_event_by_room_id_in_from_to_range.get(
+                    possibly_newly_joined_room_id, False
+                )
+            )
+            # If the last membership event in the token range is a join and there is
+            # also some non-join in the range, we know the room is `newly_joined`.
+            if has_non_join_in_from_to_range:
                 # We found a `newly_joined` room (we left and joined within the token range)
                 filtered_sync_room_id_set[room_id] = filtered_sync_room_id_set[
                     room_id
@@ -968,6 +978,10 @@ class SlidingSyncHandler:
             Membership.INVITE,
             Membership.KNOCK,
         ):
+            # This should never happen. If someone is invited to or knocked on a
+            # room, then there should be an event for it.
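+            # (The `assert` also narrows the `Optional[str]` type so `get_event`
+            # can be called with a plain `str` below.)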
+ assert rooms_membership_for_user_at_to_token.event_id is not None + invite_or_knock_event = await self.store.get_event( rooms_membership_for_user_at_to_token.event_id ) diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index 7339cb460e..a751fef1df 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -390,7 +390,7 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # Leave during the from_token/to_token range (newly_left) room_id2 = self.helper.create_room_as(user1_id, tok=user1_tok) - leave_response = self.helper.leave(room_id2, user1_id, tok=user1_tok) + _leave_response2 = self.helper.leave(room_id2, user1_id, tok=user1_tok) after_room2_token = self.event_sources.get_current_token() @@ -404,10 +404,13 @@ class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): # Only the newly_left room should show up self.assertEqual(room_id_results.keys(), {room_id2}) - # It should be pointing to the latest membership event in the from/to range + # It should be pointing to the latest membership event in the from/to range but + # the `event_id` is `None` because we left the room causing the server to leave + # the room because no other local users are in it (quirk of the + # `current_state_delta_stream` table that we source things from) self.assertEqual( room_id_results[room_id2].event_id, - leave_response["event_id"], + None, # _leave_response2["event_id"], ) # We should *NOT* be `newly_joined` because we are instead `newly_left` self.assertEqual(room_id_results[room_id2].newly_joined, False) From f77403251cd2faf65689b785eba0a6af5366b5bd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 15:39:43 -0500 Subject: [PATCH 58/62] Add better comments --- synapse/handlers/sliding_sync.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index b327e340ff..3dd32ae1f1 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -70,8 +70,9 @@ def filter_membership_for_sync( # When `sender=None` and `membership=Membership.LEAVE`, it means that a state reset # happened that removed the user from the room, or the user was the last person # locally to leave the room which caused the server to leave the room. In both - # cases, TODO - return membership != Membership.LEAVE or sender != user_id + # cases, we can just remove the rooms since they are no longer relevant to the user. + # They could still be added back later if they are `newly_left`. 
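+    #
+    #   membership    | sender       | included?
+    #   --------------+--------------+-------------------------------------
+    #   anything else | anyone       | yes
+    #   leave         | someone else | yes (kick)
+    #   leave         | the user     | no
+    #   leave         | None         | no (state reset / server left room)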
+    return membership != Membership.LEAVE or sender not in (user_id, None)


 # We can't freeze this class because we want to update it in place with the
@@ -508,6 +509,8 @@ class SlidingSyncHandler:
             )
         )

+        # Filter the rooms where we have updated the room membership events to the
+        # point in time of the `to_token` (from the "1)" fixups)
         filtered_sync_room_id_set = {
             room_id: room_membership_for_user
             for room_id, room_membership_for_user in sync_room_id_set.items()

From 325856e14b97aa6eca59d4d5d3b4145d050adfe0 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Thu, 27 Jun 2024 15:57:01 -0500
Subject: [PATCH 59/62] Inclusive ranges

---
 synapse/handlers/sliding_sync.py |  3 +-
 tests/rest/client/test_sync.py   | 92 ++++++++++++++++++++++++++++++++
 2 files changed, 94 insertions(+), 1 deletion(-)

diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py
index 3dd32ae1f1..db5dd75d04 100644
--- a/synapse/handlers/sliding_sync.py
+++ b/synapse/handlers/sliding_sync.py
@@ -263,7 +263,8 @@ class SlidingSyncHandler:
                 for range in list_config.ranges:
                     sliced_room_ids = [
                         room_id
-                        for room_id, _ in sorted_room_info[range[0] : range[1]]
+                        # Both sides of range are inclusive
+                        for room_id, _ in sorted_room_info[range[0] : range[1] + 1]
                     ]

                     ops.append(
diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py
index bd1e7d521b..3f4f88c3d1 100644
--- a/tests/rest/client/test_sync.py
+++ b/tests/rest/client/test_sync.py
@@ -1616,6 +1616,98 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase):
             channel.json_body["lists"]["foo-list"],
         )

+    def test_sliced_windows(self) -> None:
+        """
+        Test that the `lists` `ranges` are sliced correctly. Both sides of each range
+        are inclusive.
+        """
+        user1_id = self.register_user("user1", "pass")
+        user1_tok = self.login(user1_id, "pass")
+
+        room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True)
+        room_id2 = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True)
+        room_id3 = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True)
+
+        # Make the Sliding Sync request for a single room
+        channel = self.make_request(
+            "POST",
+            self.sync_endpoint,
+            {
+                "lists": {
+                    "foo-list": {
+                        "ranges": [[0, 0]],
+                        "required_state": [
+                            ["m.room.join_rules", ""],
+                            ["m.room.history_visibility", ""],
+                            ["m.space.child", "*"],
+                        ],
+                        "timeline_limit": 1,
+                    }
+                }
+            },
+            access_token=user1_tok,
+        )
+        self.assertEqual(channel.code, 200, channel.json_body)
+
+        # Make sure it has the foo-list we requested
+        self.assertListEqual(
+            list(channel.json_body["lists"].keys()),
+            ["foo-list"],
+            channel.json_body["lists"].keys(),
+        )
+        # Make sure the list is sorted in the way we expect
+        self.assertListEqual(
+            list(channel.json_body["lists"]["foo-list"]["ops"]),
+            [
+                {
+                    "op": "SYNC",
+                    "range": [0, 0],
+                    "room_ids": [room_id3],
+                }
+            ],
+            channel.json_body["lists"]["foo-list"],
+        )
+
+        # Make the Sliding Sync request for the first two rooms
+        channel = self.make_request(
+            "POST",
+            self.sync_endpoint,
+            {
+                "lists": {
+                    "foo-list": {
+                        "ranges": [[0, 1]],
+                        "required_state": [
+                            ["m.room.join_rules", ""],
+                            ["m.room.history_visibility", ""],
+                            ["m.space.child", "*"],
+                        ],
+                        "timeline_limit": 1,
+                    }
+                }
+            },
+            access_token=user1_tok,
+        )
+        self.assertEqual(channel.code, 200, channel.json_body)
+
+        # Make sure it has the foo-list we requested
+        self.assertListEqual(
+            list(channel.json_body["lists"].keys()),
+            ["foo-list"],
+            channel.json_body["lists"].keys(),
+        )
+        # Make sure the list is sorted in the way we
expect + self.assertListEqual( + list(channel.json_body["lists"]["foo-list"]["ops"]), + [ + { + "op": "SYNC", + "range": [0, 1], + "room_ids": [room_id3, room_id2], + } + ], + channel.json_body["lists"]["foo-list"], + ) + def test_rooms_limited_initial_sync(self) -> None: """ Test that we mark `rooms` as `limited=True` when we saturate the `timeline_limit` From 63c7b5017ad82ee20bc2ae5898b051a2660cf188 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 16:34:41 -0500 Subject: [PATCH 60/62] (doesn't work) Add test for batch persisting multiple member events for the same user --- tests/storage/test_stream.py | 121 +++++++++++++++++++++++++++++++++-- 1 file changed, 115 insertions(+), 6 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 4f8f919a24..53a58bd82a 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -46,7 +46,7 @@ from synapse.types import ( from synapse.util import Clock from tests.test_utils.event_injection import create_event -from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase +from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase, skip_unless logger = logging.getLogger(__name__) @@ -894,12 +894,12 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): ], ) - def test_membership_persisted_in_same_batch(self) -> None: + def test_different_user_membership_persisted_in_same_batch(self) -> None: """ - Test batch of membership events being processed at once. This will result in all - of the memberships being stored in the `current_state_delta_stream` table with - the same `stream_ordering` even though the individual events have different - `stream_ordering`s. + Test batch of membership events from different users being processed at once. + This will result in all of the memberships being stored in the + `current_state_delta_stream` table with the same `stream_ordering` even though + the individual events have different `stream_ordering`s. """ user1_id = self.register_user("user1", "pass") _user1_tok = self.login(user1_id, "pass") @@ -1012,6 +1012,115 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): ], ) + @skip_unless( + False, + "persist code does not support multiple membership events for the same user in the same batch", + ) + def test_membership_persisted_in_same_batch(self) -> None: + """ + Test batch of membership events for the same user being processed at once. + + This *should* (doesn't happen currently) result in all of the memberships being + stored in the `current_state_delta_stream` table with the same `stream_ordering` + even though the individual events have different `stream_ordering`s. + + FIXME: Currently, only the `join_event` is recorded in the `current_state_delta_stream` + table. + """ + user1_id = self.register_user("user1", "pass") + _user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + # User2 is just the designated person to create the room (we do this across the + # tests to be consistent) + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + + # Persist a timeline event sandwiched between two membership events so they end + # up in the `current_state_delta_stream` table with the same `stream_id`. 
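+        # (The message event below is created with a `state_key`, presumably so
+        # that it counts as a state event and gets its own row in
+        # `current_state_delta_stream`; a plain timeline event would not.)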
+ join_event, join_event_context = self.get_success( + create_event( + self.hs, + sender=user1_id, + type=EventTypes.Member, + state_key=user1_id, + content={"membership": "join"}, + room_id=room_id1, + ) + ) + timeline_event, timeline_event_context = self.get_success( + create_event( + self.hs, + sender=user1_id, + type=EventTypes.Message, + state_key=user1_id, + content={"body": "foo bar", "msgtype": "m.text"}, + room_id=room_id1, + ) + ) + leave_event, leave_event_context = self.get_success( + create_event( + self.hs, + sender=user1_id, + type=EventTypes.Member, + state_key=user1_id, + content={"membership": "leave"}, + room_id=room_id1, + ) + ) + self.get_success( + self.persistence.persist_events( + [ + (join_event, join_event_context), + (timeline_event, timeline_event_context), + (leave_event, leave_event_context), + ] + ) + ) + + after_room1_token = self.event_sources.get_current_token() + + # Get the membership changes for the user. + # + # At this point, the `current_state_delta_stream` table should look like (notice + # those three memberships at the end with `stream_id=7` because we persisted + # them in the same batch): + # + # TODO: DB rows to better see what's going on. + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_room1_token.room_key, + to_key=after_room1_token.room_key, + ) + ) + + join_pos = self.get_success( + self.store.get_position_for_event(join_event.event_id) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=leave_event.event_id, + # Ideally, this would be `leave_pos` (to match the `event_id`) but + # when events are persisted in a batch, they are all stored in the + # `current_state_delta_stream` table with the minimum + # `stream_ordering` from the batch. 
+ event_pos=join_pos, # leave_pos, + prev_event_id=None, + room_id=room_id1, + membership="leave", + sender=user1_id, + ), + ], + ) + def test_state_reset(self) -> None: """ Test a state reset scenario where the user gets removed from the room (when From 1158058aa52e47d0463b44f115222e0e122e045e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 17:58:31 -0500 Subject: [PATCH 61/62] Opt for tackling more batch scenarios in future PRs --- tests/rest/client/test_sync.py | 2 +- tests/storage/test_stream.py | 111 +-------------------------------- 2 files changed, 2 insertions(+), 111 deletions(-) diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 3f4f88c3d1..766c8850d0 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1624,7 +1624,7 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") - room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) + _room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) room_id2 = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) room_id3 = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 53a58bd82a..e420e680e2 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -46,7 +46,7 @@ from synapse.types import ( from synapse.util import Clock from tests.test_utils.event_injection import create_event -from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase, skip_unless +from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase logger = logging.getLogger(__name__) @@ -1012,115 +1012,6 @@ class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): ], ) - @skip_unless( - False, - "persist code does not support multiple membership events for the same user in the same batch", - ) - def test_membership_persisted_in_same_batch(self) -> None: - """ - Test batch of membership events for the same user being processed at once. - - This *should* (doesn't happen currently) result in all of the memberships being - stored in the `current_state_delta_stream` table with the same `stream_ordering` - even though the individual events have different `stream_ordering`s. - - FIXME: Currently, only the `join_event` is recorded in the `current_state_delta_stream` - table. - """ - user1_id = self.register_user("user1", "pass") - _user1_tok = self.login(user1_id, "pass") - user2_id = self.register_user("user2", "pass") - user2_tok = self.login(user2_id, "pass") - - before_room1_token = self.event_sources.get_current_token() - - # User2 is just the designated person to create the room (we do this across the - # tests to be consistent) - room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) - - # Persist a timeline event sandwiched between two membership events so they end - # up in the `current_state_delta_stream` table with the same `stream_id`. 
- join_event, join_event_context = self.get_success( - create_event( - self.hs, - sender=user1_id, - type=EventTypes.Member, - state_key=user1_id, - content={"membership": "join"}, - room_id=room_id1, - ) - ) - timeline_event, timeline_event_context = self.get_success( - create_event( - self.hs, - sender=user1_id, - type=EventTypes.Message, - state_key=user1_id, - content={"body": "foo bar", "msgtype": "m.text"}, - room_id=room_id1, - ) - ) - leave_event, leave_event_context = self.get_success( - create_event( - self.hs, - sender=user1_id, - type=EventTypes.Member, - state_key=user1_id, - content={"membership": "leave"}, - room_id=room_id1, - ) - ) - self.get_success( - self.persistence.persist_events( - [ - (join_event, join_event_context), - (timeline_event, timeline_event_context), - (leave_event, leave_event_context), - ] - ) - ) - - after_room1_token = self.event_sources.get_current_token() - - # Get the membership changes for the user. - # - # At this point, the `current_state_delta_stream` table should look like (notice - # those three memberships at the end with `stream_id=7` because we persisted - # them in the same batch): - # - # TODO: DB rows to better see what's going on. - membership_changes = self.get_success( - self.store.get_current_state_delta_membership_changes_for_user( - user1_id, - from_key=before_room1_token.room_key, - to_key=after_room1_token.room_key, - ) - ) - - join_pos = self.get_success( - self.store.get_position_for_event(join_event.event_id) - ) - - # Let the whole diff show on failure - self.maxDiff = None - self.assertEqual( - membership_changes, - [ - CurrentStateDeltaMembership( - event_id=leave_event.event_id, - # Ideally, this would be `leave_pos` (to match the `event_id`) but - # when events are persisted in a batch, they are all stored in the - # `current_state_delta_stream` table with the minimum - # `stream_ordering` from the batch. - event_pos=join_pos, # leave_pos, - prev_event_id=None, - room_id=room_id1, - membership="leave", - sender=user1_id, - ), - ], - ) - def test_state_reset(self) -> None: """ Test a state reset scenario where the user gets removed from the room (when From 32b8b68df67c6ef4a11921704c570236d2d08592 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 18:13:34 -0500 Subject: [PATCH 62/62] Add TODO to handle state resets See https://github.com/element-hq/synapse/pull/17320#discussion_r1656548733 --- synapse/handlers/sliding_sync.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index db5dd75d04..5dc9867907 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1002,6 +1002,12 @@ class SlidingSyncHandler: stripped_state.append(strip_event(invite_or_knock_event)) + # TODO: Handle state resets. For example, if we see + # `rooms_membership_for_user_at_to_token.membership = Membership.LEAVE` but + # `required_state` doesn't include it, we should indicate to the client that a + # state reset happened. Perhaps we should indicate this by setting `initial: + # True` and empty `required_state`. + return SlidingSyncResult.RoomResult( # TODO: Dummy value name=None,