mirror of
https://github.com/element-hq/synapse
synced 2024-07-01 07:13:29 +00:00
Add some debug to keep track of client state desynchronisation
Fixes https://github.com/element-hq/crypto-internal/issues/179
This commit is contained in:
parent
657b8cc75c
commit
bc435f7d9d
|
@ -88,6 +88,10 @@ if TYPE_CHECKING:
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Logging for https://github.com/matrix-org/matrix-spec/issues/1209 and
|
||||||
|
# https://github.com/element-hq/synapse/issues/16940
|
||||||
|
client_state_desync_logger = logging.getLogger("synapse.client_state_desync_debug")
|
||||||
|
|
||||||
# Counts the number of times we returned a non-empty sync. `type` is one of
|
# Counts the number of times we returned a non-empty sync. `type` is one of
|
||||||
# "initial_sync", "full_state_sync" or "incremental_sync", `lazy_loaded` is
|
# "initial_sync", "full_state_sync" or "incremental_sync", `lazy_loaded` is
|
||||||
# "true" or "false" depending on if the request asked for lazy loaded members or
|
# "true" or "false" depending on if the request asked for lazy loaded members or
|
||||||
|
@ -1214,6 +1218,12 @@ class SyncHandler:
|
||||||
previous_timeline_end={},
|
previous_timeline_end={},
|
||||||
lazy_load_members=lazy_load_members,
|
lazy_load_members=lazy_load_members,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if client_state_desync_logger.isEnabledFor(logging.DEBUG):
|
||||||
|
await self._log_client_state_desync(
|
||||||
|
room_id, None, state_ids, timeline_state, lazy_load_members
|
||||||
|
)
|
||||||
|
|
||||||
return state_ids
|
return state_ids
|
||||||
|
|
||||||
async def _compute_state_delta_for_incremental_sync(
|
async def _compute_state_delta_for_incremental_sync(
|
||||||
|
@ -1359,6 +1369,15 @@ class SyncHandler:
|
||||||
lazy_load_members=lazy_load_members,
|
lazy_load_members=lazy_load_members,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if client_state_desync_logger.isEnabledFor(logging.DEBUG):
|
||||||
|
await self._log_client_state_desync(
|
||||||
|
room_id,
|
||||||
|
since_token,
|
||||||
|
state_ids,
|
||||||
|
timeline_state,
|
||||||
|
lazy_load_members,
|
||||||
|
)
|
||||||
|
|
||||||
return state_ids
|
return state_ids
|
||||||
|
|
||||||
async def _find_missing_partial_state_memberships(
|
async def _find_missing_partial_state_memberships(
|
||||||
|
@ -1475,6 +1494,125 @@ class SyncHandler:
|
||||||
|
|
||||||
return additional_state_ids
|
return additional_state_ids
|
||||||
|
|
||||||
|
async def _log_client_state_desync(
|
||||||
|
self,
|
||||||
|
room_id: str,
|
||||||
|
since_token: Optional[StreamToken],
|
||||||
|
sync_response_state_state: StateMap[str],
|
||||||
|
sync_response_timeline_state: StateMap[str],
|
||||||
|
lazy_load_members: bool,
|
||||||
|
) -> None:
|
||||||
|
"""
|
||||||
|
Logging to see how often the client's state gets out of sync with the
|
||||||
|
actual current state of the room.
|
||||||
|
|
||||||
|
There are few different potential failure modes here:
|
||||||
|
|
||||||
|
* State resolution can cause changes in the state of the room that don't
|
||||||
|
directly correspond to events with the corresponding (type, state_key).
|
||||||
|
https://github.com/matrix-org/matrix-spec/issues/1209 discusses this in
|
||||||
|
more detail.
|
||||||
|
|
||||||
|
* Even where there is an event that causes a given state change, Synapse
|
||||||
|
may not serve it to the client, since it works on state at specific points
|
||||||
|
in the DAG, rather than "current state".
|
||||||
|
See https://github.com/element-hq/synapse/issues/16940.
|
||||||
|
|
||||||
|
* Lazy-loading adds more complexity, as it means that events that would
|
||||||
|
normally be served via the `state` part of an incremental sync are filtered
|
||||||
|
out.
|
||||||
|
|
||||||
|
To try to get a handle on this, let's put ourselves in the shoes of a client,
|
||||||
|
and compare the state they will calculate against the actual current state.
|
||||||
|
"""
|
||||||
|
# We only care about membership events.
|
||||||
|
state_filter = StateFilter.from_types(types=(("m.room.member", None),))
|
||||||
|
|
||||||
|
if since_token is None:
|
||||||
|
if lazy_load_members:
|
||||||
|
# For initial syncs with lazy-loading enabled, there's not too much
|
||||||
|
# concern here. We know the client will do a `/members` query before
|
||||||
|
# doing any encryption, so what sync returns isn't too important.
|
||||||
|
#
|
||||||
|
# (Of course, then `/members` might also return an incomplete list, but
|
||||||
|
# that's a separate problem.)
|
||||||
|
return
|
||||||
|
|
||||||
|
# For regular initial syncs, compare the returned response with the actual
|
||||||
|
# current state.
|
||||||
|
client_calculated_state = {}
|
||||||
|
client_calculated_state.update(sync_response_state_state)
|
||||||
|
client_calculated_state.update(sync_response_timeline_state)
|
||||||
|
else:
|
||||||
|
# For an incremental (gappy or otherwise) sync, let's assume the client has
|
||||||
|
# a complete membership list as of the last sync (or rather, at
|
||||||
|
# `since_token`, which is the closest approximation we have to it
|
||||||
|
# right now), and see what they would calculate as the current state given
|
||||||
|
# this sync update.
|
||||||
|
|
||||||
|
client_calculated_state = dict(
|
||||||
|
await self.get_state_at(
|
||||||
|
room_id,
|
||||||
|
stream_position=since_token,
|
||||||
|
state_filter=state_filter,
|
||||||
|
await_full_state=False,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
client_calculated_state.update(sync_response_state_state)
|
||||||
|
client_calculated_state.update(sync_response_timeline_state)
|
||||||
|
|
||||||
|
current_state = await self._state_storage_controller.get_current_state_ids(
|
||||||
|
room_id, state_filter=state_filter, await_full_state=False
|
||||||
|
)
|
||||||
|
missing_users = await self._calculate_missing_members(
|
||||||
|
current_state, client_calculated_state
|
||||||
|
)
|
||||||
|
if missing_users:
|
||||||
|
client_state_desync_logger.debug(
|
||||||
|
"client state discrepancy in incremental sync in room %s: missing users %s",
|
||||||
|
room_id,
|
||||||
|
missing_users,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _calculate_missing_members(
|
||||||
|
self,
|
||||||
|
actual_state: StateMap[str],
|
||||||
|
client_calculated_state: StateMap[str],
|
||||||
|
) -> List[str]:
|
||||||
|
"""Helper for `_log_client_state_desync`: calculates the difference in
|
||||||
|
joined members between two state maps.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A list of user IDs
|
||||||
|
"""
|
||||||
|
missing_users = []
|
||||||
|
|
||||||
|
async def event_id_to_membership(event_id: Optional[str]) -> Optional[str]:
|
||||||
|
if event_id is None:
|
||||||
|
return None
|
||||||
|
event = await self.store.get_event(event_id, allow_none=True)
|
||||||
|
if event is None:
|
||||||
|
return "MISSING_EVENT"
|
||||||
|
return event.membership
|
||||||
|
|
||||||
|
# Check for joined members in the actual state that are missing or have a
|
||||||
|
# different membership in the actual state.
|
||||||
|
for (event_type, state_key), actual_event_id in actual_state.items():
|
||||||
|
if event_type != EventTypes.Member:
|
||||||
|
continue
|
||||||
|
|
||||||
|
calculated_event_id = client_calculated_state.get((event_type, state_key))
|
||||||
|
if calculated_event_id != actual_event_id:
|
||||||
|
actual_membership = event_id_to_membership(actual_event_id)
|
||||||
|
calculated_membership = event_id_to_membership(calculated_event_id)
|
||||||
|
if (
|
||||||
|
actual_membership == Membership.JOIN
|
||||||
|
and calculated_membership != Membership.JOIN
|
||||||
|
):
|
||||||
|
missing_users.append(state_key)
|
||||||
|
|
||||||
|
return missing_users
|
||||||
|
|
||||||
async def unread_notifs_for_room_id(
|
async def unread_notifs_for_room_id(
|
||||||
self, room_id: str, sync_config: SyncConfig
|
self, room_id: str, sync_config: SyncConfig
|
||||||
) -> RoomNotifCounts:
|
) -> RoomNotifCounts:
|
||||||
|
|
Loading…
Reference in a new issue