From 423cca9efe06d78aaca5f62fb74ee7e5bceebe49 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 4 Mar 2022 11:48:15 +0000 Subject: [PATCH] Spread out sending device lists to remote hosts (#12132) --- changelog.d/12132.feature | 1 + synapse/federation/send_queue.py | 2 +- synapse/federation/sender/__init__.py | 26 ++++++---- .../sender/per_destination_queue.py | 10 ++++ synapse/handlers/device.py | 2 +- synapse/replication/tcp/client.py | 2 +- tests/federation/test_federation_sender.py | 52 +++++++++++++++++-- 7 files changed, 79 insertions(+), 16 deletions(-) create mode 100644 changelog.d/12132.feature diff --git a/changelog.d/12132.feature b/changelog.d/12132.feature new file mode 100644 index 0000000000..3b8362ad35 --- /dev/null +++ b/changelog.d/12132.feature @@ -0,0 +1 @@ +Improve performance of logging in for large accounts. diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py index 0d7c4f5067..d720b5fd3f 100644 --- a/synapse/federation/send_queue.py +++ b/synapse/federation/send_queue.py @@ -244,7 +244,7 @@ class FederationRemoteSendQueue(AbstractFederationSender): self.notifier.on_new_replication_data() - def send_device_messages(self, destination: str) -> None: + def send_device_messages(self, destination: str, immediate: bool = False) -> None: """As per FederationSender""" # We don't need to replicate this as it gets sent down a different # stream. diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index 6106a486d1..30e2421efc 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -118,7 +118,12 @@ class AbstractFederationSender(metaclass=abc.ABCMeta): raise NotImplementedError() @abc.abstractmethod - def send_device_messages(self, destination: str) -> None: + def send_device_messages(self, destination: str, immediate: bool = True) -> None: + """Tells the sender that a new device message is ready to be sent to the + destination. The `immediate` flag specifies whether the messages should + be tried to be sent immediately, or whether it can be delayed for a + short while (to aid performance). + """ raise NotImplementedError() @abc.abstractmethod @@ -146,9 +151,8 @@ class AbstractFederationSender(metaclass=abc.ABCMeta): @attr.s -class _PresenceQueue: - """A queue of destinations that need to be woken up due to new presence - updates. +class _DestinationWakeupQueue: + """A queue of destinations that need to be woken up due to new updates. Staggers waking up of per destination queues to ensure that we don't attempt to start TLS connections with many hosts all at once, leading to pinned CPU. @@ -175,7 +179,7 @@ class _PresenceQueue: if not self.processing: self._handle() - @wrap_as_background_process("_PresenceQueue.handle") + @wrap_as_background_process("_DestinationWakeupQueue.handle") async def _handle(self) -> None: """Background process to drain the queue.""" @@ -297,7 +301,7 @@ class FederationSender(AbstractFederationSender): self._external_cache = hs.get_external_cache() - self._presence_queue = _PresenceQueue(self, self.clock) + self._destination_wakeup_queue = _DestinationWakeupQueue(self, self.clock) def _get_per_destination_queue(self, destination: str) -> PerDestinationQueue: """Get or create a PerDestinationQueue for the given destination @@ -614,7 +618,7 @@ class FederationSender(AbstractFederationSender): states, start_loop=False ) - self._presence_queue.add_to_queue(destination) + self._destination_wakeup_queue.add_to_queue(destination) def build_and_send_edu( self, @@ -667,7 +671,7 @@ class FederationSender(AbstractFederationSender): else: queue.send_edu(edu) - def send_device_messages(self, destination: str) -> None: + def send_device_messages(self, destination: str, immediate: bool = False) -> None: if destination == self.server_name: logger.warning("Not sending device update to ourselves") return @@ -677,7 +681,11 @@ class FederationSender(AbstractFederationSender): ): return - self._get_per_destination_queue(destination).attempt_new_transaction() + if immediate: + self._get_per_destination_queue(destination).attempt_new_transaction() + else: + self._get_per_destination_queue(destination).mark_new_data() + self._destination_wakeup_queue.add_to_queue(destination) def wake_destination(self, destination: str) -> None: """Called when we want to retry sending transactions to a remote. diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index c8768f22bc..d80f0ac5e8 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -219,6 +219,16 @@ class PerDestinationQueue: self._pending_edus.append(edu) self.attempt_new_transaction() + def mark_new_data(self) -> None: + """Marks that the destination has new data to send, without starting a + new transaction. + + If a transaction loop is already in progress then a new transcation will + be attempted when the current one finishes. + """ + + self._new_data_to_send = True + def attempt_new_transaction(self) -> None: """Try to start a new transaction to this destination diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 934b5bd734..d90cb259a6 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -506,7 +506,7 @@ class DeviceHandler(DeviceWorkerHandler): "Sending device list update notif for %r to: %r", user_id, hosts ) for host in hosts: - self.federation_sender.send_device_messages(host) + self.federation_sender.send_device_messages(host, immediate=False) log_kv({"message": "sent device update to host", "host": host}) async def notify_user_signature_update( diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 1b8479b0b4..b8fc1d4db9 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -380,7 +380,7 @@ class FederationSenderHandler: # changes. hosts = {row.entity for row in rows if not row.entity.startswith("@")} for host in hosts: - self.federation_sender.send_device_messages(host) + self.federation_sender.send_device_messages(host, immediate=False) elif stream_name == ToDeviceStream.NAME: # The to_device stream includes stuff to be pushed to both local diff --git a/tests/federation/test_federation_sender.py b/tests/federation/test_federation_sender.py index 60e0c31f43..e90592855a 100644 --- a/tests/federation/test_federation_sender.py +++ b/tests/federation/test_federation_sender.py @@ -201,9 +201,12 @@ class FederationSenderDevicesTestCases(HomeserverTestCase): self.assertEqual(len(self.edus), 1) stream_id = self.check_device_update_edu(self.edus.pop(0), u1, "D1", None) + # We queue up device list updates to be sent over federation, so we + # advance to clear the queue. + self.reactor.advance(1) + # a second call should produce no new device EDUs self.hs.get_federation_sender().send_device_messages("host2") - self.pump() self.assertEqual(self.edus, []) # a second device @@ -232,6 +235,10 @@ class FederationSenderDevicesTestCases(HomeserverTestCase): device1_signing_key = self.generate_and_upload_device_signing_key(u1, "D1") device2_signing_key = self.generate_and_upload_device_signing_key(u1, "D2") + # We queue up device list updates to be sent over federation, so we + # advance to clear the queue. + self.reactor.advance(1) + # expect two more edus self.assertEqual(len(self.edus), 2) stream_id = self.check_device_update_edu(self.edus.pop(0), u1, "D1", stream_id) @@ -265,6 +272,10 @@ class FederationSenderDevicesTestCases(HomeserverTestCase): e2e_handler.upload_signing_keys_for_user(u1, cross_signing_keys) ) + # We queue up device list updates to be sent over federation, so we + # advance to clear the queue. + self.reactor.advance(1) + # expect signing key update edu self.assertEqual(len(self.edus), 2) self.assertEqual(self.edus.pop(0)["edu_type"], "m.signing_key_update") @@ -284,6 +295,10 @@ class FederationSenderDevicesTestCases(HomeserverTestCase): ) self.assertEqual(ret["failures"], {}) + # We queue up device list updates to be sent over federation, so we + # advance to clear the queue. + self.reactor.advance(1) + # expect two edus, in one or two transactions. We don't know what order the # devices will be updated. self.assertEqual(len(self.edus), 2) @@ -307,6 +322,10 @@ class FederationSenderDevicesTestCases(HomeserverTestCase): self.login("user", "pass", device_id="D2") self.login("user", "pass", device_id="D3") + # We queue up device list updates to be sent over federation, so we + # advance to clear the queue. + self.reactor.advance(1) + # expect three edus self.assertEqual(len(self.edus), 3) stream_id = self.check_device_update_edu(self.edus.pop(0), u1, "D1", None) @@ -318,6 +337,10 @@ class FederationSenderDevicesTestCases(HomeserverTestCase): self.hs.get_device_handler().delete_devices(u1, ["D1", "D2", "D3"]) ) + # We queue up device list updates to be sent over federation, so we + # advance to clear the queue. + self.reactor.advance(1) + # expect three edus, in an unknown order self.assertEqual(len(self.edus), 3) for edu in self.edus: @@ -350,12 +373,19 @@ class FederationSenderDevicesTestCases(HomeserverTestCase): self.hs.get_device_handler().delete_devices(u1, ["D1", "D2", "D3"]) ) + # We queue up device list updates to be sent over federation, so we + # advance to clear the queue. + self.reactor.advance(1) + self.assertGreaterEqual(mock_send_txn.call_count, 4) # recover the server mock_send_txn.side_effect = self.record_transaction self.hs.get_federation_sender().send_device_messages("host2") - self.pump() + + # We queue up device list updates to be sent over federation, so we + # advance to clear the queue. + self.reactor.advance(1) # for each device, there should be a single update self.assertEqual(len(self.edus), 3) @@ -390,6 +420,10 @@ class FederationSenderDevicesTestCases(HomeserverTestCase): self.hs.get_device_handler().delete_devices(u1, ["D1", "D2", "D3"]) ) + # We queue up device list updates to be sent over federation, so we + # advance to clear the queue. + self.reactor.advance(1) + self.assertGreaterEqual(mock_send_txn.call_count, 4) # run the prune job @@ -401,7 +435,10 @@ class FederationSenderDevicesTestCases(HomeserverTestCase): # recover the server mock_send_txn.side_effect = self.record_transaction self.hs.get_federation_sender().send_device_messages("host2") - self.pump() + + # We queue up device list updates to be sent over federation, so we + # advance to clear the queue. + self.reactor.advance(1) # there should be a single update for this user. self.assertEqual(len(self.edus), 1) @@ -435,6 +472,10 @@ class FederationSenderDevicesTestCases(HomeserverTestCase): self.login("user", "pass", device_id="D2") self.login("user", "pass", device_id="D3") + # We queue up device list updates to be sent over federation, so we + # advance to clear the queue. + self.reactor.advance(1) + # delete them again self.get_success( self.hs.get_device_handler().delete_devices(u1, ["D1", "D2", "D3"]) @@ -451,7 +492,10 @@ class FederationSenderDevicesTestCases(HomeserverTestCase): # recover the server mock_send_txn.side_effect = self.record_transaction self.hs.get_federation_sender().send_device_messages("host2") - self.pump() + + # We queue up device list updates to be sent over federation, so we + # advance to clear the queue. + self.reactor.advance(1) # ... and we should get a single update for this user. self.assertEqual(len(self.edus), 1)