Add a background database update to purge account data for deactivated users. (#11655)

This commit is contained in:
reivilibre 2022-02-02 11:37:18 +00:00 committed by GitHub
parent 513913cc6b
commit af795173be
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 244 additions and 59 deletions

View file

@ -0,0 +1 @@
Remove account data (including client config, push rules and ignored users) upon user deactivation.

View file

@ -36,6 +36,8 @@ from synapse.logging.context import (
run_in_background, run_in_background,
) )
from synapse.storage.database import DatabasePool, make_conn from synapse.storage.database import DatabasePool, make_conn
from synapse.storage.databases.main import PushRuleStore
from synapse.storage.databases.main.account_data import AccountDataWorkerStore
from synapse.storage.databases.main.client_ips import ClientIpBackgroundUpdateStore from synapse.storage.databases.main.client_ips import ClientIpBackgroundUpdateStore
from synapse.storage.databases.main.deviceinbox import DeviceInboxBackgroundUpdateStore from synapse.storage.databases.main.deviceinbox import DeviceInboxBackgroundUpdateStore
from synapse.storage.databases.main.devices import DeviceBackgroundUpdateStore from synapse.storage.databases.main.devices import DeviceBackgroundUpdateStore
@ -180,6 +182,8 @@ class Store(
UserDirectoryBackgroundUpdateStore, UserDirectoryBackgroundUpdateStore,
EndToEndKeyBackgroundStore, EndToEndKeyBackgroundStore,
StatsStore, StatsStore,
AccountDataWorkerStore,
PushRuleStore,
PusherWorkerStore, PusherWorkerStore,
PresenceBackgroundUpdateStore, PresenceBackgroundUpdateStore,
GroupServerWorkerStore, GroupServerWorkerStore,

View file

@ -106,6 +106,11 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
"AccountDataAndTagsChangeCache", account_max "AccountDataAndTagsChangeCache", account_max
) )
self.db_pool.updates.register_background_update_handler(
"delete_account_data_for_deactivated_users",
self._delete_account_data_for_deactivated_users,
)
def get_max_account_data_stream_id(self) -> int: def get_max_account_data_stream_id(self) -> int:
"""Get the current max stream ID for account data stream """Get the current max stream ID for account data stream
@ -549,72 +554,121 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
async def purge_account_data_for_user(self, user_id: str) -> None: async def purge_account_data_for_user(self, user_id: str) -> None:
""" """
Removes the account data for a user. Removes ALL the account data for a user.
Intended to be used upon user deactivation.
This is intended to be used upon user deactivation and also removes any Also purges the user from the ignored_users cache table
derived information from account data (e.g. push rules and ignored users). and the push_rules cache tables.
Args:
user_id: The user ID to remove data for.
""" """
def purge_account_data_for_user_txn(txn: LoggingTransaction) -> None:
# Purge from the primary account_data tables.
self.db_pool.simple_delete_txn(
txn, table="account_data", keyvalues={"user_id": user_id}
)
self.db_pool.simple_delete_txn(
txn, table="room_account_data", keyvalues={"user_id": user_id}
)
# Purge from ignored_users where this user is the ignorer.
# N.B. We don't purge where this user is the ignoree, because that
# interferes with other users' account data.
# It's also not this user's data to delete!
self.db_pool.simple_delete_txn(
txn, table="ignored_users", keyvalues={"ignorer_user_id": user_id}
)
# Remove the push rules
self.db_pool.simple_delete_txn(
txn, table="push_rules", keyvalues={"user_name": user_id}
)
self.db_pool.simple_delete_txn(
txn, table="push_rules_enable", keyvalues={"user_name": user_id}
)
self.db_pool.simple_delete_txn(
txn, table="push_rules_stream", keyvalues={"user_id": user_id}
)
# Invalidate caches as appropriate
self._invalidate_cache_and_stream(
txn, self.get_account_data_for_room_and_type, (user_id,)
)
self._invalidate_cache_and_stream(
txn, self.get_account_data_for_user, (user_id,)
)
self._invalidate_cache_and_stream(
txn, self.get_global_account_data_by_type_for_user, (user_id,)
)
self._invalidate_cache_and_stream(
txn, self.get_account_data_for_room, (user_id,)
)
self._invalidate_cache_and_stream(
txn, self.get_push_rules_for_user, (user_id,)
)
self._invalidate_cache_and_stream(
txn, self.get_push_rules_enabled_for_user, (user_id,)
)
# This user might be contained in the ignored_by cache for other users,
# so we have to invalidate it all.
self._invalidate_all_cache_and_stream(txn, self.ignored_by)
await self.db_pool.runInteraction( await self.db_pool.runInteraction(
"purge_account_data_for_user_txn", "purge_account_data_for_user_txn",
purge_account_data_for_user_txn, self._purge_account_data_for_user_txn,
user_id,
) )
def _purge_account_data_for_user_txn(
self, txn: LoggingTransaction, user_id: str
) -> None:
"""
See `purge_account_data_for_user`.
"""
# Purge from the primary account_data tables.
self.db_pool.simple_delete_txn(
txn, table="account_data", keyvalues={"user_id": user_id}
)
self.db_pool.simple_delete_txn(
txn, table="room_account_data", keyvalues={"user_id": user_id}
)
# Purge from ignored_users where this user is the ignorer.
# N.B. We don't purge where this user is the ignoree, because that
# interferes with other users' account data.
# It's also not this user's data to delete!
self.db_pool.simple_delete_txn(
txn, table="ignored_users", keyvalues={"ignorer_user_id": user_id}
)
# Remove the push rules
self.db_pool.simple_delete_txn(
txn, table="push_rules", keyvalues={"user_name": user_id}
)
self.db_pool.simple_delete_txn(
txn, table="push_rules_enable", keyvalues={"user_name": user_id}
)
self.db_pool.simple_delete_txn(
txn, table="push_rules_stream", keyvalues={"user_id": user_id}
)
# Invalidate caches as appropriate
self._invalidate_cache_and_stream(
txn, self.get_account_data_for_room_and_type, (user_id,)
)
self._invalidate_cache_and_stream(
txn, self.get_account_data_for_user, (user_id,)
)
self._invalidate_cache_and_stream(
txn, self.get_global_account_data_by_type_for_user, (user_id,)
)
self._invalidate_cache_and_stream(
txn, self.get_account_data_for_room, (user_id,)
)
self._invalidate_cache_and_stream(txn, self.get_push_rules_for_user, (user_id,))
self._invalidate_cache_and_stream(
txn, self.get_push_rules_enabled_for_user, (user_id,)
)
# This user might be contained in the ignored_by cache for other users,
# so we have to invalidate it all.
self._invalidate_all_cache_and_stream(txn, self.ignored_by)
async def _delete_account_data_for_deactivated_users(
self, progress: dict, batch_size: int
) -> int:
"""
Retroactively purges account data for users that have already been deactivated.
Gets run as a background update caused by a schema delta.
"""
last_user: str = progress.get("last_user", "")
def _delete_account_data_for_deactivated_users_txn(
txn: LoggingTransaction,
) -> int:
sql = """
SELECT name FROM users
WHERE deactivated = ? and name > ?
ORDER BY name ASC
LIMIT ?
"""
txn.execute(sql, (1, last_user, batch_size))
users = [row[0] for row in txn]
for user in users:
self._purge_account_data_for_user_txn(txn, user_id=user)
if users:
self.db_pool.updates._background_update_progress_txn(
txn,
"delete_account_data_for_deactivated_users",
{"last_user": users[-1]},
)
return len(users)
number_deleted = await self.db_pool.runInteraction(
"_delete_account_data_for_deactivated_users",
_delete_account_data_for_deactivated_users_txn,
)
if number_deleted < batch_size:
await self.db_pool.updates._end_background_update(
"delete_account_data_for_deactivated_users"
)
return number_deleted
class AccountDataStore(AccountDataWorkerStore): class AccountDataStore(AccountDataWorkerStore):
pass pass

View file

@ -0,0 +1,20 @@
/* Copyright 2021 The Matrix.org Foundation C.I.C
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-- We want to retroactively delete account data for users that were already
-- deactivated.
INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
(6803, 'delete_account_data_for_deactivated_users', '{}');

View file

@ -217,3 +217,109 @@ class DeactivateAccountTestCase(HomeserverTestCase):
self.assertEqual( self.assertEqual(
self.get_success(self._store.ignored_by("@sheltie:test")), set() self.get_success(self._store.ignored_by("@sheltie:test")), set()
) )
def _rerun_retroactive_account_data_deletion_update(self) -> None:
# Reset the 'all done' flag
self._store.db_pool.updates._all_done = False
self.get_success(
self._store.db_pool.simple_insert(
"background_updates",
{
"update_name": "delete_account_data_for_deactivated_users",
"progress_json": "{}",
},
)
)
self.wait_for_background_updates()
def test_account_data_deleted_retroactively_by_background_update_if_deactivated(
self,
) -> None:
"""
Tests that a user, who deactivated their account before account data was
deleted automatically upon deactivation, has their account data retroactively
scrubbed by the background update.
"""
# Request the deactivation of our account
self._deactivate_my_account()
# Add some account data
# (we do this after the deactivation so that the act of deactivating doesn't
# clear it out. This emulates a user that was deactivated before this was cleared
# upon deactivation.)
self.get_success(
self._store.add_account_data_for_user(
self.user,
AccountDataTypes.DIRECT,
{"@someone:remote": ["!somewhere:remote"]},
)
)
# Check that the account data is there.
self.assertIsNotNone(
self.get_success(
self._store.get_global_account_data_by_type_for_user(
self.user,
AccountDataTypes.DIRECT,
)
),
)
# Re-run the retroactive deletion update
self._rerun_retroactive_account_data_deletion_update()
# Check that the account data was cleared.
self.assertIsNone(
self.get_success(
self._store.get_global_account_data_by_type_for_user(
self.user,
AccountDataTypes.DIRECT,
)
),
)
def test_account_data_preserved_by_background_update_if_not_deactivated(
self,
) -> None:
"""
Tests that the background update does not scrub account data for users that have
not been deactivated.
"""
# Add some account data
# (we do this after the deactivation so that the act of deactivating doesn't
# clear it out. This emulates a user that was deactivated before this was cleared
# upon deactivation.)
self.get_success(
self._store.add_account_data_for_user(
self.user,
AccountDataTypes.DIRECT,
{"@someone:remote": ["!somewhere:remote"]},
)
)
# Check that the account data is there.
self.assertIsNotNone(
self.get_success(
self._store.get_global_account_data_by_type_for_user(
self.user,
AccountDataTypes.DIRECT,
)
),
)
# Re-run the retroactive deletion update
self._rerun_retroactive_account_data_deletion_update()
# Check that the account data was NOT cleared.
self.assertIsNotNone(
self.get_success(
self._store.get_global_account_data_by_type_for_user(
self.user,
AccountDataTypes.DIRECT,
)
),
)