From cc4944c83e25c3fd622d0d553a6d3fa585f18a47 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 26 Sep 2016 01:18:41 +0100 Subject: [PATCH] speed up state deduplication migrations on sqlite Remove the self-inner-join and instead abuse sqlite's quirk that it allows us to mix aggregate & non-aggregate columns in a grouped select to return the event_ids of the events in a state group: see http://marc.info/?l=sqlite-users&m=141460824410980&w=2. This speeds up these queries by around 25x, drastically improving performance when sqlite is upgraded to 0.18. --- synapse/storage/state.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 7eb342674c..e1b9ceb776 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -389,14 +389,16 @@ class StateStore(SQLBaseStore): if next_group: group_tree.append(next_group) + # sqlite lets us mix aggregate & non-aggregate columns in + # a grouped select - for details, see: + # http://marc.info/?l=sqlite-users&m=141460824410980&w=2 + # As a result, we can avoid a self-join to populate + # event_id, which empirically speeds things up by 25x. sql = (""" - SELECT type, state_key, event_id FROM state_groups_state - INNER JOIN ( - SELECT type, state_key, max(state_group) as state_group - FROM state_groups_state - WHERE state_group IN (%s) %s - GROUP BY type, state_key - ) USING (type, state_key, state_group); + SELECT type, state_key, event_id, max(state_group) as state_group + FROM state_groups_state + WHERE state_group IN (%s) %s + GROUP BY type, state_key; """) % (",".join("?" for _ in group_tree), where_clause,) args = list(group_tree)