Add actual guranteed order for UNION

We use `union all` because we don't need any of the deduplication logic
(`union` is really a union + distinct). `UNION ALL`` does preserve the
ordering of the operand queries but there is no actual gurantee that it
has this behavior in all scenarios so we need the extra `ORDER BY` at the
bottom.

See https://dba.stackexchange.com/questions/316818/are-results-from-union-all-clauses-always-appended-in-order/316835#316835
This commit is contained in:
Eric Eastwood 2024-06-11 20:29:40 -05:00
parent 2a82ac0cbe
commit 2e1d142892
2 changed files with 9 additions and 1 deletions

View file

@ -922,6 +922,13 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
min_stream = end_token.stream
max_stream = end_token.get_max_stream_pos()
# We use `union all` because we don't need any of the deduplication logic
# (`union` is really a union + distinct). `UNION ALL`` does preserve the
# ordering of the operand queries but there is no actual gurantee that it
# has this behavior in all scenarios so we need the extra `ORDER BY` at the
# bottom.
#
# We're using the subquery syntax for SQLite compatibility.
sql = """
SELECT * FROM (
SELECT instance_name, stream_ordering, topological_ordering, event_id
@ -945,6 +952,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
ORDER BY stream_ordering DESC
LIMIT 1
) AS b
ORDER BY stream_ordering DESC
"""
txn.execute(
sql,

View file

@ -449,7 +449,7 @@ class GetLastEventInRoomBeforeStreamOrderingTestCase(HomeserverTestCase):
# Assemble a token that encompasses event1 -> event4 on worker1
end_token = RoomStreamToken(
stream=event_pos1.stream,
stream=event_pos2.stream,
instance_map=immutabledict({"worker1": event_pos4.stream}),
)