From 1fc66c7460b7e6c503dbeb6577fb0ba3cf7dfd83 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff
Date: Tue, 14 Nov 2017 09:23:56 +0000
Subject: [PATCH 1/2] Add a load of logging to the room_list handler

So we can see what it gets up to.
---
 synapse/handlers/room_list.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py
index 2cf34e51cb..928ee38aea 100644
--- a/synapse/handlers/room_list.py
+++ b/synapse/handlers/room_list.py
@@ -154,6 +154,8 @@ class RoomListHandler(BaseHandler):
             # We want larger rooms to be first, hence negating num_joined_users
             rooms_to_order_value[room_id] = (-num_joined_users, room_id)
 
+        logger.info("Getting ordering for %i rooms since %s",
+                    len(room_ids), stream_token)
         yield concurrently_execute(get_order_for_room, room_ids, 10)
 
         sorted_entries = sorted(rooms_to_order_value.items(), key=lambda e: e[1])
@@ -181,17 +183,25 @@ class RoomListHandler(BaseHandler):
             rooms_to_scan = rooms_to_scan[:since_token.current_limit]
             rooms_to_scan.reverse()
 
+        logger.info("After sorting and filtering, %i rooms remain",
+                    len(rooms_to_scan))
+
         # Actually generate the entries. _append_room_entry_to_chunk will append to
         # chunk but will stop if len(chunk) > limit
         chunk = []
         if limit and not search_filter:
             step = limit + 1
             for i in xrange(0, len(rooms_to_scan), step):
+                logger.info("Processing %i rooms for result", step)
                 # We iterate here because the vast majority of cases we'll stop
                 # at first iteration, but occaisonally _append_room_entry_to_chunk
                 # won't append to the chunk and so we need to loop again.
                 # We don't want to scan over the entire range either as that
                 # would potentially waste a lot of work.
+                #
+                # XXX why would that happen? _append_room_entry_to_chunk will
+                # only exclude rooms which don't match search_filter, but we
+                # know search_filter is None here.
                 yield concurrently_execute(
                     lambda r: self._append_room_entry_to_chunk(
                         r, rooms_to_num_joined[r],
@@ -199,9 +209,11 @@ class RoomListHandler(BaseHandler):
                     ),
                     rooms_to_scan[i:i + step], 10
                 )
+                logger.info("Now %i rooms in result", len(chunk))
                 if len(chunk) >= limit + 1:
                     break
         else:
+            logger.info("Processing %i rooms for result", len(rooms_to_scan))
             yield concurrently_execute(
                 lambda r: self._append_room_entry_to_chunk(
                     r, rooms_to_num_joined[r],
@@ -209,6 +221,7 @@ class RoomListHandler(BaseHandler):
                 ),
                 rooms_to_scan, 5
             )
+            logger.info("Now %i rooms in result", len(chunk))
 
         chunk.sort(key=lambda e: (-e["num_joined_members"], e["room_id"]))
 
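A note on the helper these new log lines bracket: concurrently_execute(func, args, limit) runs func over every item of args with at most `limit` calls in flight at once. The sketch below is a rough model of those semantics only; it is written with asyncio for brevity, whereas Synapse's real helper is Twisted-based, so the worker structure here is an assumption rather than the actual implementation:

    # A minimal sketch of the semantics of concurrently_execute(func,
    # args, limit), as used above with concurrency 10 and 5. Synapse's
    # real helper is Twisted-based; this asyncio rendering is
    # illustrative only.
    import asyncio

    async def concurrently_execute(func, args, limit):
        it = iter(args)  # shared iterator: each worker pulls the next item

        async def worker():
            for arg in it:
                await func(arg)

        # `limit` workers drain the shared iterator until it is empty,
        # so at most `limit` calls to func are in flight at any time.
        await asyncio.gather(*(worker() for _ in range(limit)))

Each "Processing %i rooms" / "Now %i rooms in result" pair added above brackets one such concurrent drain, which is what makes the timings useful for spotting where a room-list request spends its time.
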
From 44a1bfd6a6a1cda272677c9ea8704957bc940509 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff
Date: Tue, 14 Nov 2017 09:39:54 +0000
Subject: [PATCH 2/2] Reshuffle room list request code

I'm not entirely sure if this will actually help anything, but it
simplifies the code and might give further clues about why room list
search requests are blowing out the get_current_state_ids caches.
---
 synapse/handlers/room_list.py | 51 +++++++++++++++++------------------
 1 file changed, 24 insertions(+), 27 deletions(-)

diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py
index 928ee38aea..bb40075387 100644
--- a/synapse/handlers/room_list.py
+++ b/synapse/handlers/room_list.py
@@ -186,42 +186,39 @@ class RoomListHandler(BaseHandler):
         logger.info("After sorting and filtering, %i rooms remain",
                     len(rooms_to_scan))
 
-        # Actually generate the entries. _append_room_entry_to_chunk will append to
-        # chunk but will stop if len(chunk) > limit
-        chunk = []
-        if limit and not search_filter:
+        # _append_room_entry_to_chunk will append to chunk but will stop if
+        # len(chunk) > limit
+        #
+        # Normally we will generate enough results on the first iteration here,
+        # but if there is a search filter, _append_room_entry_to_chunk may
+        # filter some results out, in which case we loop again.
+        #
+        # We don't want to scan over the entire range either as that
+        # would potentially waste a lot of work.
+        #
+        # XXX if there is no limit, we may end up DoSing the server with
+        # calls to get_current_state_ids for every single room on the
+        # server. Surely we should cap this somehow?
+        #
+        if limit:
             step = limit + 1
-            for i in xrange(0, len(rooms_to_scan), step):
-                logger.info("Processing %i rooms for result", step)
-                # We iterate here because the vast majority of cases we'll stop
-                # at first iteration, but occaisonally _append_room_entry_to_chunk
-                # won't append to the chunk and so we need to loop again.
-                # We don't want to scan over the entire range either as that
-                # would potentially waste a lot of work.
-                #
-                # XXX why would that happen? _append_room_entry_to_chunk will
-                # only exclude rooms which don't match search_filter, but we
-                # know search_filter is None here.
-                yield concurrently_execute(
-                    lambda r: self._append_room_entry_to_chunk(
-                        r, rooms_to_num_joined[r],
-                        chunk, limit, search_filter
-                    ),
-                    rooms_to_scan[i:i + step], 10
-                )
-                logger.info("Now %i rooms in result", len(chunk))
-                if len(chunk) >= limit + 1:
-                    break
         else:
-            logger.info("Processing %i rooms for result", len(rooms_to_scan))
+            step = len(rooms_to_scan)
+
+        chunk = []
+        for i in xrange(0, len(rooms_to_scan), step):
+            batch = rooms_to_scan[i:i + step]
+            logger.info("Processing %i rooms for result", len(batch))
             yield concurrently_execute(
                 lambda r: self._append_room_entry_to_chunk(
                     r, rooms_to_num_joined[r],
                     chunk, limit, search_filter
                 ),
-                rooms_to_scan, 5
+                batch, 5,
             )
             logger.info("Now %i rooms in result", len(chunk))
+            if len(chunk) >= limit + 1:
+                break
 
         chunk.sort(key=lambda e: (-e["num_joined_members"], e["room_id"]))
 
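The shape the second patch converges on -- scan rooms_to_scan in batches of limit + 1, stopping once more than a page's worth of entries has accumulated -- can be shown standalone. In the sketch below the function name paginate_filtered and the guards around a falsy limit are illustrative assumptions, not Synapse's API:

    # Standalone sketch of the batched scan-with-early-exit pattern.
    # paginate_filtered is a made-up name; it is not part of Synapse.
    def paginate_filtered(candidates, matches, limit):
        # Collect matching candidates until at least limit + 1 are held;
        # the surplus entry tells the caller that a further page exists.
        # Batching means a filter that rejects few candidates rarely
        # forces a scan beyond the first batch.
        step = limit + 1 if limit else max(len(candidates), 1)  # step > 0
        chunk = []
        for i in range(0, len(candidates), step):
            for c in candidates[i:i + step]:
                if matches(c):
                    chunk.append(c)
            if limit and len(chunk) >= limit + 1:
                break
        return chunk

    rooms = ["!a:hs", "!b:hs", "!c:hs", "!d:hs", "!e:hs"]
    # limit=2 gives batches of 3; "!c:hs" is rejected, so the first
    # batch yields only two entries and a second batch must be scanned.
    page = paginate_filtered(rooms, lambda r: r != "!c:hs", limit=2)
    assert page == ["!a:hs", "!b:hs", "!d:hs", "!e:hs"]

A caller would truncate the result to limit entries and use the surplus entry to decide whether to advertise a next page. The XXX in the patch still applies to this sketch: with no limit, the single batch covers every candidate, so the whole list is scanned.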