From 5a7d1ecffcab7a94caf70471a2eec56eb868573c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 19 Jan 2016 16:01:05 +0000 Subject: [PATCH 1/3] Add regex cache. Only calculate push actions for users that have sent read receipts, and are on that server --- synapse/handlers/_base.py | 2 +- synapse/handlers/federation.py | 2 +- synapse/push/action_generator.py | 7 ++++--- synapse/push/bulk_push_rule_evaluator.py | 15 ++++++++++----- synapse/push/push_rule_evaluator.py | 20 +++++++++++++++++--- synapse/server.py | 4 ++++ synapse/storage/receipts.py | 14 +++++++++++++- 7 files changed, 50 insertions(+), 14 deletions(-) diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index 2d1167296a..5c7617de44 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -266,7 +266,7 @@ class BaseHandler(object): event, context=context ) - action_generator = ActionGenerator(self.store) + action_generator = ActionGenerator(self.hs) yield action_generator.handle_push_actions_for_event( event, self ) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 4b94940e99..6c19d6ae8c 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -245,7 +245,7 @@ class FederationHandler(BaseHandler): yield user_joined_room(self.distributor, user, event.room_id) if not backfilled and not event.internal_metadata.is_outlier(): - action_generator = ActionGenerator(self.store) + action_generator = ActionGenerator(self.hs) yield action_generator.handle_push_actions_for_event( event, self ) diff --git a/synapse/push/action_generator.py b/synapse/push/action_generator.py index 4cf94f6c61..1d2e558f9a 100644 --- a/synapse/push/action_generator.py +++ b/synapse/push/action_generator.py @@ -25,8 +25,9 @@ logger = logging.getLogger(__name__) class ActionGenerator: - def __init__(self, store): - self.store = store + def __init__(self, hs): + self.hs = hs + self.store = hs.get_datastore() # really we want to get all user ids 
and all profile tags too, # since we want the actions for each profile tag for every user and # also actions for a client with no profile tag for each user. @@ -42,7 +43,7 @@ class ActionGenerator: ) bulk_evaluator = yield bulk_push_rule_evaluator.evaluator_for_room_id( - event.room_id, self.store + event.room_id, self.hs, self.store ) actions_by_user = yield bulk_evaluator.action_for_event_by_user(event, handler) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index efd686fa6e..1000ae6301 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -36,6 +36,7 @@ def decode_rule_json(rule): @defer.inlineCallbacks def _get_rules(room_id, user_ids, store): rules_by_user = yield store.bulk_get_push_rules(user_ids) + rules_by_user = { uid: baserules.list_with_base_rules([ decode_rule_json(rule_list) @@ -47,12 +48,16 @@ def _get_rules(room_id, user_ids, store): @defer.inlineCallbacks -def evaluator_for_room_id(room_id, store): - users = yield store.get_users_in_room(room_id) - rules_by_user = yield _get_rules(room_id, users, store) +def evaluator_for_room_id(room_id, hs, store): + results = yield store.get_receipts_for_room(room_id, "m.read") + user_ids = [ + row["user_id"] for row in results + if hs.is_mine_id(row["user_id"]) + ] + rules_by_user = yield _get_rules(room_id, user_ids, store) defer.returnValue(BulkPushRuleEvaluator( - room_id, rules_by_user, users, store + room_id, rules_by_user, user_ids, store )) @@ -129,7 +134,7 @@ def _condition_checker(evaluator, conditions, uid, display_name, cache): res = evaluator.matches(cond, uid, display_name, None) if _id: - cache[_id] = res + cache[_id] = bool(res) if not res: return False diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py index 4654994d2d..753b6469e2 100644 --- a/synapse/push/push_rule_evaluator.py +++ b/synapse/push/push_rule_evaluator.py @@ -22,6 +22,7 @@ import simplejson as 
json import re from synapse.types import UserID +from synapse.util.caches.lrucache import LruCache logger = logging.getLogger(__name__) @@ -277,18 +278,18 @@ def _glob_matches(glob, value, word_boundary=False): ) if word_boundary: r = r"\b%s\b" % (r,) - r = re.compile(r, flags=re.IGNORECASE) + r = _compile_regex(r) return r.search(value) else: r = r + "$" - r = re.compile(r, flags=re.IGNORECASE) + r = _compile_regex(r) return r.match(value) elif word_boundary: r = re.escape(glob) r = r"\b%s\b" % (r,) - r = re.compile(r, flags=re.IGNORECASE) + r = _compile_regex(r) return r.search(value) else: @@ -306,3 +307,16 @@ def _flatten_dict(d, prefix=[], result={}): _flatten_dict(value, prefix=(prefix+[key]), result=result) return result + + +regex_cache = LruCache(100000) + + +def _compile_regex(regex_str): + r = regex_cache.get(regex_str, None) + if r: + return r + + r = re.compile(regex_str, flags=re.IGNORECASE) + regex_cache[regex_str] = r + return r diff --git a/synapse/server.py b/synapse/server.py index ffd4f936d0..63f9059837 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -96,6 +96,7 @@ class BaseHomeServer(object): hostname : The hostname for the server. """ self.hostname = hostname + self.hostname_with_colon = ":" + hostname self._building = {} # Other kwargs are explicit dependencies @@ -139,6 +140,9 @@ class BaseHomeServer(object): def is_mine(self, domain_specific_string): return domain_specific_string.domain == self.hostname + def is_mine_id(self, string): + return string.endswith(self.hostname_with_colon) + # Build magic accessors for every dependency for depname in BaseHomeServer.DEPENDENCIES: BaseHomeServer._make_dependency_method(depname) diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 21cf88b3da..c80e576620 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -14,7 +14,7 @@ # limitations under the License. 
from ._base import SQLBaseStore -from synapse.util.caches.descriptors import cachedInlineCallbacks, cachedList +from synapse.util.caches.descriptors import cachedInlineCallbacks, cachedList, cached from synapse.util.caches import cache_counter, caches_by_name from twisted.internet import defer @@ -33,6 +33,18 @@ class ReceiptsStore(SQLBaseStore): self._receipts_stream_cache = _RoomStreamChangeCache() + @cached(num_args=2) + def get_receipts_for_room(self, room_id, receipt_type): + return self._simple_select_list( + table="receipts_linearized", + keyvalues={ + "room_id": room_id, + "receipt_type": receipt_type, + }, + retcols=("user_id", "event_id"), + desc="get_receipts_for_room", + ) + @defer.inlineCallbacks def get_linearized_receipts_for_rooms(self, room_ids, to_key, from_key=None): """Get receipts for multiple rooms for sending to clients. From fb5d8e58ff280c9fc24123adca3254e46ac63097 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 19 Jan 2016 16:07:07 +0000 Subject: [PATCH 2/3] Change regex cache size to 5000 --- synapse/push/push_rule_evaluator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py index 753b6469e2..dca018af95 100644 --- a/synapse/push/push_rule_evaluator.py +++ b/synapse/push/push_rule_evaluator.py @@ -309,7 +309,7 @@ def _flatten_dict(d, prefix=[], result={}): return result -regex_cache = LruCache(100000) +regex_cache = LruCache(5000) def _compile_regex(regex_str): From 2818a000aa5ca7968f196898908f31a732387791 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 19 Jan 2016 16:11:39 +0000 Subject: [PATCH 3/3] Use split rather than endswith --- synapse/server.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/synapse/server.py b/synapse/server.py index 63f9059837..4a5796b982 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -96,7 +96,6 @@ class BaseHomeServer(object): hostname : The hostname for the server. 
""" self.hostname = hostname - self.hostname_with_colon = ":" + hostname self._building = {} # Other kwargs are explicit dependencies @@ -141,7 +140,7 @@ class BaseHomeServer(object): return domain_specific_string.domain == self.hostname def is_mine_id(self, string): - return string.endswith(self.hostname_with_colon) + return string.split(":", 1)[1] == self.hostname # Build magic accessors for every dependency for depname in BaseHomeServer.DEPENDENCIES: