From 9fbe70a7dc3afabfdac176ba1f4be32dd44602aa Mon Sep 17 00:00:00 2001 From: Vincent Breitmoser Date: Sat, 6 Jan 2018 18:11:02 +0100 Subject: [PATCH 1/5] Use sortedcontainers instead of blist This commit drop-in replaces blist with SortedContainers. They are written in pure python so work with pypy, but perform as good as native implementations, at least in a couple benchmarks: http://www.grantjenks.com/docs/sortedcontainers/performance.html --- synapse/federation/send_queue.py | 14 +++++++------- synapse/python_dependencies.py | 2 +- synapse/util/caches/stream_change_cache.py | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py index 93e5acebc1..945832283f 100644 --- a/synapse/federation/send_queue.py +++ b/synapse/federation/send_queue.py @@ -35,7 +35,7 @@ from synapse.storage.presence import UserPresenceState from synapse.util.metrics import Measure import synapse.metrics -from blist import sorteddict +from sortedcontainers import SortedDict from collections import namedtuple import logging @@ -56,19 +56,19 @@ class FederationRemoteSendQueue(object): self.is_mine_id = hs.is_mine_id self.presence_map = {} # Pending presence map user_id -> UserPresenceState - self.presence_changed = sorteddict() # Stream position -> user_id + self.presence_changed = SortedDict() # Stream position -> user_id self.keyed_edu = {} # (destination, key) -> EDU - self.keyed_edu_changed = sorteddict() # stream position -> (destination, key) + self.keyed_edu_changed = SortedDict() # stream position -> (destination, key) - self.edus = sorteddict() # stream position -> Edu + self.edus = SortedDict() # stream position -> Edu - self.failures = sorteddict() # stream position -> (destination, Failure) + self.failures = SortedDict() # stream position -> (destination, Failure) - self.device_messages = sorteddict() # stream position -> destination + self.device_messages = SortedDict() # stream position -> destination self.pos = 1 - self.pos_time = sorteddict() + self.pos_time = SortedDict() # EVERYTHING IS SAD. In particular, python only makes new scopes when # we make a new function, so we need to make a new function so the inner diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 40eedb63cb..f9596bddaf 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -34,8 +34,8 @@ REQUIREMENTS = { "bcrypt": ["bcrypt>=3.1.0"], "pillow": ["PIL"], "pydenticon": ["pydenticon"], - "blist": ["blist"], "pysaml2>=3.0.0": ["saml2>=3.0.0"], + "sortedcontainers": ["sortedcontainers"], "pymacaroons-pynacl": ["pymacaroons"], "msgpack-python>=0.3.0": ["msgpack"], "phonenumbers>=8.2.0": ["phonenumbers"], diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py index 941d873ab8..2ff46090a6 100644 --- a/synapse/util/caches/stream_change_cache.py +++ b/synapse/util/caches/stream_change_cache.py @@ -16,7 +16,7 @@ from synapse.util.caches import register_cache, CACHE_SIZE_FACTOR -from blist import sorteddict +from sortedcontainers import SortedDict import logging @@ -35,7 +35,7 @@ class StreamChangeCache(object): def __init__(self, name, current_stream_pos, max_size=10000, prefilled_cache={}): self._max_size = int(max_size * CACHE_SIZE_FACTOR) self._entity_to_key = {} - self._cache = sorteddict() + self._cache = SortedDict() self._earliest_known_stream_pos = current_stream_pos self.name = name self.metrics = register_cache(self.name, self._cache) From 89de9349815b79f55bfff53b7c8c6d43f8c30336 Mon Sep 17 00:00:00 2001 From: Vincent Breitmoser Date: Sat, 6 Jan 2018 18:13:56 +0100 Subject: [PATCH 2/5] Use psycopg2cffi module instead of psycopg2 if running on pypy The psycopg2 package isn't available for PyPy. This commit adds a check if the runtime is PyPy, and if it is uses psycopg2cffi module in favor of psycopg2. This is almost a drop-in replacement, except for one place where an additional cast to string is required. --- synapse/storage/_base.py | 2 +- synapse/storage/engines/__init__.py | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 2fbebd4907..2262776ab2 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -376,7 +376,7 @@ class SQLBaseStore(object): Returns: A list of dicts where the key is the column header. """ - col_headers = list(intern(column[0]) for column in cursor.description) + col_headers = list(intern(str(column[0])) for column in cursor.description) results = list( dict(zip(col_headers, row)) for row in cursor ) diff --git a/synapse/storage/engines/__init__.py b/synapse/storage/engines/__init__.py index 338b495611..be7f6d6ac3 100644 --- a/synapse/storage/engines/__init__.py +++ b/synapse/storage/engines/__init__.py @@ -18,6 +18,7 @@ from .postgres import PostgresEngine from .sqlite3 import Sqlite3Engine import importlib +import platform SUPPORTED_MODULE = { @@ -31,7 +32,12 @@ def create_engine(database_config): engine_class = SUPPORTED_MODULE.get(name, None) if engine_class: - module = importlib.import_module(name) + needs_pypy_hack = (name == "psycopg2" and + platform.python_implementation() == "PyPy") + if needs_pypy_hack: + module = importlib.import_module("psycopg2cffi") + else: + module = importlib.import_module(name) return engine_class(module, database_config) raise RuntimeError( From d1e56cfcd11bcd509d8fa3954c00e06a84bddd87 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 10 Apr 2018 00:21:51 +0100 Subject: [PATCH 3/5] Fix pep8 error on psycopg2cffi hack --- synapse/storage/engines/__init__.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/synapse/storage/engines/__init__.py b/synapse/storage/engines/__init__.py index be7f6d6ac3..8c868ece75 100644 --- a/synapse/storage/engines/__init__.py +++ b/synapse/storage/engines/__init__.py @@ -32,12 +32,11 @@ def create_engine(database_config): engine_class = SUPPORTED_MODULE.get(name, None) if engine_class: - needs_pypy_hack = (name == "psycopg2" and - platform.python_implementation() == "PyPy") - if needs_pypy_hack: - module = importlib.import_module("psycopg2cffi") - else: - module = importlib.import_module(name) + # pypy requires psycopg2cffi rather than psycopg2 + if (name == "psycopg2" and + platform.python_implementation() == "PyPy"): + name = "psycopg2cffi" + module = importlib.import_module(name) return engine_class(module, database_config) raise RuntimeError( From f4284d943aee616f3100298634a113e51c8ab27e Mon Sep 17 00:00:00 2001 From: Vincent Breitmoser Date: Sat, 6 Jan 2018 18:14:51 +0100 Subject: [PATCH 4/5] In DomainSpecificString, override __repr__ in addition to __str__ For some reason, string interpolation on a DomainSpecificString object like "%r" % (domainSpecificStringObj) fails under PyPy, because the default __repr__ implementation wants to iterate over the object. I'm not sure why that happens, but overriding __repr__ instead of __str__ fixes this problem, and is arguably the more appropriate thing to do anyways. --- synapse/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/types.py b/synapse/types.py index 7cb24cecb2..cc7c182a78 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -169,7 +169,7 @@ class DomainSpecificString( except Exception: return False - __str__ = to_string + __repr__ = to_string class UserID(DomainSpecificString): From 6d7f0f8dd3685b7d2c58a43662eb793defc186ff Mon Sep 17 00:00:00 2001 From: Vincent Breitmoser Date: Mon, 8 Jan 2018 01:53:32 +0100 Subject: [PATCH 5/5] Don't disable GC when running on PyPy PyPy's incminimark GC can't be triggered manually. From what I observed there are no obvious issues with just letting it run normally. And unlike CPython, it actually returns unused RAM to the system. Signed-off-by: Vincent Breitmoser --- synapse/metrics/__init__.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 50d99d7a5c..2ed82b04a5 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -17,6 +17,7 @@ import logging import functools import time import gc +import platform from twisted.internet import reactor @@ -30,6 +31,7 @@ from .process_collector import register_process_collector logger = logging.getLogger(__name__) +running_on_pypy = platform.python_implementation() == 'PyPy' all_metrics = [] all_collectors = [] @@ -174,6 +176,9 @@ def runUntilCurrentTimer(func): tick_time.inc_by(end - start) pending_calls_metric.inc_by(num_pending) + if running_on_pypy: + return ret + # Check if we need to do a manual GC (since its been disabled), and do # one if necessary. threshold = gc.get_threshold() @@ -206,6 +211,7 @@ try: # We manually run the GC each reactor tick so that we can get some metrics # about time spent doing GC, - gc.disable() + if not running_on_pypy: + gc.disable() except AttributeError: pass