diff --git a/CHANGES.md b/CHANGES.md index 7713328f12..7188f94445 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,11 +1,24 @@ +Synapse 1.32.1 (2021-04-21) +=========================== + +This release fixes [a regression](https://github.com/matrix-org/synapse/issues/9853) +in Synapse 1.32.0 that caused connected Prometheus instances to become unstable. If you +ran Synapse 1.32.0 with Prometheus metrics, first upgrade to Synapse 1.32.1 and follow +[these instructions](https://github.com/matrix-org/synapse/pull/9854#issuecomment-823472183) +to clean up any excess writeahead logs. + +Bugfixes +-------- + +- Fix a regression in Synapse 1.32.0 which caused Synapse to report large numbers of Prometheus time series, potentially overwhelming Prometheus instances. ([\#9854](https://github.com/matrix-org/synapse/issues/9854)) + + Synapse 1.32.0 (2021-04-20) =========================== -**Note:** This release introduces [a regression](https://githubcom/matrix-org/synapse/issues/9853) +**Note:** This release introduces [a regression](https://github.com/matrix-org/synapse/issues/9853) that can overwhelm connected Prometheus instances. This issue was not present in -Synapse v1.32.0rc1. It is recommended not to update to this release. If you have -upgraded to v1.32.0 already, please downgrade to v1.31.0. This issue will be -resolved in a subsequent release version shortly. +1.32.0rc1, and is fixed in 1.32.1. See the changelog for 1.32.1 above for more information. **Note:** This release requires Python 3.6+ and Postgres 9.6+ or SQLite 3.22+. diff --git a/UPGRADE.rst b/UPGRADE.rst index e3e0c1e40d..215211b6b5 100644 --- a/UPGRADE.rst +++ b/UPGRADE.rst @@ -114,11 +114,14 @@ Upgrading to v1.32.0 Regression causing connected Prometheus instances to become overwhelmed ----------------------------------------------------------------------- -This release introduces `a regression `_ -that can overwhelm connected Prometheus instances. This issue was not present in -Synapse v1.32.0rc1. It is recommended not to update to this release. If you have -upgraded to v1.32.0 already, please downgrade to v1.31.0. This issue will be -resolved in a subsequent release version shortly. +This release introduces `a regression `_ +that can overwhelm connected Prometheus instances. This issue is not present in +Synapse v1.32.0rc1, and is fixed in Synapse v1.32.1. + +If you have been affected, please first upgrade to a more recent Synapse version. +You then may need to remove excess writeahead logs in order for Prometheus to recover. +Instructions for doing so are provided +`here `_. Dropping support for old Python, Postgres and SQLite versions ------------------------------------------------------------- diff --git a/debian/changelog b/debian/changelog index 83be4497ec..b8cf2cac58 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +matrix-synapse-py3 (1.32.1) stable; urgency=medium + + * New synapse release 1.32.1. + + -- Synapse Packaging team Wed, 21 Apr 2021 14:00:55 +0100 + matrix-synapse-py3 (1.32.0) stable; urgency=medium [ Dan Callahan ] diff --git a/synapse/__init__.py b/synapse/__init__.py index 7e97f5d995..5bfae24cbd 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -47,7 +47,7 @@ try: except ImportError: pass -__version__ = "1.32.0" +__version__ = "1.32.1" if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)): # We import here so that we don't have to install a bunch of deps when diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index c56f0dd124..714caf84c3 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -15,7 +15,7 @@ import logging import threading from functools import wraps -from typing import TYPE_CHECKING, Dict, Optional, Set +from typing import TYPE_CHECKING, Dict, Optional, Set, Union from prometheus_client.core import REGISTRY, Counter, Gauge @@ -198,7 +198,7 @@ def run_as_background_process(desc: str, func, *args, bg_start_span=True, **kwar _background_process_start_count.labels(desc).inc() _background_process_in_flight_count.labels(desc).inc() - with BackgroundProcessLoggingContext("%s-%s" % (desc, count)) as context: + with BackgroundProcessLoggingContext(desc, count) as context: try: ctx = noop_context_manager() if bg_start_span: @@ -243,8 +243,20 @@ class BackgroundProcessLoggingContext(LoggingContext): __slots__ = ["_proc"] - def __init__(self, name: str): - super().__init__(name) + def __init__(self, name: str, instance_id: Optional[Union[int, str]] = None): + """ + + Args: + name: The name of the background process. Each distinct `name` gets a + separate prometheus time series. + + instance_id: an identifer to add to `name` to distinguish this instance of + the named background process in the logs. If this is `None`, one is + made up based on id(self). + """ + if instance_id is None: + instance_id = id(self) + super().__init__("%s-%s" % (name, instance_id)) self._proc = _BackgroundProcess(name, self) def start(self, rusage: "Optional[resource._RUsage]"): diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index 2df028f315..6860576e78 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -184,7 +184,7 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver): # a logcontext which we use for processing incoming commands. We declare it as a # background process so that the CPU stats get reported to prometheus. self._logging_context = BackgroundProcessLoggingContext( - "replication-conn-%s" % (self.conn_id,) + "replication-conn", self.conn_id ) def connectionMade(self):