From d38d242411b8910dfacde1e61fd3a0ec5cbcaa66 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 11 May 2022 14:43:22 +0100 Subject: [PATCH] Reload cache factors from disk on SIGHUP (#12673) --- changelog.d/12673.feature | 1 + docs/sample_config.yaml | 6 ++ .../configuration/config_documentation.md | 17 ++++ synapse/app/_base.py | 44 ++++++++++ synapse/app/homeserver.py | 36 +-------- synapse/config/_base.py | 81 +++++++++++++++++-- synapse/config/_base.pyi | 15 +++- synapse/config/cache.py | 49 ++++++----- synapse/http/client.py | 2 +- tests/config/test_cache.py | 8 ++ tests/server.py | 1 + 11 files changed, 199 insertions(+), 61 deletions(-) create mode 100644 changelog.d/12673.feature diff --git a/changelog.d/12673.feature b/changelog.d/12673.feature new file mode 100644 index 0000000000..f2bddd6e1c --- /dev/null +++ b/changelog.d/12673.feature @@ -0,0 +1 @@ +Synapse will now reload [cache config](https://matrix-org.github.io/synapse/latest/usage/configuration/config_documentation.html#caching) when it receives a [SIGHUP](https://en.wikipedia.org/wiki/SIGHUP) signal. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index a803b8261d..e7b57f5a0b 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -730,6 +730,12 @@ retention: # A cache 'factor' is a multiplier that can be applied to each of # Synapse's caches in order to increase or decrease the maximum # number of entries that can be stored. +# +# The configuration for cache factors (caches.global_factor and +# caches.per_cache_factors) can be reloaded while the application is running, +# by sending a SIGHUP signal to the Synapse process. Changes to other parts of +# the caching config will NOT be applied after a SIGHUP is received; a restart +# is necessary. # The number of events to cache in memory. Not affected by # caches.global_factor. diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 21dad0ac41..f292b94fb0 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -1130,6 +1130,23 @@ caches: expire_caches: false sync_response_cache_duration: 2m ``` + +### Reloading cache factors + +The cache factors (i.e. `caches.global_factor` and `caches.per_cache_factors`) may be reloaded at any time by sending a +[`SIGHUP`](https://en.wikipedia.org/wiki/SIGHUP) signal to Synapse using e.g. + +```commandline +kill -HUP [PID_OF_SYNAPSE_PROCESS] +``` + +If you are running multiple workers, you must individually update the worker +config file and send this signal to each worker process. + +If you're using the [example systemd service](https://github.com/matrix-org/synapse/blob/develop/contrib/systemd/matrix-synapse.service) +file in Synapse's `contrib` directory, you can send a `SIGHUP` signal by using +`systemctl reload matrix-synapse`. + --- ## Database ## Config options related to database settings. diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 3623c1724d..a3446ac6e8 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -49,9 +49,12 @@ from twisted.logger import LoggingFile, LogLevel from twisted.protocols.tls import TLSMemoryBIOFactory from twisted.python.threadpool import ThreadPool +import synapse.util.caches from synapse.api.constants import MAX_PDU_SIZE from synapse.app import check_bind_error from synapse.app.phone_stats_home import start_phone_stats_home +from synapse.config import ConfigError +from synapse.config._base import format_config_error from synapse.config.homeserver import HomeServerConfig from synapse.config.server import ManholeConfig from synapse.crypto import context_factory @@ -432,6 +435,10 @@ async def start(hs: "HomeServer") -> None: signal.signal(signal.SIGHUP, run_sighup) register_sighup(refresh_certificate, hs) + register_sighup(reload_cache_config, hs.config) + + # Apply the cache config. + hs.config.caches.resize_all_caches() # Load the certificate from disk. refresh_certificate(hs) @@ -486,6 +493,43 @@ async def start(hs: "HomeServer") -> None: atexit.register(gc.freeze) +def reload_cache_config(config: HomeServerConfig) -> None: + """Reload cache config from disk and immediately apply it.resize caches accordingly. + + If the config is invalid, a `ConfigError` is logged and no changes are made. + + Otherwise, this: + - replaces the `caches` section on the given `config` object, + - resizes all caches according to the new cache factors, and + + Note that the following cache config keys are read, but not applied: + - event_cache_size: used to set a max_size and _original_max_size on + EventsWorkerStore._get_event_cache when it is created. We'd have to update + the _original_max_size (and maybe + - sync_response_cache_duration: would have to update the timeout_sec attribute on + HomeServer -> SyncHandler -> ResponseCache. + - track_memory_usage. This affects synapse.util.caches.TRACK_MEMORY_USAGE which + influences Synapse's self-reported metrics. + + Also, the HTTPConnectionPool in SimpleHTTPClient sets its maxPersistentPerHost + parameter based on the global_factor. This won't be applied on a config reload. + """ + try: + previous_cache_config = config.reload_config_section("caches") + except ConfigError as e: + logger.warning("Failed to reload cache config") + for f in format_config_error(e): + logger.warning(f) + else: + logger.debug( + "New cache config. Was:\n %s\nNow:\n", + previous_cache_config.__dict__, + config.caches.__dict__, + ) + synapse.util.caches.TRACK_MEMORY_USAGE = config.caches.track_memory_usage + config.caches.resize_all_caches() + + def setup_sentry(hs: "HomeServer") -> None: """Enable sentry integration, if enabled in configuration""" diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 0f75e7b9d4..4c6c0658ab 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -16,7 +16,7 @@ import logging import os import sys -from typing import Dict, Iterable, Iterator, List +from typing import Dict, Iterable, List from matrix_common.versionstring import get_distribution_version_string @@ -45,7 +45,7 @@ from synapse.app._base import ( redirect_stdio_to_logs, register_start, ) -from synapse.config._base import ConfigError +from synapse.config._base import ConfigError, format_config_error from synapse.config.emailconfig import ThreepidBehaviour from synapse.config.homeserver import HomeServerConfig from synapse.config.server import ListenerConfig @@ -399,38 +399,6 @@ def setup(config_options: List[str]) -> SynapseHomeServer: return hs -def format_config_error(e: ConfigError) -> Iterator[str]: - """ - Formats a config error neatly - - The idea is to format the immediate error, plus the "causes" of those errors, - hopefully in a way that makes sense to the user. For example: - - Error in configuration at 'oidc_config.user_mapping_provider.config.display_name_template': - Failed to parse config for module 'JinjaOidcMappingProvider': - invalid jinja template: - unexpected end of template, expected 'end of print statement'. - - Args: - e: the error to be formatted - - Returns: An iterator which yields string fragments to be formatted - """ - yield "Error in configuration" - - if e.path: - yield " at '%s'" % (".".join(e.path),) - - yield ":\n %s" % (e.msg,) - - parent_e = e.__cause__ - indent = 1 - while parent_e: - indent += 1 - yield ":\n%s%s" % (" " * indent, str(parent_e)) - parent_e = parent_e.__cause__ - - def run(hs: HomeServer) -> None: _base.start_reactor( "synapse-homeserver", diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 179aa7ff88..42364fc133 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -16,14 +16,18 @@ import argparse import errno +import logging import os from collections import OrderedDict from hashlib import sha256 from textwrap import dedent from typing import ( Any, + ClassVar, + Collection, Dict, Iterable, + Iterator, List, MutableMapping, Optional, @@ -40,6 +44,8 @@ import yaml from synapse.util.templates import _create_mxc_to_http_filter, _format_ts_filter +logger = logging.getLogger(__name__) + class ConfigError(Exception): """Represents a problem parsing the configuration @@ -55,6 +61,38 @@ class ConfigError(Exception): self.path = path +def format_config_error(e: ConfigError) -> Iterator[str]: + """ + Formats a config error neatly + + The idea is to format the immediate error, plus the "causes" of those errors, + hopefully in a way that makes sense to the user. For example: + + Error in configuration at 'oidc_config.user_mapping_provider.config.display_name_template': + Failed to parse config for module 'JinjaOidcMappingProvider': + invalid jinja template: + unexpected end of template, expected 'end of print statement'. + + Args: + e: the error to be formatted + + Returns: An iterator which yields string fragments to be formatted + """ + yield "Error in configuration" + + if e.path: + yield " at '%s'" % (".".join(e.path),) + + yield ":\n %s" % (e.msg,) + + parent_e = e.__cause__ + indent = 1 + while parent_e: + indent += 1 + yield ":\n%s%s" % (" " * indent, str(parent_e)) + parent_e = parent_e.__cause__ + + # We split these messages out to allow packages to override with package # specific instructions. MISSING_REPORT_STATS_CONFIG_INSTRUCTIONS = """\ @@ -119,7 +157,7 @@ class Config: defined in subclasses. """ - section: str + section: ClassVar[str] def __init__(self, root_config: "RootConfig" = None): self.root = root_config @@ -309,9 +347,12 @@ class RootConfig: class, lower-cased and with "Config" removed. """ - config_classes = [] + config_classes: List[Type[Config]] = [] + + def __init__(self, config_files: Collection[str] = ()): + # Capture absolute paths here, so we can reload config after we daemonize. + self.config_files = [os.path.abspath(path) for path in config_files] - def __init__(self): for config_class in self.config_classes: if config_class.section is None: raise ValueError("%r requires a section name" % (config_class,)) @@ -512,12 +553,10 @@ class RootConfig: object from parser.parse_args(..)` """ - obj = cls() - config_args = parser.parse_args(argv) config_files = find_config_files(search_paths=config_args.config_path) - + obj = cls(config_files) if not config_files: parser.error("Must supply a config file.") @@ -627,7 +666,7 @@ class RootConfig: generate_missing_configs = config_args.generate_missing_configs - obj = cls() + obj = cls(config_files) if config_args.generate_config: if config_args.report_stats is None: @@ -727,6 +766,34 @@ class RootConfig: ) -> None: self.invoke_all("generate_files", config_dict, config_dir_path) + def reload_config_section(self, section_name: str) -> Config: + """Reconstruct the given config section, leaving all others unchanged. + + This works in three steps: + + 1. Create a new instance of the relevant `Config` subclass. + 2. Call `read_config` on that instance to parse the new config. + 3. Replace the existing config instance with the new one. + + :raises ValueError: if the given `section` does not exist. + :raises ConfigError: for any other problems reloading config. + + :returns: the previous config object, which no longer has a reference to this + RootConfig. + """ + existing_config: Optional[Config] = getattr(self, section_name, None) + if existing_config is None: + raise ValueError(f"Unknown config section '{section_name}'") + logger.info("Reloading config section '%s'", section_name) + + new_config_data = read_config_files(self.config_files) + new_config = type(existing_config)(self) + new_config.read_config(new_config_data) + setattr(self, section_name, new_config) + + existing_config.root = None + return existing_config + def read_config_files(config_files: Iterable[str]) -> Dict[str, Any]: """Read the config files into a dict diff --git a/synapse/config/_base.pyi b/synapse/config/_base.pyi index bd092f956d..71d6655fda 100644 --- a/synapse/config/_base.pyi +++ b/synapse/config/_base.pyi @@ -1,15 +1,19 @@ import argparse from typing import ( Any, + Collection, Dict, Iterable, + Iterator, List, + Literal, MutableMapping, Optional, Tuple, Type, TypeVar, Union, + overload, ) import jinja2 @@ -64,6 +68,8 @@ class ConfigError(Exception): self.msg = msg self.path = path +def format_config_error(e: ConfigError) -> Iterator[str]: ... + MISSING_REPORT_STATS_CONFIG_INSTRUCTIONS: str MISSING_REPORT_STATS_SPIEL: str MISSING_SERVER_NAME: str @@ -117,7 +123,8 @@ class RootConfig: background_updates: background_updates.BackgroundUpdateConfig config_classes: List[Type["Config"]] = ... - def __init__(self) -> None: ... + config_files: List[str] + def __init__(self, config_files: Collection[str] = ...) -> None: ... def invoke_all( self, func_name: str, *args: Any, **kwargs: Any ) -> MutableMapping[str, Any]: ... @@ -157,6 +164,12 @@ class RootConfig: def generate_missing_files( self, config_dict: dict, config_dir_path: str ) -> None: ... + @overload + def reload_config_section( + self, section_name: Literal["caches"] + ) -> cache.CacheConfig: ... + @overload + def reload_config_section(self, section_name: str) -> Config: ... class Config: root: RootConfig diff --git a/synapse/config/cache.py b/synapse/config/cache.py index 94d852f413..58b2fe5519 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -69,11 +69,11 @@ def _canonicalise_cache_name(cache_name: str) -> str: def add_resizable_cache( cache_name: str, cache_resize_callback: Callable[[float], None] ) -> None: - """Register a cache that's size can dynamically change + """Register a cache whose size can dynamically change Args: cache_name: A reference to the cache - cache_resize_callback: A callback function that will be ran whenever + cache_resize_callback: A callback function that will run whenever the cache needs to be resized """ # Some caches have '*' in them which we strip out. @@ -96,6 +96,13 @@ class CacheConfig(Config): section = "caches" _environ = os.environ + event_cache_size: int + cache_factors: Dict[str, float] + global_factor: float + track_memory_usage: bool + expiry_time_msec: Optional[int] + sync_response_cache_duration: int + @staticmethod def reset() -> None: """Resets the caches to their defaults. Used for tests.""" @@ -115,6 +122,12 @@ class CacheConfig(Config): # A cache 'factor' is a multiplier that can be applied to each of # Synapse's caches in order to increase or decrease the maximum # number of entries that can be stored. + # + # The configuration for cache factors (caches.global_factor and + # caches.per_cache_factors) can be reloaded while the application is running, + # by sending a SIGHUP signal to the Synapse process. Changes to other parts of + # the caching config will NOT be applied after a SIGHUP is received; a restart + # is necessary. # The number of events to cache in memory. Not affected by # caches.global_factor. @@ -174,21 +187,21 @@ class CacheConfig(Config): """ def read_config(self, config: JsonDict, **kwargs: Any) -> None: + """Populate this config object with values from `config`. + + This method does NOT resize existing or future caches: use `resize_all_caches`. + We use two separate methods so that we can reject bad config before applying it. + """ self.event_cache_size = self.parse_size( config.get("event_cache_size", _DEFAULT_EVENT_CACHE_SIZE) ) - self.cache_factors: Dict[str, float] = {} + self.cache_factors = {} cache_config = config.get("caches") or {} - self.global_factor = cache_config.get( - "global_factor", properties.default_factor_size - ) + self.global_factor = cache_config.get("global_factor", _DEFAULT_FACTOR_SIZE) if not isinstance(self.global_factor, (int, float)): raise ConfigError("caches.global_factor must be a number.") - # Set the global one so that it's reflected in new caches - properties.default_factor_size = self.global_factor - # Load cache factors from the config individual_factors = cache_config.get("per_cache_factors") or {} if not isinstance(individual_factors, dict): @@ -230,7 +243,7 @@ class CacheConfig(Config): cache_entry_ttl = cache_config.get("cache_entry_ttl", "30m") if expire_caches: - self.expiry_time_msec: Optional[int] = self.parse_duration(cache_entry_ttl) + self.expiry_time_msec = self.parse_duration(cache_entry_ttl) else: self.expiry_time_msec = None @@ -254,19 +267,19 @@ class CacheConfig(Config): cache_config.get("sync_response_cache_duration", 0) ) - # Resize all caches (if necessary) with the new factors we've loaded - self.resize_all_caches() - - # Store this function so that it can be called from other classes without - # needing an instance of Config - properties.resize_all_caches_func = self.resize_all_caches - def resize_all_caches(self) -> None: - """Ensure all cache sizes are up to date + """Ensure all cache sizes are up-to-date. For each cache, run the mapped callback function with either a specific cache factor or the default, global one. """ + # Set the global factor size, so that new caches are appropriately sized. + properties.default_factor_size = self.global_factor + + # Store this function so that it can be called from other classes without + # needing an instance of CacheConfig + properties.resize_all_caches_func = self.resize_all_caches + # block other threads from modifying _CACHES while we iterate it. with _CACHES_LOCK: for cache_name, callback in _CACHES.items(): diff --git a/synapse/http/client.py b/synapse/http/client.py index 8310fb466a..b2c9a7c670 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -348,7 +348,7 @@ class SimpleHttpClient: # XXX: The justification for using the cache factor here is that larger instances # will need both more cache and more connections. # Still, this should probably be a separate dial - pool.maxPersistentPerHost = max((100 * hs.config.caches.global_factor, 5)) + pool.maxPersistentPerHost = max(int(100 * hs.config.caches.global_factor), 5) pool.cachedConnectionTimeout = 2 * 60 self.agent: IAgent = ProxyAgent( diff --git a/tests/config/test_cache.py b/tests/config/test_cache.py index 4bb82e810e..d2b3c299e3 100644 --- a/tests/config/test_cache.py +++ b/tests/config/test_cache.py @@ -38,6 +38,7 @@ class CacheConfigTests(TestCase): "SYNAPSE_NOT_CACHE": "BLAH", } self.config.read_config(config, config_dir_path="", data_dir_path="") + self.config.resize_all_caches() self.assertEqual(dict(self.config.cache_factors), {"something_or_other": 2.0}) @@ -52,6 +53,7 @@ class CacheConfigTests(TestCase): "SYNAPSE_CACHE_FACTOR_FOO": 1, } self.config.read_config(config, config_dir_path="", data_dir_path="") + self.config.resize_all_caches() self.assertEqual( dict(self.config.cache_factors), @@ -71,6 +73,7 @@ class CacheConfigTests(TestCase): config = {"caches": {"per_cache_factors": {"foo": 3}}} self.config.read_config(config) + self.config.resize_all_caches() self.assertEqual(cache.max_size, 300) @@ -82,6 +85,7 @@ class CacheConfigTests(TestCase): """ config = {"caches": {"per_cache_factors": {"foo": 2}}} self.config.read_config(config, config_dir_path="", data_dir_path="") + self.config.resize_all_caches() cache = LruCache(100) add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor) @@ -99,6 +103,7 @@ class CacheConfigTests(TestCase): config = {"caches": {"global_factor": 4}} self.config.read_config(config, config_dir_path="", data_dir_path="") + self.config.resize_all_caches() self.assertEqual(cache.max_size, 400) @@ -110,6 +115,7 @@ class CacheConfigTests(TestCase): """ config = {"caches": {"global_factor": 1.5}} self.config.read_config(config, config_dir_path="", data_dir_path="") + self.config.resize_all_caches() cache = LruCache(100) add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor) @@ -128,6 +134,7 @@ class CacheConfigTests(TestCase): "SYNAPSE_CACHE_FACTOR_CACHE_B": 3, } self.config.read_config(config, config_dir_path="", data_dir_path="") + self.config.resize_all_caches() cache_a = LruCache(100) add_resizable_cache("*cache_a*", cache_resize_callback=cache_a.set_cache_factor) @@ -148,6 +155,7 @@ class CacheConfigTests(TestCase): config = {"caches": {"event_cache_size": "10k"}} self.config.read_config(config, config_dir_path="", data_dir_path="") + self.config.resize_all_caches() cache = LruCache( max_size=self.config.event_cache_size, diff --git a/tests/server.py b/tests/server.py index aaefcfc46c..b9f465971f 100644 --- a/tests/server.py +++ b/tests/server.py @@ -749,6 +749,7 @@ def setup_test_homeserver( if config is None: config = default_config(name, parse=True) + config.caches.resize_all_caches() config.ldap_enabled = False if "clock" not in kwargs: