Reload cache factors from disk on SIGHUP (#12673)

This commit is contained in:
David Robertson 2022-05-11 14:43:22 +01:00 committed by GitHub
parent a559c8b0d9
commit d38d242411
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 199 additions and 61 deletions

View file

@ -0,0 +1 @@
Synapse will now reload [cache config](https://matrix-org.github.io/synapse/latest/usage/configuration/config_documentation.html#caching) when it receives a [SIGHUP](https://en.wikipedia.org/wiki/SIGHUP) signal.

View file

@ -730,6 +730,12 @@ retention:
# A cache 'factor' is a multiplier that can be applied to each of # A cache 'factor' is a multiplier that can be applied to each of
# Synapse's caches in order to increase or decrease the maximum # Synapse's caches in order to increase or decrease the maximum
# number of entries that can be stored. # number of entries that can be stored.
#
# The configuration for cache factors (caches.global_factor and
# caches.per_cache_factors) can be reloaded while the application is running,
# by sending a SIGHUP signal to the Synapse process. Changes to other parts of
# the caching config will NOT be applied after a SIGHUP is received; a restart
# is necessary.
# The number of events to cache in memory. Not affected by # The number of events to cache in memory. Not affected by
# caches.global_factor. # caches.global_factor.

View file

@ -1130,6 +1130,23 @@ caches:
expire_caches: false expire_caches: false
sync_response_cache_duration: 2m sync_response_cache_duration: 2m
``` ```
### Reloading cache factors
The cache factors (i.e. `caches.global_factor` and `caches.per_cache_factors`) may be reloaded at any time by sending a
[`SIGHUP`](https://en.wikipedia.org/wiki/SIGHUP) signal to Synapse using e.g.
```commandline
kill -HUP [PID_OF_SYNAPSE_PROCESS]
```
If you are running multiple workers, you must individually update the worker
config file and send this signal to each worker process.
If you're using the [example systemd service](https://github.com/matrix-org/synapse/blob/develop/contrib/systemd/matrix-synapse.service)
file in Synapse's `contrib` directory, you can send a `SIGHUP` signal by using
`systemctl reload matrix-synapse`.
--- ---
## Database ## ## Database ##
Config options related to database settings. Config options related to database settings.

View file

@ -49,9 +49,12 @@ from twisted.logger import LoggingFile, LogLevel
from twisted.protocols.tls import TLSMemoryBIOFactory from twisted.protocols.tls import TLSMemoryBIOFactory
from twisted.python.threadpool import ThreadPool from twisted.python.threadpool import ThreadPool
import synapse.util.caches
from synapse.api.constants import MAX_PDU_SIZE from synapse.api.constants import MAX_PDU_SIZE
from synapse.app import check_bind_error from synapse.app import check_bind_error
from synapse.app.phone_stats_home import start_phone_stats_home from synapse.app.phone_stats_home import start_phone_stats_home
from synapse.config import ConfigError
from synapse.config._base import format_config_error
from synapse.config.homeserver import HomeServerConfig from synapse.config.homeserver import HomeServerConfig
from synapse.config.server import ManholeConfig from synapse.config.server import ManholeConfig
from synapse.crypto import context_factory from synapse.crypto import context_factory
@ -432,6 +435,10 @@ async def start(hs: "HomeServer") -> None:
signal.signal(signal.SIGHUP, run_sighup) signal.signal(signal.SIGHUP, run_sighup)
register_sighup(refresh_certificate, hs) register_sighup(refresh_certificate, hs)
register_sighup(reload_cache_config, hs.config)
# Apply the cache config.
hs.config.caches.resize_all_caches()
# Load the certificate from disk. # Load the certificate from disk.
refresh_certificate(hs) refresh_certificate(hs)
@ -486,6 +493,43 @@ async def start(hs: "HomeServer") -> None:
atexit.register(gc.freeze) atexit.register(gc.freeze)
def reload_cache_config(config: HomeServerConfig) -> None:
    """Reload cache config from disk and immediately resize caches accordingly.

    If the config is invalid, a `ConfigError` is logged and no changes are made.
    Otherwise, this:
     - replaces the `caches` section on the given `config` object, and
     - resizes all caches according to the new cache factors.

    Note that the following cache config keys are read, but not applied:
     - event_cache_size: used to set a max_size and _original_max_size on
       EventsWorkerStore._get_event_cache when it is created. We'd have to update
       the _original_max_size (and maybe the max_size) on that cache to apply
       this at runtime — TODO confirm.
     - sync_response_cache_duration: would have to update the timeout_sec attribute on
       HomeServer -> SyncHandler -> ResponseCache.
     - track_memory_usage: this affects synapse.util.caches.TRACK_MEMORY_USAGE which
       influences Synapse's self-reported metrics.

    Also, the HTTPConnectionPool in SimpleHTTPClient sets its maxPersistentPerHost
    parameter based on the global_factor. This won't be applied on a config reload.
    """
    try:
        previous_cache_config = config.reload_config_section("caches")
    except ConfigError as e:
        # Reject the new config outright: log the problem and keep the old caches.
        logger.warning("Failed to reload cache config")
        for f in format_config_error(e):
            logger.warning(f)
    else:
        # NOTE: the format string needs one %s per argument; the original was
        # missing the second placeholder, which breaks lazy %-formatting when
        # the debug record is rendered.
        logger.debug(
            "New cache config. Was:\n %s\nNow:\n %s",
            previous_cache_config.__dict__,
            config.caches.__dict__,
        )
        synapse.util.caches.TRACK_MEMORY_USAGE = config.caches.track_memory_usage
        config.caches.resize_all_caches()
def setup_sentry(hs: "HomeServer") -> None: def setup_sentry(hs: "HomeServer") -> None:
"""Enable sentry integration, if enabled in configuration""" """Enable sentry integration, if enabled in configuration"""

View file

@ -16,7 +16,7 @@
import logging import logging
import os import os
import sys import sys
from typing import Dict, Iterable, Iterator, List from typing import Dict, Iterable, List
from matrix_common.versionstring import get_distribution_version_string from matrix_common.versionstring import get_distribution_version_string
@ -45,7 +45,7 @@ from synapse.app._base import (
redirect_stdio_to_logs, redirect_stdio_to_logs,
register_start, register_start,
) )
from synapse.config._base import ConfigError from synapse.config._base import ConfigError, format_config_error
from synapse.config.emailconfig import ThreepidBehaviour from synapse.config.emailconfig import ThreepidBehaviour
from synapse.config.homeserver import HomeServerConfig from synapse.config.homeserver import HomeServerConfig
from synapse.config.server import ListenerConfig from synapse.config.server import ListenerConfig
@ -399,38 +399,6 @@ def setup(config_options: List[str]) -> SynapseHomeServer:
return hs return hs
def format_config_error(e: ConfigError) -> Iterator[str]:
"""
Formats a config error neatly
The idea is to format the immediate error, plus the "causes" of those errors,
hopefully in a way that makes sense to the user. For example:
Error in configuration at 'oidc_config.user_mapping_provider.config.display_name_template':
Failed to parse config for module 'JinjaOidcMappingProvider':
invalid jinja template:
unexpected end of template, expected 'end of print statement'.
Args:
e: the error to be formatted
Returns: An iterator which yields string fragments to be formatted
"""
yield "Error in configuration"
if e.path:
yield " at '%s'" % (".".join(e.path),)
yield ":\n %s" % (e.msg,)
parent_e = e.__cause__
indent = 1
while parent_e:
indent += 1
yield ":\n%s%s" % (" " * indent, str(parent_e))
parent_e = parent_e.__cause__
def run(hs: HomeServer) -> None: def run(hs: HomeServer) -> None:
_base.start_reactor( _base.start_reactor(
"synapse-homeserver", "synapse-homeserver",

View file

@ -16,14 +16,18 @@
import argparse import argparse
import errno import errno
import logging
import os import os
from collections import OrderedDict from collections import OrderedDict
from hashlib import sha256 from hashlib import sha256
from textwrap import dedent from textwrap import dedent
from typing import ( from typing import (
Any, Any,
ClassVar,
Collection,
Dict, Dict,
Iterable, Iterable,
Iterator,
List, List,
MutableMapping, MutableMapping,
Optional, Optional,
@ -40,6 +44,8 @@ import yaml
from synapse.util.templates import _create_mxc_to_http_filter, _format_ts_filter from synapse.util.templates import _create_mxc_to_http_filter, _format_ts_filter
logger = logging.getLogger(__name__)
class ConfigError(Exception): class ConfigError(Exception):
"""Represents a problem parsing the configuration """Represents a problem parsing the configuration
@ -55,6 +61,38 @@ class ConfigError(Exception):
self.path = path self.path = path
def format_config_error(e: ConfigError) -> Iterator[str]:
    """
    Formats a config error neatly

    The idea is to format the immediate error, plus the "causes" of those errors,
    hopefully in a way that makes sense to the user. For example:

        Error in configuration at 'oidc_config.user_mapping_provider.config.display_name_template':
          Failed to parse config for module 'JinjaOidcMappingProvider':
            invalid jinja template:
              unexpected end of template, expected 'end of print statement'.

    Args:
        e: the error to be formatted

    Returns: An iterator which yields string fragments to be formatted
    """
    # Start with the headline, then the location (if known) and the message.
    yield "Error in configuration"
    if e.path:
        yield " at '%s'" % (".".join(e.path),)
    yield ":\n %s" % (e.msg,)

    # Walk the chain of causes, indenting one extra space per level.
    depth = 1
    cause = e.__cause__
    while cause is not None:
        depth += 1
        yield ":\n%s%s" % (" " * depth, str(cause))
        cause = cause.__cause__
# We split these messages out to allow packages to override with package # We split these messages out to allow packages to override with package
# specific instructions. # specific instructions.
MISSING_REPORT_STATS_CONFIG_INSTRUCTIONS = """\ MISSING_REPORT_STATS_CONFIG_INSTRUCTIONS = """\
@ -119,7 +157,7 @@ class Config:
defined in subclasses. defined in subclasses.
""" """
section: str section: ClassVar[str]
def __init__(self, root_config: "RootConfig" = None): def __init__(self, root_config: "RootConfig" = None):
self.root = root_config self.root = root_config
@ -309,9 +347,12 @@ class RootConfig:
class, lower-cased and with "Config" removed. class, lower-cased and with "Config" removed.
""" """
config_classes = [] config_classes: List[Type[Config]] = []
def __init__(self, config_files: Collection[str] = ()):
# Capture absolute paths here, so we can reload config after we daemonize.
self.config_files = [os.path.abspath(path) for path in config_files]
def __init__(self):
for config_class in self.config_classes: for config_class in self.config_classes:
if config_class.section is None: if config_class.section is None:
raise ValueError("%r requires a section name" % (config_class,)) raise ValueError("%r requires a section name" % (config_class,))
@ -512,12 +553,10 @@ class RootConfig:
object from parser.parse_args(..)` object from parser.parse_args(..)`
""" """
obj = cls()
config_args = parser.parse_args(argv) config_args = parser.parse_args(argv)
config_files = find_config_files(search_paths=config_args.config_path) config_files = find_config_files(search_paths=config_args.config_path)
obj = cls(config_files)
if not config_files: if not config_files:
parser.error("Must supply a config file.") parser.error("Must supply a config file.")
@ -627,7 +666,7 @@ class RootConfig:
generate_missing_configs = config_args.generate_missing_configs generate_missing_configs = config_args.generate_missing_configs
obj = cls() obj = cls(config_files)
if config_args.generate_config: if config_args.generate_config:
if config_args.report_stats is None: if config_args.report_stats is None:
@ -727,6 +766,34 @@ class RootConfig:
) -> None: ) -> None:
self.invoke_all("generate_files", config_dict, config_dir_path) self.invoke_all("generate_files", config_dict, config_dir_path)
def reload_config_section(self, section_name: str) -> Config:
    """Reconstruct the given config section, leaving all others unchanged.

    This works in three steps:
        1. Create a new instance of the relevant `Config` subclass.
        2. Call `read_config` on that instance to parse the new config.
        3. Replace the existing config instance with the new one.

    :raises ValueError: if the given `section` does not exist.
    :raises ConfigError: for any other problems reloading config.

    :returns: the previous config object, which no longer has a reference to this
        RootConfig.
    """
    old_section: Optional[Config] = getattr(self, section_name, None)
    if old_section is None:
        raise ValueError(f"Unknown config section '{section_name}'")

    logger.info("Reloading config section '%s'", section_name)

    # Re-read every config file from disk, then parse just the requested
    # section into a fresh instance of the same Config subclass.
    fresh_data = read_config_files(self.config_files)
    fresh_section = type(old_section)(self)
    fresh_section.read_config(fresh_data)

    # Swap the new section in and detach the old one from this RootConfig.
    setattr(self, section_name, fresh_section)
    old_section.root = None
    return old_section
def read_config_files(config_files: Iterable[str]) -> Dict[str, Any]: def read_config_files(config_files: Iterable[str]) -> Dict[str, Any]:
"""Read the config files into a dict """Read the config files into a dict

View file

@ -1,15 +1,19 @@
import argparse import argparse
from typing import ( from typing import (
Any, Any,
Collection,
Dict, Dict,
Iterable, Iterable,
Iterator,
List, List,
Literal,
MutableMapping, MutableMapping,
Optional, Optional,
Tuple, Tuple,
Type, Type,
TypeVar, TypeVar,
Union, Union,
overload,
) )
import jinja2 import jinja2
@ -64,6 +68,8 @@ class ConfigError(Exception):
self.msg = msg self.msg = msg
self.path = path self.path = path
def format_config_error(e: ConfigError) -> Iterator[str]: ...
MISSING_REPORT_STATS_CONFIG_INSTRUCTIONS: str MISSING_REPORT_STATS_CONFIG_INSTRUCTIONS: str
MISSING_REPORT_STATS_SPIEL: str MISSING_REPORT_STATS_SPIEL: str
MISSING_SERVER_NAME: str MISSING_SERVER_NAME: str
@ -117,7 +123,8 @@ class RootConfig:
background_updates: background_updates.BackgroundUpdateConfig background_updates: background_updates.BackgroundUpdateConfig
config_classes: List[Type["Config"]] = ... config_classes: List[Type["Config"]] = ...
def __init__(self) -> None: ... config_files: List[str]
def __init__(self, config_files: Collection[str] = ...) -> None: ...
def invoke_all( def invoke_all(
self, func_name: str, *args: Any, **kwargs: Any self, func_name: str, *args: Any, **kwargs: Any
) -> MutableMapping[str, Any]: ... ) -> MutableMapping[str, Any]: ...
@ -157,6 +164,12 @@ class RootConfig:
def generate_missing_files( def generate_missing_files(
self, config_dict: dict, config_dir_path: str self, config_dict: dict, config_dir_path: str
) -> None: ... ) -> None: ...
@overload
def reload_config_section(
self, section_name: Literal["caches"]
) -> cache.CacheConfig: ...
@overload
def reload_config_section(self, section_name: str) -> Config: ...
class Config: class Config:
root: RootConfig root: RootConfig

View file

@ -69,11 +69,11 @@ def _canonicalise_cache_name(cache_name: str) -> str:
def add_resizable_cache( def add_resizable_cache(
cache_name: str, cache_resize_callback: Callable[[float], None] cache_name: str, cache_resize_callback: Callable[[float], None]
) -> None: ) -> None:
"""Register a cache that's size can dynamically change """Register a cache whose size can dynamically change
Args: Args:
cache_name: A reference to the cache cache_name: A reference to the cache
cache_resize_callback: A callback function that will be ran whenever cache_resize_callback: A callback function that will run whenever
the cache needs to be resized the cache needs to be resized
""" """
# Some caches have '*' in them which we strip out. # Some caches have '*' in them which we strip out.
@ -96,6 +96,13 @@ class CacheConfig(Config):
section = "caches" section = "caches"
_environ = os.environ _environ = os.environ
event_cache_size: int
cache_factors: Dict[str, float]
global_factor: float
track_memory_usage: bool
expiry_time_msec: Optional[int]
sync_response_cache_duration: int
@staticmethod @staticmethod
def reset() -> None: def reset() -> None:
"""Resets the caches to their defaults. Used for tests.""" """Resets the caches to their defaults. Used for tests."""
@ -115,6 +122,12 @@ class CacheConfig(Config):
# A cache 'factor' is a multiplier that can be applied to each of # A cache 'factor' is a multiplier that can be applied to each of
# Synapse's caches in order to increase or decrease the maximum # Synapse's caches in order to increase or decrease the maximum
# number of entries that can be stored. # number of entries that can be stored.
#
# The configuration for cache factors (caches.global_factor and
# caches.per_cache_factors) can be reloaded while the application is running,
# by sending a SIGHUP signal to the Synapse process. Changes to other parts of
# the caching config will NOT be applied after a SIGHUP is received; a restart
# is necessary.
# The number of events to cache in memory. Not affected by # The number of events to cache in memory. Not affected by
# caches.global_factor. # caches.global_factor.
@ -174,21 +187,21 @@ class CacheConfig(Config):
""" """
def read_config(self, config: JsonDict, **kwargs: Any) -> None: def read_config(self, config: JsonDict, **kwargs: Any) -> None:
"""Populate this config object with values from `config`.
This method does NOT resize existing or future caches: use `resize_all_caches`.
We use two separate methods so that we can reject bad config before applying it.
"""
self.event_cache_size = self.parse_size( self.event_cache_size = self.parse_size(
config.get("event_cache_size", _DEFAULT_EVENT_CACHE_SIZE) config.get("event_cache_size", _DEFAULT_EVENT_CACHE_SIZE)
) )
self.cache_factors: Dict[str, float] = {} self.cache_factors = {}
cache_config = config.get("caches") or {} cache_config = config.get("caches") or {}
self.global_factor = cache_config.get( self.global_factor = cache_config.get("global_factor", _DEFAULT_FACTOR_SIZE)
"global_factor", properties.default_factor_size
)
if not isinstance(self.global_factor, (int, float)): if not isinstance(self.global_factor, (int, float)):
raise ConfigError("caches.global_factor must be a number.") raise ConfigError("caches.global_factor must be a number.")
# Set the global one so that it's reflected in new caches
properties.default_factor_size = self.global_factor
# Load cache factors from the config # Load cache factors from the config
individual_factors = cache_config.get("per_cache_factors") or {} individual_factors = cache_config.get("per_cache_factors") or {}
if not isinstance(individual_factors, dict): if not isinstance(individual_factors, dict):
@ -230,7 +243,7 @@ class CacheConfig(Config):
cache_entry_ttl = cache_config.get("cache_entry_ttl", "30m") cache_entry_ttl = cache_config.get("cache_entry_ttl", "30m")
if expire_caches: if expire_caches:
self.expiry_time_msec: Optional[int] = self.parse_duration(cache_entry_ttl) self.expiry_time_msec = self.parse_duration(cache_entry_ttl)
else: else:
self.expiry_time_msec = None self.expiry_time_msec = None
@ -254,19 +267,19 @@ class CacheConfig(Config):
cache_config.get("sync_response_cache_duration", 0) cache_config.get("sync_response_cache_duration", 0)
) )
# Resize all caches (if necessary) with the new factors we've loaded
self.resize_all_caches()
# Store this function so that it can be called from other classes without
# needing an instance of Config
properties.resize_all_caches_func = self.resize_all_caches
def resize_all_caches(self) -> None: def resize_all_caches(self) -> None:
"""Ensure all cache sizes are up to date """Ensure all cache sizes are up-to-date.
For each cache, run the mapped callback function with either For each cache, run the mapped callback function with either
a specific cache factor or the default, global one. a specific cache factor or the default, global one.
""" """
# Set the global factor size, so that new caches are appropriately sized.
properties.default_factor_size = self.global_factor
# Store this function so that it can be called from other classes without
# needing an instance of CacheConfig
properties.resize_all_caches_func = self.resize_all_caches
# block other threads from modifying _CACHES while we iterate it. # block other threads from modifying _CACHES while we iterate it.
with _CACHES_LOCK: with _CACHES_LOCK:
for cache_name, callback in _CACHES.items(): for cache_name, callback in _CACHES.items():

View file

@ -348,7 +348,7 @@ class SimpleHttpClient:
# XXX: The justification for using the cache factor here is that larger instances # XXX: The justification for using the cache factor here is that larger instances
# will need both more cache and more connections. # will need both more cache and more connections.
# Still, this should probably be a separate dial # Still, this should probably be a separate dial
pool.maxPersistentPerHost = max((100 * hs.config.caches.global_factor, 5)) pool.maxPersistentPerHost = max(int(100 * hs.config.caches.global_factor), 5)
pool.cachedConnectionTimeout = 2 * 60 pool.cachedConnectionTimeout = 2 * 60
self.agent: IAgent = ProxyAgent( self.agent: IAgent = ProxyAgent(

View file

@ -38,6 +38,7 @@ class CacheConfigTests(TestCase):
"SYNAPSE_NOT_CACHE": "BLAH", "SYNAPSE_NOT_CACHE": "BLAH",
} }
self.config.read_config(config, config_dir_path="", data_dir_path="") self.config.read_config(config, config_dir_path="", data_dir_path="")
self.config.resize_all_caches()
self.assertEqual(dict(self.config.cache_factors), {"something_or_other": 2.0}) self.assertEqual(dict(self.config.cache_factors), {"something_or_other": 2.0})
@ -52,6 +53,7 @@ class CacheConfigTests(TestCase):
"SYNAPSE_CACHE_FACTOR_FOO": 1, "SYNAPSE_CACHE_FACTOR_FOO": 1,
} }
self.config.read_config(config, config_dir_path="", data_dir_path="") self.config.read_config(config, config_dir_path="", data_dir_path="")
self.config.resize_all_caches()
self.assertEqual( self.assertEqual(
dict(self.config.cache_factors), dict(self.config.cache_factors),
@ -71,6 +73,7 @@ class CacheConfigTests(TestCase):
config = {"caches": {"per_cache_factors": {"foo": 3}}} config = {"caches": {"per_cache_factors": {"foo": 3}}}
self.config.read_config(config) self.config.read_config(config)
self.config.resize_all_caches()
self.assertEqual(cache.max_size, 300) self.assertEqual(cache.max_size, 300)
@ -82,6 +85,7 @@ class CacheConfigTests(TestCase):
""" """
config = {"caches": {"per_cache_factors": {"foo": 2}}} config = {"caches": {"per_cache_factors": {"foo": 2}}}
self.config.read_config(config, config_dir_path="", data_dir_path="") self.config.read_config(config, config_dir_path="", data_dir_path="")
self.config.resize_all_caches()
cache = LruCache(100) cache = LruCache(100)
add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor) add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor)
@ -99,6 +103,7 @@ class CacheConfigTests(TestCase):
config = {"caches": {"global_factor": 4}} config = {"caches": {"global_factor": 4}}
self.config.read_config(config, config_dir_path="", data_dir_path="") self.config.read_config(config, config_dir_path="", data_dir_path="")
self.config.resize_all_caches()
self.assertEqual(cache.max_size, 400) self.assertEqual(cache.max_size, 400)
@ -110,6 +115,7 @@ class CacheConfigTests(TestCase):
""" """
config = {"caches": {"global_factor": 1.5}} config = {"caches": {"global_factor": 1.5}}
self.config.read_config(config, config_dir_path="", data_dir_path="") self.config.read_config(config, config_dir_path="", data_dir_path="")
self.config.resize_all_caches()
cache = LruCache(100) cache = LruCache(100)
add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor) add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor)
@ -128,6 +134,7 @@ class CacheConfigTests(TestCase):
"SYNAPSE_CACHE_FACTOR_CACHE_B": 3, "SYNAPSE_CACHE_FACTOR_CACHE_B": 3,
} }
self.config.read_config(config, config_dir_path="", data_dir_path="") self.config.read_config(config, config_dir_path="", data_dir_path="")
self.config.resize_all_caches()
cache_a = LruCache(100) cache_a = LruCache(100)
add_resizable_cache("*cache_a*", cache_resize_callback=cache_a.set_cache_factor) add_resizable_cache("*cache_a*", cache_resize_callback=cache_a.set_cache_factor)
@ -148,6 +155,7 @@ class CacheConfigTests(TestCase):
config = {"caches": {"event_cache_size": "10k"}} config = {"caches": {"event_cache_size": "10k"}}
self.config.read_config(config, config_dir_path="", data_dir_path="") self.config.read_config(config, config_dir_path="", data_dir_path="")
self.config.resize_all_caches()
cache = LruCache( cache = LruCache(
max_size=self.config.event_cache_size, max_size=self.config.event_cache_size,

View file

@ -749,6 +749,7 @@ def setup_test_homeserver(
if config is None: if config is None:
config = default_config(name, parse=True) config = default_config(name, parse=True)
config.caches.resize_all_caches()
config.ldap_enabled = False config.ldap_enabled = False
if "clock" not in kwargs: if "clock" not in kwargs: