Reload cache factors from disk on SIGHUP (#12673)

This commit is contained in:
David Robertson 2022-05-11 14:43:22 +01:00 committed by GitHub
parent a559c8b0d9
commit d38d242411
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 199 additions and 61 deletions

View file

@ -0,0 +1 @@
Synapse will now reload [cache config](https://matrix-org.github.io/synapse/latest/usage/configuration/config_documentation.html#caching) when it receives a [SIGHUP](https://en.wikipedia.org/wiki/SIGHUP) signal.

View file

@ -730,6 +730,12 @@ retention:
# A cache 'factor' is a multiplier that can be applied to each of
# Synapse's caches in order to increase or decrease the maximum
# number of entries that can be stored.
#
# The configuration for cache factors (caches.global_factor and
# caches.per_cache_factors) can be reloaded while the application is running,
# by sending a SIGHUP signal to the Synapse process. Changes to other parts of
# the caching config will NOT be applied after a SIGHUP is received; a restart
# is necessary.
# The number of events to cache in memory. Not affected by
# caches.global_factor.

View file

@ -1130,6 +1130,23 @@ caches:
expire_caches: false
sync_response_cache_duration: 2m
```
### Reloading cache factors
The cache factors (i.e. `caches.global_factor` and `caches.per_cache_factors`) may be reloaded at any time by sending a
[`SIGHUP`](https://en.wikipedia.org/wiki/SIGHUP) signal to Synapse using e.g.
```commandline
kill -HUP [PID_OF_SYNAPSE_PROCESS]
```
If you are running multiple workers, you must individually update the worker
config file and send this signal to each worker process.
If you're using the [example systemd service](https://github.com/matrix-org/synapse/blob/develop/contrib/systemd/matrix-synapse.service)
file in Synapse's `contrib` directory, you can send a `SIGHUP` signal by using
`systemctl reload matrix-synapse`.
---
## Database ##
Config options related to database settings.

View file

@ -49,9 +49,12 @@ from twisted.logger import LoggingFile, LogLevel
from twisted.protocols.tls import TLSMemoryBIOFactory
from twisted.python.threadpool import ThreadPool
import synapse.util.caches
from synapse.api.constants import MAX_PDU_SIZE
from synapse.app import check_bind_error
from synapse.app.phone_stats_home import start_phone_stats_home
from synapse.config import ConfigError
from synapse.config._base import format_config_error
from synapse.config.homeserver import HomeServerConfig
from synapse.config.server import ManholeConfig
from synapse.crypto import context_factory
@ -432,6 +435,10 @@ async def start(hs: "HomeServer") -> None:
signal.signal(signal.SIGHUP, run_sighup)
register_sighup(refresh_certificate, hs)
register_sighup(reload_cache_config, hs.config)
# Apply the cache config.
hs.config.caches.resize_all_caches()
# Load the certificate from disk.
refresh_certificate(hs)
@ -486,6 +493,43 @@ async def start(hs: "HomeServer") -> None:
atexit.register(gc.freeze)
def reload_cache_config(config: HomeServerConfig) -> None:
"""Reload cache config from disk and immediately apply it.resize caches accordingly.
If the config is invalid, a `ConfigError` is logged and no changes are made.
Otherwise, this:
- replaces the `caches` section on the given `config` object,
- resizes all caches according to the new cache factors, and
Note that the following cache config keys are read, but not applied:
- event_cache_size: used to set a max_size and _original_max_size on
EventsWorkerStore._get_event_cache when it is created. We'd have to update
the _original_max_size (and maybe
- sync_response_cache_duration: would have to update the timeout_sec attribute on
HomeServer -> SyncHandler -> ResponseCache.
- track_memory_usage. This affects synapse.util.caches.TRACK_MEMORY_USAGE which
influences Synapse's self-reported metrics.
Also, the HTTPConnectionPool in SimpleHTTPClient sets its maxPersistentPerHost
parameter based on the global_factor. This won't be applied on a config reload.
"""
try:
previous_cache_config = config.reload_config_section("caches")
except ConfigError as e:
logger.warning("Failed to reload cache config")
for f in format_config_error(e):
logger.warning(f)
else:
logger.debug(
"New cache config. Was:\n %s\nNow:\n",
previous_cache_config.__dict__,
config.caches.__dict__,
)
synapse.util.caches.TRACK_MEMORY_USAGE = config.caches.track_memory_usage
config.caches.resize_all_caches()
def setup_sentry(hs: "HomeServer") -> None:
"""Enable sentry integration, if enabled in configuration"""

View file

@ -16,7 +16,7 @@
import logging
import os
import sys
from typing import Dict, Iterable, Iterator, List
from typing import Dict, Iterable, List
from matrix_common.versionstring import get_distribution_version_string
@ -45,7 +45,7 @@ from synapse.app._base import (
redirect_stdio_to_logs,
register_start,
)
from synapse.config._base import ConfigError
from synapse.config._base import ConfigError, format_config_error
from synapse.config.emailconfig import ThreepidBehaviour
from synapse.config.homeserver import HomeServerConfig
from synapse.config.server import ListenerConfig
@ -399,38 +399,6 @@ def setup(config_options: List[str]) -> SynapseHomeServer:
return hs
def format_config_error(e: ConfigError) -> Iterator[str]:
"""
Formats a config error neatly
The idea is to format the immediate error, plus the "causes" of those errors,
hopefully in a way that makes sense to the user. For example:
Error in configuration at 'oidc_config.user_mapping_provider.config.display_name_template':
Failed to parse config for module 'JinjaOidcMappingProvider':
invalid jinja template:
unexpected end of template, expected 'end of print statement'.
Args:
e: the error to be formatted
Returns: An iterator which yields string fragments to be formatted
"""
yield "Error in configuration"
if e.path:
yield " at '%s'" % (".".join(e.path),)
yield ":\n %s" % (e.msg,)
parent_e = e.__cause__
indent = 1
while parent_e:
indent += 1
yield ":\n%s%s" % (" " * indent, str(parent_e))
parent_e = parent_e.__cause__
def run(hs: HomeServer) -> None:
_base.start_reactor(
"synapse-homeserver",

View file

@ -16,14 +16,18 @@
import argparse
import errno
import logging
import os
from collections import OrderedDict
from hashlib import sha256
from textwrap import dedent
from typing import (
Any,
ClassVar,
Collection,
Dict,
Iterable,
Iterator,
List,
MutableMapping,
Optional,
@ -40,6 +44,8 @@ import yaml
from synapse.util.templates import _create_mxc_to_http_filter, _format_ts_filter
logger = logging.getLogger(__name__)
class ConfigError(Exception):
"""Represents a problem parsing the configuration
@ -55,6 +61,38 @@ class ConfigError(Exception):
self.path = path
def format_config_error(e: ConfigError) -> Iterator[str]:
"""
Formats a config error neatly
The idea is to format the immediate error, plus the "causes" of those errors,
hopefully in a way that makes sense to the user. For example:
Error in configuration at 'oidc_config.user_mapping_provider.config.display_name_template':
Failed to parse config for module 'JinjaOidcMappingProvider':
invalid jinja template:
unexpected end of template, expected 'end of print statement'.
Args:
e: the error to be formatted
Returns: An iterator which yields string fragments to be formatted
"""
yield "Error in configuration"
if e.path:
yield " at '%s'" % (".".join(e.path),)
yield ":\n %s" % (e.msg,)
parent_e = e.__cause__
indent = 1
while parent_e:
indent += 1
yield ":\n%s%s" % (" " * indent, str(parent_e))
parent_e = parent_e.__cause__
# We split these messages out to allow packages to override with package
# specific instructions.
MISSING_REPORT_STATS_CONFIG_INSTRUCTIONS = """\
@ -119,7 +157,7 @@ class Config:
defined in subclasses.
"""
section: str
section: ClassVar[str]
def __init__(self, root_config: "RootConfig" = None):
self.root = root_config
@ -309,9 +347,12 @@ class RootConfig:
class, lower-cased and with "Config" removed.
"""
config_classes = []
config_classes: List[Type[Config]] = []
def __init__(self, config_files: Collection[str] = ()):
# Capture absolute paths here, so we can reload config after we daemonize.
self.config_files = [os.path.abspath(path) for path in config_files]
def __init__(self):
for config_class in self.config_classes:
if config_class.section is None:
raise ValueError("%r requires a section name" % (config_class,))
@ -512,12 +553,10 @@ class RootConfig:
object from parser.parse_args(..)`
"""
obj = cls()
config_args = parser.parse_args(argv)
config_files = find_config_files(search_paths=config_args.config_path)
obj = cls(config_files)
if not config_files:
parser.error("Must supply a config file.")
@ -627,7 +666,7 @@ class RootConfig:
generate_missing_configs = config_args.generate_missing_configs
obj = cls()
obj = cls(config_files)
if config_args.generate_config:
if config_args.report_stats is None:
@ -727,6 +766,34 @@ class RootConfig:
) -> None:
self.invoke_all("generate_files", config_dict, config_dir_path)
def reload_config_section(self, section_name: str) -> Config:
"""Reconstruct the given config section, leaving all others unchanged.
This works in three steps:
1. Create a new instance of the relevant `Config` subclass.
2. Call `read_config` on that instance to parse the new config.
3. Replace the existing config instance with the new one.
:raises ValueError: if the given `section` does not exist.
:raises ConfigError: for any other problems reloading config.
:returns: the previous config object, which no longer has a reference to this
RootConfig.
"""
existing_config: Optional[Config] = getattr(self, section_name, None)
if existing_config is None:
raise ValueError(f"Unknown config section '{section_name}'")
logger.info("Reloading config section '%s'", section_name)
new_config_data = read_config_files(self.config_files)
new_config = type(existing_config)(self)
new_config.read_config(new_config_data)
setattr(self, section_name, new_config)
existing_config.root = None
return existing_config
def read_config_files(config_files: Iterable[str]) -> Dict[str, Any]:
"""Read the config files into a dict

View file

@ -1,15 +1,19 @@
import argparse
from typing import (
Any,
Collection,
Dict,
Iterable,
Iterator,
List,
Literal,
MutableMapping,
Optional,
Tuple,
Type,
TypeVar,
Union,
overload,
)
import jinja2
@ -64,6 +68,8 @@ class ConfigError(Exception):
self.msg = msg
self.path = path
def format_config_error(e: ConfigError) -> Iterator[str]: ...
MISSING_REPORT_STATS_CONFIG_INSTRUCTIONS: str
MISSING_REPORT_STATS_SPIEL: str
MISSING_SERVER_NAME: str
@ -117,7 +123,8 @@ class RootConfig:
background_updates: background_updates.BackgroundUpdateConfig
config_classes: List[Type["Config"]] = ...
def __init__(self) -> None: ...
config_files: List[str]
def __init__(self, config_files: Collection[str] = ...) -> None: ...
def invoke_all(
self, func_name: str, *args: Any, **kwargs: Any
) -> MutableMapping[str, Any]: ...
@ -157,6 +164,12 @@ class RootConfig:
def generate_missing_files(
self, config_dict: dict, config_dir_path: str
) -> None: ...
@overload
def reload_config_section(
self, section_name: Literal["caches"]
) -> cache.CacheConfig: ...
@overload
def reload_config_section(self, section_name: str) -> Config: ...
class Config:
root: RootConfig

View file

@ -69,11 +69,11 @@ def _canonicalise_cache_name(cache_name: str) -> str:
def add_resizable_cache(
cache_name: str, cache_resize_callback: Callable[[float], None]
) -> None:
"""Register a cache that's size can dynamically change
"""Register a cache whose size can dynamically change
Args:
cache_name: A reference to the cache
cache_resize_callback: A callback function that will be ran whenever
cache_resize_callback: A callback function that will run whenever
the cache needs to be resized
"""
# Some caches have '*' in them which we strip out.
@ -96,6 +96,13 @@ class CacheConfig(Config):
section = "caches"
_environ = os.environ
event_cache_size: int
cache_factors: Dict[str, float]
global_factor: float
track_memory_usage: bool
expiry_time_msec: Optional[int]
sync_response_cache_duration: int
@staticmethod
def reset() -> None:
"""Resets the caches to their defaults. Used for tests."""
@ -115,6 +122,12 @@ class CacheConfig(Config):
# A cache 'factor' is a multiplier that can be applied to each of
# Synapse's caches in order to increase or decrease the maximum
# number of entries that can be stored.
#
# The configuration for cache factors (caches.global_factor and
# caches.per_cache_factors) can be reloaded while the application is running,
# by sending a SIGHUP signal to the Synapse process. Changes to other parts of
# the caching config will NOT be applied after a SIGHUP is received; a restart
# is necessary.
# The number of events to cache in memory. Not affected by
# caches.global_factor.
@ -174,21 +187,21 @@ class CacheConfig(Config):
"""
def read_config(self, config: JsonDict, **kwargs: Any) -> None:
"""Populate this config object with values from `config`.
This method does NOT resize existing or future caches: use `resize_all_caches`.
We use two separate methods so that we can reject bad config before applying it.
"""
self.event_cache_size = self.parse_size(
config.get("event_cache_size", _DEFAULT_EVENT_CACHE_SIZE)
)
self.cache_factors: Dict[str, float] = {}
self.cache_factors = {}
cache_config = config.get("caches") or {}
self.global_factor = cache_config.get(
"global_factor", properties.default_factor_size
)
self.global_factor = cache_config.get("global_factor", _DEFAULT_FACTOR_SIZE)
if not isinstance(self.global_factor, (int, float)):
raise ConfigError("caches.global_factor must be a number.")
# Set the global one so that it's reflected in new caches
properties.default_factor_size = self.global_factor
# Load cache factors from the config
individual_factors = cache_config.get("per_cache_factors") or {}
if not isinstance(individual_factors, dict):
@ -230,7 +243,7 @@ class CacheConfig(Config):
cache_entry_ttl = cache_config.get("cache_entry_ttl", "30m")
if expire_caches:
self.expiry_time_msec: Optional[int] = self.parse_duration(cache_entry_ttl)
self.expiry_time_msec = self.parse_duration(cache_entry_ttl)
else:
self.expiry_time_msec = None
@ -254,19 +267,19 @@ class CacheConfig(Config):
cache_config.get("sync_response_cache_duration", 0)
)
# Resize all caches (if necessary) with the new factors we've loaded
self.resize_all_caches()
# Store this function so that it can be called from other classes without
# needing an instance of Config
properties.resize_all_caches_func = self.resize_all_caches
def resize_all_caches(self) -> None:
"""Ensure all cache sizes are up to date
"""Ensure all cache sizes are up-to-date.
For each cache, run the mapped callback function with either
a specific cache factor or the default, global one.
"""
# Set the global factor size, so that new caches are appropriately sized.
properties.default_factor_size = self.global_factor
# Store this function so that it can be called from other classes without
# needing an instance of CacheConfig
properties.resize_all_caches_func = self.resize_all_caches
# block other threads from modifying _CACHES while we iterate it.
with _CACHES_LOCK:
for cache_name, callback in _CACHES.items():

View file

@ -348,7 +348,7 @@ class SimpleHttpClient:
# XXX: The justification for using the cache factor here is that larger instances
# will need both more cache and more connections.
# Still, this should probably be a separate dial
pool.maxPersistentPerHost = max((100 * hs.config.caches.global_factor, 5))
pool.maxPersistentPerHost = max(int(100 * hs.config.caches.global_factor), 5)
pool.cachedConnectionTimeout = 2 * 60
self.agent: IAgent = ProxyAgent(

View file

@ -38,6 +38,7 @@ class CacheConfigTests(TestCase):
"SYNAPSE_NOT_CACHE": "BLAH",
}
self.config.read_config(config, config_dir_path="", data_dir_path="")
self.config.resize_all_caches()
self.assertEqual(dict(self.config.cache_factors), {"something_or_other": 2.0})
@ -52,6 +53,7 @@ class CacheConfigTests(TestCase):
"SYNAPSE_CACHE_FACTOR_FOO": 1,
}
self.config.read_config(config, config_dir_path="", data_dir_path="")
self.config.resize_all_caches()
self.assertEqual(
dict(self.config.cache_factors),
@ -71,6 +73,7 @@ class CacheConfigTests(TestCase):
config = {"caches": {"per_cache_factors": {"foo": 3}}}
self.config.read_config(config)
self.config.resize_all_caches()
self.assertEqual(cache.max_size, 300)
@ -82,6 +85,7 @@ class CacheConfigTests(TestCase):
"""
config = {"caches": {"per_cache_factors": {"foo": 2}}}
self.config.read_config(config, config_dir_path="", data_dir_path="")
self.config.resize_all_caches()
cache = LruCache(100)
add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor)
@ -99,6 +103,7 @@ class CacheConfigTests(TestCase):
config = {"caches": {"global_factor": 4}}
self.config.read_config(config, config_dir_path="", data_dir_path="")
self.config.resize_all_caches()
self.assertEqual(cache.max_size, 400)
@ -110,6 +115,7 @@ class CacheConfigTests(TestCase):
"""
config = {"caches": {"global_factor": 1.5}}
self.config.read_config(config, config_dir_path="", data_dir_path="")
self.config.resize_all_caches()
cache = LruCache(100)
add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor)
@ -128,6 +134,7 @@ class CacheConfigTests(TestCase):
"SYNAPSE_CACHE_FACTOR_CACHE_B": 3,
}
self.config.read_config(config, config_dir_path="", data_dir_path="")
self.config.resize_all_caches()
cache_a = LruCache(100)
add_resizable_cache("*cache_a*", cache_resize_callback=cache_a.set_cache_factor)
@ -148,6 +155,7 @@ class CacheConfigTests(TestCase):
config = {"caches": {"event_cache_size": "10k"}}
self.config.read_config(config, config_dir_path="", data_dir_path="")
self.config.resize_all_caches()
cache = LruCache(
max_size=self.config.event_cache_size,

View file

@ -749,6 +749,7 @@ def setup_test_homeserver(
if config is None:
config = default_config(name, parse=True)
config.caches.resize_all_caches()
config.ldap_enabled = False
if "clock" not in kwargs: