Fix cache metrics not being updated when not using the legacy exposition module. (#13717)

This commit is contained in:
reivilibre 2022-09-08 14:30:48 +00:00 committed by GitHub
parent 526f84bc2e
commit cf11919ddd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 82 additions and 22 deletions

1
changelog.d/13717.misc Normal file
View file

@ -0,0 +1 @@
Add experimental configuration option to allow disabling legacy Prometheus metric names.

View file

@ -34,8 +34,6 @@ from prometheus_client.core import Sample
from twisted.web.resource import Resource from twisted.web.resource import Resource
from twisted.web.server import Request from twisted.web.server import Request
from synapse.util import caches
CONTENT_TYPE_LATEST = "text/plain; version=0.0.4; charset=utf-8" CONTENT_TYPE_LATEST = "text/plain; version=0.0.4; charset=utf-8"
@ -107,11 +105,6 @@ def generate_latest(registry: CollectorRegistry, emit_help: bool = False) -> byt
by prometheus-client. by prometheus-client.
""" """
# Trigger the cache metrics to be rescraped, which updates the common
# metrics but do not produce metrics themselves
for collector in caches.collectors_by_name.values():
collector.collect()
output = [] output = []
for metric in registry.collect(): for metric in registry.collect():

View file

@ -20,9 +20,11 @@ from sys import intern
from typing import Any, Callable, Dict, List, Optional, Sized, TypeVar from typing import Any, Callable, Dict, List, Optional, Sized, TypeVar
import attr import attr
from prometheus_client import REGISTRY
from prometheus_client.core import Gauge from prometheus_client.core import Gauge
from synapse.config.cache import add_resizable_cache from synapse.config.cache import add_resizable_cache
from synapse.util.metrics import DynamicCollectorRegistry
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -30,27 +32,62 @@ logger = logging.getLogger(__name__)
# Whether to track estimated memory usage of the LruCaches. # Whether to track estimated memory usage of the LruCaches.
TRACK_MEMORY_USAGE = False TRACK_MEMORY_USAGE = False
# We track cache metrics in a special registry that lets us update the metrics
# just before they are returned from the scrape endpoint.
CACHE_METRIC_REGISTRY = DynamicCollectorRegistry()
caches_by_name: Dict[str, Sized] = {} caches_by_name: Dict[str, Sized] = {}
collectors_by_name: Dict[str, "CacheMetric"] = {}
cache_size = Gauge("synapse_util_caches_cache_size", "", ["name"]) cache_size = Gauge(
cache_hits = Gauge("synapse_util_caches_cache_hits", "", ["name"]) "synapse_util_caches_cache_size", "", ["name"], registry=CACHE_METRIC_REGISTRY
cache_evicted = Gauge("synapse_util_caches_cache_evicted_size", "", ["name", "reason"]) )
cache_total = Gauge("synapse_util_caches_cache", "", ["name"]) cache_hits = Gauge(
cache_max_size = Gauge("synapse_util_caches_cache_max_size", "", ["name"]) "synapse_util_caches_cache_hits", "", ["name"], registry=CACHE_METRIC_REGISTRY
)
cache_evicted = Gauge(
"synapse_util_caches_cache_evicted_size",
"",
["name", "reason"],
registry=CACHE_METRIC_REGISTRY,
)
cache_total = Gauge(
"synapse_util_caches_cache", "", ["name"], registry=CACHE_METRIC_REGISTRY
)
cache_max_size = Gauge(
"synapse_util_caches_cache_max_size", "", ["name"], registry=CACHE_METRIC_REGISTRY
)
cache_memory_usage = Gauge( cache_memory_usage = Gauge(
"synapse_util_caches_cache_size_bytes", "synapse_util_caches_cache_size_bytes",
"Estimated memory usage of the caches", "Estimated memory usage of the caches",
["name"], ["name"],
registry=CACHE_METRIC_REGISTRY,
) )
response_cache_size = Gauge("synapse_util_caches_response_cache_size", "", ["name"]) response_cache_size = Gauge(
response_cache_hits = Gauge("synapse_util_caches_response_cache_hits", "", ["name"]) "synapse_util_caches_response_cache_size",
response_cache_evicted = Gauge( "",
"synapse_util_caches_response_cache_evicted_size", "", ["name", "reason"] ["name"],
registry=CACHE_METRIC_REGISTRY,
) )
response_cache_total = Gauge("synapse_util_caches_response_cache", "", ["name"]) response_cache_hits = Gauge(
"synapse_util_caches_response_cache_hits",
"",
["name"],
registry=CACHE_METRIC_REGISTRY,
)
response_cache_evicted = Gauge(
"synapse_util_caches_response_cache_evicted_size",
"",
["name", "reason"],
registry=CACHE_METRIC_REGISTRY,
)
response_cache_total = Gauge(
"synapse_util_caches_response_cache", "", ["name"], registry=CACHE_METRIC_REGISTRY
)
# Register our custom cache metrics registry with the global registry
REGISTRY.register(CACHE_METRIC_REGISTRY)
class EvictionReason(Enum): class EvictionReason(Enum):
@ -168,9 +205,8 @@ def register_cache(
add_resizable_cache(cache_name, resize_callback) add_resizable_cache(cache_name, resize_callback)
metric = CacheMetric(cache, cache_type, cache_name, collect_callback) metric = CacheMetric(cache, cache_type, cache_name, collect_callback)
metric_name = "cache_%s_%s" % (cache_type, cache_name)
caches_by_name[cache_name] = cache caches_by_name[cache_name] = cache
collectors_by_name[metric_name] = metric CACHE_METRIC_REGISTRY.register_hook(metric.collect)
return metric return metric

View file

@ -15,9 +15,9 @@
import logging import logging
from functools import wraps from functools import wraps
from types import TracebackType from types import TracebackType
from typing import Awaitable, Callable, Optional, Type, TypeVar from typing import Awaitable, Callable, Generator, List, Optional, Type, TypeVar
from prometheus_client import Counter from prometheus_client import CollectorRegistry, Counter, Metric
from typing_extensions import Concatenate, ParamSpec, Protocol from typing_extensions import Concatenate, ParamSpec, Protocol
from synapse.logging.context import ( from synapse.logging.context import (
@ -208,3 +208,33 @@ class Measure:
metrics.real_time_sum += duration metrics.real_time_sum += duration
# TODO: Add other in flight metrics. # TODO: Add other in flight metrics.
class DynamicCollectorRegistry(CollectorRegistry):
    """
    A Prometheus collector registry that runs callbacks before collecting.

    Each registered hook is invoked at the start of every collection pass,
    giving the owner of a metric the chance to refresh its value on demand
    instead of keeping it permanently up to date.

    Remember to register this registry with the main registry, otherwise
    nothing will ever ask it to collect.
    """

    def __init__(self) -> None:
        super().__init__()

        # Zero-argument callbacks, kept in the order they were registered.
        self._pre_update_hooks: List[Callable[[], None]] = []

    def register_hook(self, hook: Callable[[], None]) -> None:
        """
        Adds a callback to be run before each metric collection.

        Args:
            hook: a callable taking no arguments; its return value is ignored.
        """
        self._pre_update_hooks.append(hook)

    def collect(self) -> Generator[Metric, None, None]:
        """
        Runs every pre-update hook, then yields the metrics of the underlying
        registry.

        This is a generator, so the hooks only run once the caller starts
        iterating over the result. An exception raised by a hook is not
        caught here and propagates to whoever is iterating.
        """
        for refresh in self._pre_update_hooks:
            refresh()

        # Hand over to the stock registry now that the values are fresh.
        yield from super().collect()