Replace last_*_pdu_age metrics with timestamps (#9540)

Following the advice at
https://prometheus.io/docs/practices/instrumentation/#timestamps-not-time-since,
it's preferable to export unix timestamps, not ages.

There doesn't seem to be any particular naming convention for timestamp
metrics.
This commit is contained in:
Richard van der Hoff 2021-03-04 16:40:18 +00:00 committed by GitHub
parent df425c2c63
commit 8a4b3738f3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 11 additions and 12 deletions

1
changelog.d/9540.feature Normal file
View file

@ -0,0 +1 @@
Add `synapse_federation_last_sent_pdu_time` and `synapse_federation_last_received_pdu_time` prometheus metrics, which monitor federation delays by reporting the timestamps of messages sent and received to a set of remote servers.

1
changelog.d/9540.removal Normal file
View file

@ -0,0 +1 @@
The `synapse_federation_last_sent_pdu_age` and `synapse_federation_last_received_pdu_age` prometheus metrics have been removed. They are replaced by `synapse_federation_last_sent_pdu_time` and `synapse_federation_last_received_pdu_time`.

View file

@ -90,10 +90,9 @@ pdu_process_time = Histogram(
"Time taken to process an event",
)
last_pdu_age_metric = Gauge(
"synapse_federation_last_received_pdu_age",
"The age (in seconds) of the last PDU successfully received from the given domain",
last_pdu_ts_metric = Gauge(
"synapse_federation_last_received_pdu_time",
"The timestamp of the last PDU which was successfully received from the given domain",
labelnames=("server_name",),
)
@ -369,8 +368,7 @@ class FederationServer(FederationBase):
)
if newest_pdu_ts and origin in self._federation_metrics_domains:
newest_pdu_age = self._clock.time_msec() - newest_pdu_ts
last_pdu_age_metric.labels(server_name=origin).set(newest_pdu_age / 1000)
last_pdu_ts_metric.labels(server_name=origin).set(newest_pdu_ts / 1000)
return pdu_results

View file

@ -36,9 +36,9 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
last_pdu_age_metric = Gauge(
"synapse_federation_last_sent_pdu_age",
"The age (in seconds) of the last PDU successfully sent to the given domain",
last_pdu_ts_metric = Gauge(
"synapse_federation_last_sent_pdu_time",
"The timestamp of the last PDU which was successfully sent to the given domain",
labelnames=("server_name",),
)
@ -187,9 +187,8 @@ class TransactionManager:
if success and pdus and destination in self._federation_metrics_domains:
last_pdu = pdus[-1]
last_pdu_age = self.clock.time_msec() - last_pdu.origin_server_ts
last_pdu_age_metric.labels(server_name=destination).set(
last_pdu_age / 1000
last_pdu_ts_metric.labels(server_name=destination).set(
last_pdu.origin_server_ts / 1000
)
set_tag(tags.ERROR, not success)