Merge pull request #3066 from matrix-org/rav/remove_redundant_metrics

Remove redundant metrics which were deprecated in 0.27.0.
2024-06-30 17:13:29 +00:00 · 2018-04-05 17:21:18 +01:00 · 2018-04-05 17:21:18 +01:00 · d1679a4ed7
parent e089100c62 518f6de088
commit d1679a4ed7
5 changed files with 28 additions and 52 deletions
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,3 +1,12 @@
+Changes in synapse v0.28.0 (2018-xx-xx)
+=======================================
+
+As previously advised, this release removes a number of redundant Prometheus
+metrics. Administrators may need to update their dashboards and alerting rules
+to use the updated metric names, if they have not already done so. See
+`docs/metrics-howto.rst <docs/metrics-howto.rst#deprecated-metrics-removed-in-0-28-0>`_
+for more details.
+
 Changes in synapse v0.27.2 (2018-03-26)
 =======================================

--- a/UPGRADE.rst
+++ b/UPGRADE.rst
@@ -52,7 +52,7 @@ Upgrading to $NEXT_VERSION
 ====================

 This release expands the anonymous usage stats sent if the opt-in
-``report_stats`` configuration is set to ``true``. We now capture RSS memory 
+``report_stats`` configuration is set to ``true``. We now capture RSS memory
 and cpu use at a very coarse level. This requires administrators to install
 the optional ``psutil`` python module.

@@ -60,6 +60,13 @@ We would appreciate it if you could assist by ensuring this module is available
 and ``report_stats`` is enabled. This will let us see if performance changes to
 synapse are having an impact to the general community.

+This release also removes a number of redundant Prometheus metrics.
+Administrators may need to update their dashboards and alerting rules to use
+the updated metric names, if they have not already done so. See
+`docs/metrics-howto.rst <docs/metrics-howto.rst#deprecated-metrics-removed-in-0-28-0>`_
+for more details.
+
+
 Upgrading to v0.15.0
 ====================

--- a/docs/metrics-howto.rst
+++ b/docs/metrics-howto.rst
@@ -34,6 +34,17 @@ How to monitor Synapse metrics using Prometheus
   Restart prometheus.


+Deprecated metrics removed in 0.28.0
+------------------------------------
+
+Synapse 0.28.0 removes all of the metrics deprecated by 0.27.0, which are those
+listed under "Old name" below. This has been done to reduce the bandwidth used
+by gathering metrics and the storage requirements for the Prometheus server, as
+well as reducing CPU overhead for both Synapse and Prometheus.
+
+Administrators should update any alerts or monitoring dashboards to use the
+"New name" listed below.
+
 Block and response metrics renamed for 0.27.0
 ---------------------------------------------

--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -47,17 +47,6 @@ metrics = synapse.metrics.get_metrics_for(__name__)
 response_count = metrics.register_counter(
    "response_count",
    labels=["method", "servlet", "tag"],
-    alternative_names=(
-        # the following are all deprecated aliases for the same metric
-        metrics.name_prefix + x for x in (
-            "_requests",
-            "_response_time:count",
-            "_response_ru_utime:count",
-            "_response_ru_stime:count",
-            "_response_db_txn_count:count",
-            "_response_db_txn_duration:count",
-        )
-    )
 )

 requests_counter = metrics.register_counter(
@@ -73,39 +62,24 @@ outgoing_responses_counter = metrics.register_counter(
 response_timer = metrics.register_counter(
    "response_time_seconds",
    labels=["method", "servlet", "tag"],
-    alternative_names=(
-        metrics.name_prefix + "_response_time:total",
-    ),
 )

 response_ru_utime = metrics.register_counter(
    "response_ru_utime_seconds", labels=["method", "servlet", "tag"],
-    alternative_names=(
-        metrics.name_prefix + "_response_ru_utime:total",
-    ),
 )

 response_ru_stime = metrics.register_counter(
    "response_ru_stime_seconds", labels=["method", "servlet", "tag"],
-    alternative_names=(
-        metrics.name_prefix + "_response_ru_stime:total",
-    ),
 )

 response_db_txn_count = metrics.register_counter(
    "response_db_txn_count", labels=["method", "servlet", "tag"],
-    alternative_names=(
-        metrics.name_prefix + "_response_db_txn_count:total",
-    ),
 )

 # seconds spent waiting for db txns, excluding scheduling time, when processing
 # this request
 response_db_txn_duration = metrics.register_counter(
    "response_db_txn_duration_seconds", labels=["method", "servlet", "tag"],
-    alternative_names=(
-        metrics.name_prefix + "_response_db_txn_duration:total",
-    ),
 )

 # seconds spent waiting for a db connection, when processing this request
--- a/synapse/util/metrics.py
+++ b/synapse/util/metrics.py
@@ -31,53 +31,28 @@ metrics = synapse.metrics.get_metrics_for(__name__)
 block_counter = metrics.register_counter(
    "block_count",
    labels=["block_name"],
-    alternative_names=(
-        # the following are all deprecated aliases for the same metric
-        metrics.name_prefix + x for x in (
-            "_block_timer:count",
-            "_block_ru_utime:count",
-            "_block_ru_stime:count",
-            "_block_db_txn_count:count",
-            "_block_db_txn_duration:count",
-        )
-    )
 )

 block_timer = metrics.register_counter(
    "block_time_seconds",
    labels=["block_name"],
-    alternative_names=(
-        metrics.name_prefix + "_block_timer:total",
-    ),
 )

 block_ru_utime = metrics.register_counter(
    "block_ru_utime_seconds", labels=["block_name"],
-    alternative_names=(
-        metrics.name_prefix + "_block_ru_utime:total",
-    ),
 )

 block_ru_stime = metrics.register_counter(
    "block_ru_stime_seconds", labels=["block_name"],
-    alternative_names=(
-        metrics.name_prefix + "_block_ru_stime:total",
-    ),
 )

 block_db_txn_count = metrics.register_counter(
    "block_db_txn_count", labels=["block_name"],
-    alternative_names=(
-        metrics.name_prefix + "_block_db_txn_count:total",
-    ),
 )

 # seconds spent waiting for db txns, excluding scheduling time, in this block
 block_db_txn_duration = metrics.register_counter(
    "block_db_txn_duration_seconds", labels=["block_name"],
-    alternative_names=(
-        metrics.name_prefix + "_block_db_txn_duration:total",
-    ),
 )

 # seconds spent waiting for a db connection, in this block