From a2eb5db4a066148da5057b8ee844c7e4f5d888bc Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Mon, 28 May 2018 19:10:27 +1000 Subject: [PATCH] update metrics to be in seconds --- synapse/http/request_metrics.py | 6 +++--- synapse/http/site.py | 20 ++++++++++---------- synapse/metrics/__init__.py | 19 ++++++++++--------- synapse/storage/_base.py | 30 +++++++++++++++--------------- 4 files changed, 38 insertions(+), 37 deletions(-) diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py index e7df494333..af3067b4bb 100644 --- a/synapse/http/request_metrics.py +++ b/synapse/http/request_metrics.py @@ -38,15 +38,15 @@ outgoing_responses_counter = Counter( ) response_timer = Histogram( - "synapse_http_server_response_time_seconds", "", ["method", "servlet", "tag"] + "synapse_http_server_response_time_seconds", "sec", ["method", "servlet", "tag"] ) response_ru_utime = Counter( - "synapse_http_server_response_ru_utime_seconds", "", ["method", "servlet", "tag"] + "synapse_http_server_response_ru_utime_seconds", "sec", ["method", "servlet", "tag"] ) response_ru_stime = Counter( - "synapse_http_server_response_ru_stime_seconds", "", ["method", "servlet", "tag"] + "synapse_http_server_response_ru_stime_seconds", "sec", ["method", "servlet", "tag"] ) response_db_txn_count = Counter( diff --git a/synapse/http/site.py b/synapse/http/site.py index 23c1b76922..60299657b9 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -83,7 +83,7 @@ class SynapseRequest(Request): return Request.render(self, resrc) def _started_processing(self, servlet_name): - self.start_time = int(time.time() * 1000) + self.start_time = time.time() self.request_metrics = RequestMetrics() self.request_metrics.start( self.start_time, name=servlet_name, method=self.method, @@ -102,26 +102,26 @@ class SynapseRequest(Request): context = LoggingContext.current_context() ru_utime, ru_stime = context.get_resource_usage() db_txn_count = context.db_txn_count - db_txn_duration_ms = 
context.db_txn_duration_ms - db_sched_duration_ms = context.db_sched_duration_ms + db_txn_duration_sec = context.db_txn_duration_sec + db_sched_duration_sec = context.db_sched_duration_sec except Exception: ru_utime, ru_stime = (0, 0) - db_txn_count, db_txn_duration_ms = (0, 0) + db_txn_count, db_txn_duration_sec = (0, 0) - end_time = int(time.time() * 1000) + end_time = time.time() self.site.access_logger.info( "%s - %s - {%s}" - " Processed request: %dms (%dms, %dms) (%dms/%dms/%d)" + " Processed request: %.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)" " %sB %s \"%s %s %s\" \"%s\"", self.getClientIP(), self.site.site_tag, self.authenticated_entity, end_time - self.start_time, - int(ru_utime * 1000), - int(ru_stime * 1000), - db_sched_duration_ms, - db_txn_duration_ms, + ru_utime, + ru_stime, + db_sched_duration_sec, + db_txn_duration_sec, int(db_txn_count), self.sentLength, self.code, diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index e33ed6c9be..0c557a43f6 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -122,10 +122,10 @@ REGISTRY.register(CPUMetrics()) gc_unreachable = Gauge("python_gc_unreachable_total", "Unreachable GC objects", ["gen"]) gc_time = Histogram( "python_gc_time", - "Time taken to GC (ms)", + "Time taken to GC (sec)", ["gen"], - buckets=[2.5, 5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000, 7500, 15000, - 30000, 45000, 60000], + buckets=[0.0025, 0.005, 0.01, 0.025, 0.05, 0.10, 0.25, 0.50, 1.00, 2.50, + 5.00, 7.50, 15.00, 30.00, 45.00, 60.00], ) @@ -147,8 +147,9 @@ REGISTRY.register(GCCounts()) tick_time = Histogram( "python_twisted_reactor_tick_time", - "Tick time of the Twisted reactor (ms)", - buckets=[1, 2, 5, 10, 50, 100, 250, 500, 1000, 2000], + "Tick time of the Twisted reactor (sec)", + buckets=[0.001, 0.002, 0.005, 0.01,
0.025, 0.05, 0.1, 0.2, + 0.5, 1, 2, 5], ) pending_calls_metric = Histogram( "python_twisted_reactor_pending_calls", @@ -202,9 +203,9 @@ def runUntilCurrentTimer(func): num_pending += 1 num_pending += len(reactor.threadCallQueue) - start = time.time() * 1000 + start = time.time() ret = func(*args, **kwargs) - end = time.time() * 1000 + end = time.time() # record the amount of wallclock time spent running pending calls. # This is a proxy for the actual amount of time between reactor polls, @@ -225,9 +226,9 @@ def runUntilCurrentTimer(func): if threshold[i] < counts[i]: logger.info("Collecting gc %d", i) - start = time.time() * 1000 + start = time.time() unreachable = gc.collect(i) - end = time.time() * 1000 + end = time.time() gc_time.labels(i).observe(end - start) gc_unreachable.labels(i).set(unreachable) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index d963af5c89..22d6257a9f 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -42,10 +42,10 @@ sql_logger = logging.getLogger("synapse.storage.SQL") transaction_logger = logging.getLogger("synapse.storage.txn") perf_logger = logging.getLogger("synapse.storage.TIME") -sql_scheduling_timer = Histogram("synapse_storage_schedule_time", "") +sql_scheduling_timer = Histogram("synapse_storage_schedule_time", "sec") -sql_query_timer = Histogram("synapse_storage_query_time", "", ["verb"]) -sql_txn_timer = Histogram("synapse_storage_transaction_time", "", ["desc"]) +sql_query_timer = Histogram("synapse_storage_query_time", "sec", ["verb"]) +sql_txn_timer = Histogram("synapse_storage_transaction_time", "sec", ["desc"]) class LoggingTransaction(object): @@ -110,7 +110,7 @@ class LoggingTransaction(object): # Don't let logging failures stop SQL from working pass - start = time.time() * 1000 + start = time.time() try: return func( @@ -120,9 +120,9 @@ class LoggingTransaction(object): logger.debug("[SQL FAIL] {%s} %s", self.name, e) raise finally: - msecs = (time.time() * 1000) - start - 
sql_logger.debug("[SQL time] {%s} %f", self.name, msecs) - sql_query_timer.labels(sql.split()[0]).observe(msecs) + secs = time.time() - start + sql_logger.debug("[SQL time] {%s} %f sec", self.name, secs) + sql_query_timer.labels(sql.split()[0]).observe(secs) class PerformanceCounters(object): @@ -132,7 +132,7 @@ class PerformanceCounters(object): def update(self, key, start_time, end_time=None): if end_time is None: - end_time = time.time() * 1000 + end_time = time.time() duration = end_time - start_time count, cum_time = self.current_counters.get(key, (0, 0)) count += 1 @@ -222,7 +222,7 @@ class SQLBaseStore(object): def _new_transaction(self, conn, desc, after_callbacks, exception_callbacks, logging_context, func, *args, **kwargs): - start = time.time() * 1000 + start = time.time() txn_id = self._TXN_ID # We don't really need these to be unique, so lets stop it from @@ -282,13 +282,13 @@ class SQLBaseStore(object): logger.debug("[TXN FAIL] {%s} %s", name, e) raise finally: - end = time.time() * 1000 + end = time.time() duration = end - start if logging_context is not None: logging_context.add_database_transaction(duration) - transaction_logger.debug("[TXN END] {%s} %f", name, duration) + transaction_logger.debug("[TXN END] {%s} %f sec", name, duration) self._current_txn_total_time += duration self._txn_perf_counters.update(desc, start, end) @@ -349,13 +349,13 @@ class SQLBaseStore(object): """ current_context = LoggingContext.current_context() - start_time = time.time() * 1000 + start_time = time.time() def inner_func(conn, *args, **kwargs): with LoggingContext("runWithConnection") as context: - sched_duration_ms = time.time() * 1000 - start_time - sql_scheduling_timer.observe(sched_duration_ms) - current_context.add_database_scheduled(sched_duration_ms) + sched_duration_sec = time.time() - start_time + sql_scheduling_timer.observe(sched_duration_sec) + current_context.add_database_scheduled(sched_duration_sec) if self.database_engine.is_connection_closed(conn): 
logger.debug("Reconnecting closed database connection")