update metrics to be in seconds

This commit is contained in:
Amber Brown 2018-05-28 19:10:27 +10:00
parent 754826a830
commit a2eb5db4a0
4 changed files with 38 additions and 37 deletions

View file

@ -38,15 +38,15 @@ outgoing_responses_counter = Counter(
) )
response_timer = Histogram( response_timer = Histogram(
"synapse_http_server_response_time_seconds", "", ["method", "servlet", "tag"] "synapse_http_server_response_time_seconds", "sec", ["method", "servlet", "tag"]
) )
response_ru_utime = Counter( response_ru_utime = Counter(
"synapse_http_server_response_ru_utime_seconds", "", ["method", "servlet", "tag"] "synapse_http_server_response_ru_utime_seconds", "sec", ["method", "servlet", "tag"]
) )
response_ru_stime = Counter( response_ru_stime = Counter(
"synapse_http_server_response_ru_stime_seconds", "", ["method", "servlet", "tag"] "synapse_http_server_response_ru_stime_seconds", "sec", ["method", "servlet", "tag"]
) )
response_db_txn_count = Counter( response_db_txn_count = Counter(

View file

@ -83,7 +83,7 @@ class SynapseRequest(Request):
return Request.render(self, resrc) return Request.render(self, resrc)
def _started_processing(self, servlet_name): def _started_processing(self, servlet_name):
self.start_time = int(time.time() * 1000) self.start_time = time.time()
self.request_metrics = RequestMetrics() self.request_metrics = RequestMetrics()
self.request_metrics.start( self.request_metrics.start(
self.start_time, name=servlet_name, method=self.method, self.start_time, name=servlet_name, method=self.method,
@ -102,26 +102,26 @@ class SynapseRequest(Request):
context = LoggingContext.current_context() context = LoggingContext.current_context()
ru_utime, ru_stime = context.get_resource_usage() ru_utime, ru_stime = context.get_resource_usage()
db_txn_count = context.db_txn_count db_txn_count = context.db_txn_count
db_txn_duration_ms = context.db_txn_duration_ms db_txn_duration_sec = context.db_txn_duration_sec
db_sched_duration_ms = context.db_sched_duration_ms db_sched_duration_sec = context.db_sched_duration_sec
except Exception: except Exception:
ru_utime, ru_stime = (0, 0) ru_utime, ru_stime = (0, 0)
db_txn_count, db_txn_duration_ms = (0, 0) db_txn_count, db_txn_duration_sec = (0, 0)
end_time = int(time.time() * 1000) end_time = time.time()
self.site.access_logger.info( self.site.access_logger.info(
"%s - %s - {%s}" "%s - %s - {%s}"
" Processed request: %dms (%dms, %dms) (%dms/%dms/%d)" " Processed request: %.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)"
" %sB %s \"%s %s %s\" \"%s\"", " %sB %s \"%s %s %s\" \"%s\"",
self.getClientIP(), self.getClientIP(),
self.site.site_tag, self.site.site_tag,
self.authenticated_entity, self.authenticated_entity,
end_time - self.start_time, end_time - self.start_time,
int(ru_utime * 1000), ru_utime,
int(ru_stime * 1000), ru_stime,
db_sched_duration_ms, db_sched_duration_sec,
db_txn_duration_ms, db_txn_duration_sec,
int(db_txn_count), int(db_txn_count),
self.sentLength, self.sentLength,
self.code, self.code,

View file

@ -122,10 +122,10 @@ REGISTRY.register(CPUMetrics())
gc_unreachable = Gauge("python_gc_unreachable_total", "Unreachable GC objects", ["gen"]) gc_unreachable = Gauge("python_gc_unreachable_total", "Unreachable GC objects", ["gen"])
gc_time = Histogram( gc_time = Histogram(
"python_gc_time", "python_gc_time",
"Time taken to GC (ms)", "Time taken to GC (sec)",
["gen"], ["gen"],
buckets=[2.5, 5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000, 7500, 15000, buckets=[0.0025, 0.005, 0.01, 0.025, 0.05, 0.10, 0.25, 0.50, 1.00, 2.50,
30000, 45000, 60000], 5.00, 7.50, 15.00, 30.00, 45.00, 60.00],
) )
@ -147,8 +147,9 @@ REGISTRY.register(GCCounts())
tick_time = Histogram( tick_time = Histogram(
"python_twisted_reactor_tick_time", "python_twisted_reactor_tick_time",
"Tick time of the Twisted reactor (ms)", "Tick time of the Twisted reactor (sec)",
buckets=[1, 2, 5, 10, 50, 100, 250, 500, 1000, 2000], buckets=[0.001, 0.002, 0.005, 0.01, 0.025, 0.05, 0.1, 0.2,
0.5, 1, 2, 5],
) )
pending_calls_metric = Histogram( pending_calls_metric = Histogram(
"python_twisted_reactor_pending_calls", "python_twisted_reactor_pending_calls",
@ -202,9 +203,9 @@ def runUntilCurrentTimer(func):
num_pending += 1 num_pending += 1
num_pending += len(reactor.threadCallQueue) num_pending += len(reactor.threadCallQueue)
start = time.time() * 1000 start = time.time()
ret = func(*args, **kwargs) ret = func(*args, **kwargs)
end = time.time() * 1000 end = time.time()
# record the amount of wallclock time spent running pending calls. # record the amount of wallclock time spent running pending calls.
# This is a proxy for the actual amount of time between reactor polls, # This is a proxy for the actual amount of time between reactor polls,
@ -225,9 +226,9 @@ def runUntilCurrentTimer(func):
if threshold[i] < counts[i]: if threshold[i] < counts[i]:
logger.info("Collecting gc %d", i) logger.info("Collecting gc %d", i)
start = time.time() * 1000 start = time.time()
unreachable = gc.collect(i) unreachable = gc.collect(i)
end = time.time() * 1000 end = time.time()
gc_time.labels(i).observe(end - start) gc_time.labels(i).observe(end - start)
gc_unreachable.labels(i).set(unreachable) gc_unreachable.labels(i).set(unreachable)

View file

@ -42,10 +42,10 @@ sql_logger = logging.getLogger("synapse.storage.SQL")
transaction_logger = logging.getLogger("synapse.storage.txn") transaction_logger = logging.getLogger("synapse.storage.txn")
perf_logger = logging.getLogger("synapse.storage.TIME") perf_logger = logging.getLogger("synapse.storage.TIME")
sql_scheduling_timer = Histogram("synapse_storage_schedule_time", "") sql_scheduling_timer = Histogram("synapse_storage_schedule_time", "sec")
sql_query_timer = Histogram("synapse_storage_query_time", "", ["verb"]) sql_query_timer = Histogram("synapse_storage_query_time", "sec", ["verb"])
sql_txn_timer = Histogram("synapse_storage_transaction_time", "", ["desc"]) sql_txn_timer = Histogram("synapse_storage_transaction_time", "sec", ["desc"])
class LoggingTransaction(object): class LoggingTransaction(object):
@ -110,7 +110,7 @@ class LoggingTransaction(object):
# Don't let logging failures stop SQL from working # Don't let logging failures stop SQL from working
pass pass
start = time.time() * 1000 start = time.time()
try: try:
return func( return func(
@ -120,9 +120,9 @@ class LoggingTransaction(object):
logger.debug("[SQL FAIL] {%s} %s", self.name, e) logger.debug("[SQL FAIL] {%s} %s", self.name, e)
raise raise
finally: finally:
msecs = (time.time() * 1000) - start secs = time.time() - start
sql_logger.debug("[SQL time] {%s} %f", self.name, msecs) sql_logger.debug("[SQL time] {%s} %f sec", self.name, secs)
sql_query_timer.labels(sql.split()[0]).observe(msecs) sql_query_timer.labels(sql.split()[0]).observe(secs)
class PerformanceCounters(object): class PerformanceCounters(object):
@ -132,7 +132,7 @@ class PerformanceCounters(object):
def update(self, key, start_time, end_time=None): def update(self, key, start_time, end_time=None):
if end_time is None: if end_time is None:
end_time = time.time() * 1000 end_time = time.time()
duration = end_time - start_time duration = end_time - start_time
count, cum_time = self.current_counters.get(key, (0, 0)) count, cum_time = self.current_counters.get(key, (0, 0))
count += 1 count += 1
@ -222,7 +222,7 @@ class SQLBaseStore(object):
def _new_transaction(self, conn, desc, after_callbacks, exception_callbacks, def _new_transaction(self, conn, desc, after_callbacks, exception_callbacks,
logging_context, func, *args, **kwargs): logging_context, func, *args, **kwargs):
start = time.time() * 1000 start = time.time()
txn_id = self._TXN_ID txn_id = self._TXN_ID
# We don't really need these to be unique, so lets stop it from # We don't really need these to be unique, so lets stop it from
@ -282,13 +282,13 @@ class SQLBaseStore(object):
logger.debug("[TXN FAIL] {%s} %s", name, e) logger.debug("[TXN FAIL] {%s} %s", name, e)
raise raise
finally: finally:
end = time.time() * 1000 end = time.time()
duration = end - start duration = end - start
if logging_context is not None: if logging_context is not None:
logging_context.add_database_transaction(duration) logging_context.add_database_transaction(duration)
transaction_logger.debug("[TXN END] {%s} %f", name, duration) transaction_logger.debug("[TXN END] {%s} %f sec", name, duration)
self._current_txn_total_time += duration self._current_txn_total_time += duration
self._txn_perf_counters.update(desc, start, end) self._txn_perf_counters.update(desc, start, end)
@ -349,13 +349,13 @@ class SQLBaseStore(object):
""" """
current_context = LoggingContext.current_context() current_context = LoggingContext.current_context()
start_time = time.time() * 1000 start_time = time.time()
def inner_func(conn, *args, **kwargs): def inner_func(conn, *args, **kwargs):
with LoggingContext("runWithConnection") as context: with LoggingContext("runWithConnection") as context:
sched_duration_ms = time.time() * 1000 - start_time sched_duration_sec = time.time() - start_time
sql_scheduling_timer.observe(sched_duration_ms) sql_scheduling_timer.observe(sched_duration_sec)
current_context.add_database_scheduled(sched_duration_ms) current_context.add_database_scheduled(sched_duration_sec)
if self.database_engine.is_connection_closed(conn): if self.database_engine.is_connection_closed(conn):
logger.debug("Reconnecting closed database connection") logger.debug("Reconnecting closed database connection")