From cb8d568cf90bb0f5f07ee9e7e6796ad7cd83361f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 3 Jul 2019 13:40:45 +0100 Subject: [PATCH] Fix 'utime went backwards' errors on daemonization. (#5609) * Fix 'utime went backwards' errors on daemonization. Fixes #5608 * remove spurious debug --- changelog.d/5609.bugfix | 1 + synapse/app/_base.py | 53 ++++++++++++++++++++------------------ synapse/util/logcontext.py | 17 +++++++++--- 3 files changed, 42 insertions(+), 29 deletions(-) create mode 100644 changelog.d/5609.bugfix diff --git a/changelog.d/5609.bugfix b/changelog.d/5609.bugfix new file mode 100644 index 0000000000..534ee22a1b --- /dev/null +++ b/changelog.d/5609.bugfix @@ -0,0 +1 @@ +Fix 'utime went backwards' errors on daemonization. diff --git a/synapse/app/_base.py b/synapse/app/_base.py index d50a9840d4..9b5c30f506 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -93,33 +93,36 @@ def start_reactor( install_dns_limiter(reactor) def run(): - # make sure that we run the reactor with the sentinel log context, - # otherwise other PreserveLoggingContext instances will get confused - # and complain when they see the logcontext arbitrarily swapping - # between the sentinel and `run` logcontexts. - with PreserveLoggingContext(): - logger.info("Running") + logger.info("Running") + change_resource_limit(soft_file_limit) + if gc_thresholds: + gc.set_threshold(*gc_thresholds) + reactor.run() - change_resource_limit(soft_file_limit) - if gc_thresholds: - gc.set_threshold(*gc_thresholds) - reactor.run() + # make sure that we run the reactor with the sentinel log context, + # otherwise other PreserveLoggingContext instances will get confused + # and complain when they see the logcontext arbitrarily swapping + # between the sentinel and `run` logcontexts. + # + # We also need to drop the logcontext before forking if we're daemonizing, + # otherwise the cputime metrics get confused about the per-thread resource usage + # appearing to go backwards. + with PreserveLoggingContext(): + if daemonize: + if print_pidfile: + print(pid_file) - if daemonize: - if print_pidfile: - print(pid_file) - - daemon = Daemonize( - app=appname, - pid=pid_file, - action=run, - auto_close_fds=False, - verbose=True, - logger=logger, - ) - daemon.start() - else: - run() + daemon = Daemonize( + app=appname, + pid=pid_file, + action=run, + auto_close_fds=False, + verbose=True, + logger=logger, + ) + daemon.start() + else: + run() def quit_with_error(error_string): diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index 9e1b537804..30dfa1d6b2 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -52,6 +52,15 @@ except Exception: return None +# get an id for the current thread. +# +# threading.get_ident doesn't actually return an OS-level tid, and annoyingly, +# on Linux it actually returns the same value either side of a fork() call. However +# we only fork in one place, so it's not worth the hoop-jumping to get a real tid. +# +get_thread_id = threading.get_ident + + class ContextResourceUsage(object): """Object for tracking the resources used by a log context @@ -225,7 +234,7 @@ class LoggingContext(object): # became active. self.usage_start = None - self.main_thread = threading.current_thread() + self.main_thread = get_thread_id() self.request = None self.tag = "" self.alive = True @@ -318,7 +327,7 @@ class LoggingContext(object): record.request = self.request def start(self): - if threading.current_thread() is not self.main_thread: + if get_thread_id() != self.main_thread: logger.warning("Started logcontext %s on different thread", self) return @@ -328,7 +337,7 @@ class LoggingContext(object): self.usage_start = get_thread_resource_usage() def stop(self): - if threading.current_thread() is not self.main_thread: + if get_thread_id() != self.main_thread: logger.warning("Stopped logcontext %s on different thread", self) return @@ -355,7 +364,7 @@ class LoggingContext(object): # If we are on the correct thread and we're currently running then we # can include resource usage so far. - is_main_thread = threading.current_thread() is self.main_thread + is_main_thread = get_thread_id() == self.main_thread if self.alive and self.usage_start and is_main_thread: utime_delta, stime_delta = self._get_cputime() res.ru_utime += utime_delta