From e2c300e7e4647500b0ff5ada8018cc984b78f91c Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Fri, 26 Nov 2021 14:05:20 +0000 Subject: [PATCH] Create healthcheck script for synapse-workers container (#11429) The intent is to iterate through all the worker ports and only report healthy when all are healthy, starting with the main process. --- changelog.d/11429.docker | 1 + docker/Dockerfile-workers | 3 +++ docker/conf-workers/healthcheck.sh.j2 | 6 ++++++ docker/configure_workers_and_start.py | 13 +++++++++++++ 4 files changed, 23 insertions(+) create mode 100644 changelog.d/11429.docker create mode 100644 docker/conf-workers/healthcheck.sh.j2 diff --git a/changelog.d/11429.docker b/changelog.d/11429.docker new file mode 100644 index 0000000000..81db719ed6 --- /dev/null +++ b/changelog.d/11429.docker @@ -0,0 +1 @@ +Update `Dockerfile-workers` to healthcheck all workers in container. diff --git a/docker/Dockerfile-workers b/docker/Dockerfile-workers index 969cf97286..46f2e17382 100644 --- a/docker/Dockerfile-workers +++ b/docker/Dockerfile-workers @@ -21,3 +21,6 @@ VOLUME ["/data"] # files to run the desired worker configuration. Will start supervisord. COPY ./docker/configure_workers_and_start.py /configure_workers_and_start.py ENTRYPOINT ["/configure_workers_and_start.py"] + +HEALTHCHECK --start-period=5s --interval=15s --timeout=5s \ + CMD /bin/sh /healthcheck.sh diff --git a/docker/conf-workers/healthcheck.sh.j2 b/docker/conf-workers/healthcheck.sh.j2 new file mode 100644 index 0000000000..79c621f89c --- /dev/null +++ b/docker/conf-workers/healthcheck.sh.j2 @@ -0,0 +1,6 @@ +#!/bin/sh +# This healthcheck script is designed to return OK when every +# host involved returns OK +{%- for healthcheck_url in healthcheck_urls %} +curl -fSs {{ healthcheck_url }} || exit 1 +{%- endfor %} diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py index f4ac1c22a4..adbb551cee 100755 --- a/docker/configure_workers_and_start.py +++ b/docker/configure_workers_and_start.py @@ -474,10 +474,16 @@ def generate_worker_files(environ, config_path: str, data_dir: str): # Determine the load-balancing upstreams to configure nginx_upstream_config = "" + + # At the same time, prepare a list of internal endpoints to healthcheck + # starting with the main process which exists even if no workers do. + healthcheck_urls = ["http://localhost:8080/health"] + for upstream_worker_type, upstream_worker_ports in nginx_upstreams.items(): body = "" for port in upstream_worker_ports: body += " server localhost:%d;\n" % (port,) + healthcheck_urls.append("http://localhost:%d/health" % (port,)) # Add to the list of configured upstreams nginx_upstream_config += NGINX_UPSTREAM_CONFIG_BLOCK.format( @@ -510,6 +516,13 @@ def generate_worker_files(environ, config_path: str, data_dir: str): worker_config=supervisord_config, ) + # healthcheck config + convert( + "/conf/healthcheck.sh.j2", + "/healthcheck.sh", + healthcheck_urls=healthcheck_urls, + ) + # Ensure the logging directory exists log_dir = data_dir + "/logs" if not os.path.exists(log_dir):