diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 2d56b084e2..c608d2c29b 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -85,7 +85,7 @@ - [Git Usage](development/git.md) - [Testing]() - [Demo scripts](development/demo.md) - - [OpenTracing](opentracing.md) + - [Tracing](tracing.md) - [Database Schemas](development/database_schema.md) - [Experimental features](development/experimental_features.md) - [Dependency management](development/dependencies.md) diff --git a/docs/opentracing.md b/docs/opentracing.md index abb94b565f..976605a4c7 100644 --- a/docs/opentracing.md +++ b/docs/opentracing.md @@ -1,94 +1,3 @@ # OpenTracing -## Background - -OpenTracing is a semi-standard being adopted by a number of distributed -tracing platforms. It is a common api for facilitating vendor-agnostic -tracing instrumentation. That is, we can use the OpenTracing api and -select one of a number of tracer implementations to do the heavy lifting -in the background. Our current selected implementation is Jaeger. - -OpenTracing is a tool which gives an insight into the causal -relationship of work done in and between servers. The servers each track -events and report them to a centralised server - in Synapse's case: -Jaeger. The basic unit used to represent events is the span. The span -roughly represents a single piece of work that was done and the time at -which it occurred. A span can have child spans, meaning that the work of -the child had to be completed for the parent span to complete, or it can -have follow-on spans which represent work that is undertaken as a result -of the parent but is not depended on by the parent to in order to -finish. - -Since this is undertaken in a distributed environment a request to -another server, such as an RPC or a simple GET, can be considered a span -(a unit or work) for the local server. This causal link is what -OpenTracing aims to capture and visualise. 
In order to do this metadata -about the local server's span, i.e the 'span context', needs to be -included with the request to the remote. - -It is up to the remote server to decide what it does with the spans it -creates. This is called the sampling policy and it can be configured -through Jaeger's settings. - -For OpenTracing concepts see -. - -For more information about Jaeger's implementation see - - -## Setting up OpenTracing - -To receive OpenTracing spans, start up a Jaeger server. This can be done -using docker like so: - -```sh -docker run -d --name jaeger \ - -p 6831:6831/udp \ - -p 6832:6832/udp \ - -p 5778:5778 \ - -p 16686:16686 \ - -p 14268:14268 \ - jaegertracing/all-in-one:1 -``` - -Latest documentation is probably at -https://www.jaegertracing.io/docs/latest/getting-started. - -## Enable OpenTracing in Synapse - -OpenTracing is not enabled by default. It must be enabled in the -homeserver config by adding the `opentracing` option to your config file. You can find -documentation about how to do this in the [config manual under the header 'Opentracing'](usage/configuration/config_documentation.md#opentracing). -See below for an example Opentracing configuration: - -```yaml -opentracing: - enabled: true - homeserver_whitelist: - - "mytrustedhomeserver.org" - - "*.myotherhomeservers.com" -``` - -## Homeserver whitelisting - -The homeserver whitelist is configured using regular expressions. A list -of regular expressions can be given and their union will be compared -when propagating any spans contexts to another homeserver. - -Though it's mostly safe to send and receive span contexts to and from -untrusted users since span contexts are usually opaque ids it can lead -to two problems, namely: - -- If the span context is marked as sampled by the sending homeserver - the receiver will sample it. Therefore two homeservers with wildly - different sampling policies could incur higher sampling counts than - intended. 
-- Sending servers can attach arbitrary data to spans, known as - 'baggage'. For safety this has been disabled in Synapse but that - doesn't prevent another server sending you baggage which will be - logged to OpenTracing's logs. - -## Configuring Jaeger - -Sampling strategies can be set as in this document: -. +Synapse now uses OpenTelemetry and the [documentation for tracing has moved](./tracing.md). diff --git a/docs/tracing.md b/docs/tracing.md new file mode 100644 index 0000000000..abb94b565f --- /dev/null +++ b/docs/tracing.md @@ -0,0 +1,94 @@ +# OpenTracing + +## Background + +OpenTracing is a semi-standard being adopted by a number of distributed +tracing platforms. It is a common api for facilitating vendor-agnostic +tracing instrumentation. That is, we can use the OpenTracing api and +select one of a number of tracer implementations to do the heavy lifting +in the background. Our current selected implementation is Jaeger. + +OpenTracing is a tool which gives an insight into the causal +relationship of work done in and between servers. The servers each track +events and report them to a centralised server - in Synapse's case: +Jaeger. The basic unit used to represent events is the span. The span +roughly represents a single piece of work that was done and the time at +which it occurred. A span can have child spans, meaning that the work of +the child had to be completed for the parent span to complete, or it can +have follow-on spans which represent work that is undertaken as a result +of the parent but is not depended on by the parent in order to +finish. + +Since this is undertaken in a distributed environment a request to +another server, such as an RPC or a simple GET, can be considered a span +(a unit of work) for the local server. This causal link is what +OpenTracing aims to capture and visualise. In order to do this, metadata +about the local server's span, i.e. the 'span context', needs to be +included with the request to the remote. 
+ +It is up to the remote server to decide what it does with the spans it +creates. This is called the sampling policy and it can be configured +through Jaeger's settings. + +For OpenTracing concepts see +<https://opentracing.io/docs/overview/what-is-tracing/>. + +For more information about Jaeger's implementation see +<https://www.jaegertracing.io/docs/> + +## Setting up OpenTracing + +To receive OpenTracing spans, start up a Jaeger server. This can be done +using docker like so: + +```sh +docker run -d --name jaeger \ + -p 6831:6831/udp \ + -p 6832:6832/udp \ + -p 5778:5778 \ + -p 16686:16686 \ + -p 14268:14268 \ + jaegertracing/all-in-one:1 +``` + +Latest documentation is probably at +https://www.jaegertracing.io/docs/latest/getting-started. + +## Enable OpenTracing in Synapse + +OpenTracing is not enabled by default. It must be enabled in the +homeserver config by adding the `opentracing` option to your config file. You can find +documentation about how to do this in the [config manual under the header 'Opentracing'](usage/configuration/config_documentation.md#opentracing). +See below for an example OpenTracing configuration: + +```yaml +opentracing: + enabled: true + homeserver_whitelist: + - "mytrustedhomeserver.org" + - "*.myotherhomeservers.com" +``` + +## Homeserver whitelisting + +The homeserver whitelist is configured using regular expressions. A list +of regular expressions can be given and their union will be compared +when propagating any span contexts to another homeserver. + +Though it's mostly safe to send and receive span contexts to and from +untrusted users since span contexts are usually opaque ids it can lead +to two problems, namely: + +- If the span context is marked as sampled by the sending homeserver + the receiver will sample it. Therefore two homeservers with wildly + different sampling policies could incur higher sampling counts than + intended. +- Sending servers can attach arbitrary data to spans, known as + 'baggage'. 
For safety this has been disabled in Synapse but that + doesn't prevent another server sending you baggage which will be + logged to OpenTracing's logs. + +## Configuring Jaeger + +Sampling strategies can be set as in this document: +<https://www.jaegertracing.io/docs/latest/sampling/>. diff --git a/synapse/api/auth.py b/synapse/api/auth.py index aa4ad9a506..7d429600af 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -164,7 +164,7 @@ class Auth: ) -> Requester: """Helper for get_user_by_req - Once get_user_by_req has set up the opentracing span, this does the actual work. + Once get_user_by_req has set up the tracing span, this does the actual work. """ try: ip_addr = request.getClientAddress().host diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py index c819cdcf8b..3f2c8bcfa1 100644 --- a/synapse/federation/sender/transaction_manager.py +++ b/synapse/federation/sender/transaction_manager.py @@ -81,7 +81,7 @@ class TransactionManager: edus: List of EDUs to send """ - # Make a transaction-sending opentracing span. This span follows on from + # Make a transaction-sending tracing span. This span follows on from # all the edus in that transaction. This needs to be done since there is # no active span here, so if the edus were not received by the remote the # span would have no causality and it would be forgotten. 
diff --git a/synapse/federation/transport/server/_base.py b/synapse/federation/transport/server/_base.py index 7beafe33c1..c9b46111fe 100644 --- a/synapse/federation/transport/server/_base.py +++ b/synapse/federation/transport/server/_base.py @@ -310,7 +310,7 @@ class BaseFederationServlet: logger.warning("authenticate_request failed: %s", e) raise - # update the active opentracing span with the authenticated entity + # update the active tracing span with the authenticated entity set_attribute("authenticated_entity", str(origin)) # if the origin is authenticated and whitelisted, use its span context diff --git a/synapse/http/server.py b/synapse/http/server.py index 5b94e159d6..4ff2171a53 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -327,7 +327,7 @@ class HttpServer(Protocol): subsequent arguments will be any matched groups from the regex. This should return either tuple of (code, response), or None. servlet_classname (str): The name of the handler to be used in prometheus - and opentracing logs. + and tracing logs. """ @@ -338,7 +338,7 @@ class _AsyncResource(resource.Resource, metaclass=abc.ABCMeta): requests by method, or override `_async_render` to handle all requests. Args: - extract_context: Whether to attempt to extract the opentracing + extract_context: Whether to attempt to extract the tracing context from the request the servlet is handling. """ @@ -508,7 +508,7 @@ class JsonResource(DirectServeJsonResource): callback: The handler for the request. Usually a Servlet servlet_classname: The name of the handler to be used in prometheus - and opentracing logs. + and tracing logs. """ method_bytes = method.encode("utf-8") diff --git a/synapse/http/site.py b/synapse/http/site.py index a82c23174a..d82c046dd7 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -85,7 +85,7 @@ class SynapseRequest(Request): # server name, for client requests this is the Requester object. 
self._requester: Optional[Union[Requester, str]] = None - # An opentracing span for this request. Will be closed when the request is + # A tracing span for this request. Will be closed when the request is # completely processed. self._tracing_span: Optional["opentelemetry.trace.Span"] = None @@ -165,7 +165,7 @@ class SynapseRequest(Request): self.logcontext.request.authenticated_entity = authenticated_entity or requester def set_tracing_span(self, span: "opentelemetry.trace.Span") -> None: - """attach an opentracing span to this request + """attach a tracing span to this request Doing so will cause the span to be closed when we finish processing the request """ @@ -479,7 +479,7 @@ class SynapseRequest(Request): usage.evt_db_fetch_count, ) - # complete the opentracing span, if any. + # complete the tracing span, if any. if self._tracing_span: self._tracing_span.end() diff --git a/synapse/logging/tracing.py b/synapse/logging/tracing.py index cc5a8297c0..364524273a 100644 --- a/synapse/logging/tracing.py +++ b/synapse/logging/tracing.py @@ -573,7 +573,7 @@ def start_active_span_from_edu( Args: operation_name: The label for the chunk of time used to process the given edu. edu_content: an edu_content with a `context` field whose value is - canonical json for a dict which contains opentracing information. + canonical json for a dict which contains tracing information. """ if opentelemetry is None: return contextlib.nullcontext() # type: ignore[unreachable] @@ -731,14 +731,15 @@ def inject_trace_id_into_response_headers(response_headers: Headers) -> None: ) def get_active_span_text_map(destination: Optional[str] = None) -> Dict[str, str]: """ - Gets the active tracing Context as a dict. This can be used instead of manually - injecting a span into an empty carrier. + Gets the active tracing Context serialized as a dict. This can be used + instead of manually injecting a span into an empty carrier. Args: destination: the name of the remote server. 
Returns: - dict: the active span's context if opentracing is enabled, otherwise empty. + dict: the serialized active span's context if opentelemetry is enabled, otherwise + empty. """ if destination and not whitelisted_homeserver(destination): return {} @@ -758,7 +759,7 @@ def get_active_span_text_map(destination: Optional[str] = None) -> Dict[str, str def context_from_request( request: Request, ) -> Optional["opentelemetry.context.context.Context"]: - """Extract an opentracing context from the headers on an HTTP request + """Extract an opentelemetry context from the headers on an HTTP request This is useful when we have received an HTTP request from another part of our system, and want to link our spans to those of the remote system. @@ -779,7 +780,7 @@ def extract_text_map( carrier: Dict[str, str] ) -> Optional["opentelemetry.context.context.Context"]: """ - Wrapper method for opentracing's tracer.extract for TEXT_MAP. + Wrapper method for opentelemetry's propagator.extract for TEXT_MAP. Args: carrier: a dict possibly containing a context. diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index 0c379814c3..f34c067515 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -222,7 +222,7 @@ class _EventPeristenceQueue(Generic[_PersistResult]): ) queue.append(end_item) - # also add our active opentracing span to the item so that we get a link back + # also add our active tracing span to the item so that we get a link back span = get_active_span() if span: end_item.parent_tracing_span_contexts.append(span.get_span_context()) @@ -233,7 +233,7 @@ class _EventPeristenceQueue(Generic[_PersistResult]): # wait for the queue item to complete res = await make_deferred_yieldable(end_item.deferred.observe()) - # add another opentracing span which links to the persist trace. + # add another tracing span which links to the persist trace. 
with start_active_span( f"{task.name}_complete", links=[Link(end_item.tracing_span_context)], diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index c8fd2b4365..7ceb7a202b 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -401,8 +401,8 @@ class DeviceWorkerStore(EndToEndKeyWorkerStore): if update_stream_id > previous_update_stream_id: # FIXME If this overwrites an older update, this discards the - # previous OpenTracing context. - # It might make it harder to track down issues using OpenTracing. + # previous tracing context. + # It might make it harder to track down issues using tracing. # If there's a good reason why it doesn't matter, a comment here # about that would not hurt. query_map[key] = (update_stream_id, update_context) @@ -493,7 +493,7 @@ class DeviceWorkerStore(EndToEndKeyWorkerStore): destination: The host the device updates are intended for from_stream_id: The minimum stream_id to filter updates by, exclusive query_map: Dictionary mapping (user_id, device_id) to - (update stream_id, the relevant json-encoded opentracing context) + (update stream_id, the relevant json-encoded tracing context) Returns: List of objects representing a device update EDU. diff --git a/tests/logging/test_tracing.py b/tests/logging/test_tracing.py index 75b7745ab6..36736ea8c5 100644 --- a/tests/logging/test_tracing.py +++ b/tests/logging/test_tracing.py @@ -42,7 +42,7 @@ class TracingTestCase(TestCase): def setUp(self) -> None: # since this is a unit test, we don't really want to mess around with the - # global variables that power opentracing. We create our own tracer instance + # global variables that power opentelemetry. We create our own tracer instance # and test with it. self._tracer_provider = opentelemetry.sdk.trace.TracerProvider()