mirror of
https://github.com/element-hq/synapse
synced 2024-09-20 02:55:10 +00:00
Easier to follow local vs remote span tracing
The `incoming-federation-request` vs `process-federation_request` was first introduced in https://github.com/matrix-org/synapse/pull/11870 - Span for remote trace: `incoming-federation-request` - `child_of` reference: `origin_span_context` - `follows_from` reference: `servlet_span` - Span for local trace: `process-federation-request` - `child_of` reference: `servlet_span` (by the nature of it being active) - `follows_from` reference: `incoming-federation-request`
This commit is contained in:
parent
786dd9b4b1
commit
7c135b93bd
2 changed files with 98 additions and 44 deletions
|
@ -27,6 +27,8 @@ from synapse.http.site import SynapseRequest
|
||||||
from synapse.logging.context import run_in_background
|
from synapse.logging.context import run_in_background
|
||||||
from synapse.logging.tracing import (
|
from synapse.logging.tracing import (
|
||||||
get_active_span,
|
get_active_span,
|
||||||
|
Link,
|
||||||
|
create_non_recording_span,
|
||||||
set_attribute,
|
set_attribute,
|
||||||
span_context_from_request,
|
span_context_from_request,
|
||||||
start_active_span,
|
start_active_span,
|
||||||
|
@ -313,29 +315,45 @@ class BaseFederationServlet:
|
||||||
|
|
||||||
# if the origin is authenticated and whitelisted, use its span context
|
# if the origin is authenticated and whitelisted, use its span context
|
||||||
# as the parent.
|
# as the parent.
|
||||||
context = None
|
origin_span_context = None
|
||||||
if origin and whitelisted_homeserver(origin):
|
if origin and whitelisted_homeserver(origin):
|
||||||
context = span_context_from_request(request)
|
origin_span_context = span_context_from_request(request)
|
||||||
|
|
||||||
if context:
|
if origin_span_context:
|
||||||
servlet_span = get_active_span()
|
local_servlet_span = get_active_span()
|
||||||
# a scope which uses the origin's context as a parent
|
# Create a span which uses the `origin_span_context` as a parent
|
||||||
processing_start_time = time.time()
|
# so we can see how the incoming payload was processed while
|
||||||
scope = start_active_span_follows_from(
|
# we're looking at the outgoing trace. Since the parent is set
|
||||||
|
# to a remote span (from the origin), it won't show up in the
|
||||||
|
# local trace which is why we create another span below for the
|
||||||
|
# local trace. A span can only have one parent so we have to
|
||||||
|
# create two separate ones.
|
||||||
|
remote_parent_span = start_active_span(
|
||||||
"incoming-federation-request",
|
"incoming-federation-request",
|
||||||
child_of=context,
|
context=origin_span_context,
|
||||||
contexts=(servlet_span,),
|
# Cross-link back to the local trace so we can jump
|
||||||
start_time=processing_start_time,
|
# to the incoming side from the remote origin trace.
|
||||||
|
links=[Link(local_servlet_span.get_span_context())],
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create a local span to appear in the local trace
|
||||||
|
local_parent_span = start_active_span(
|
||||||
|
"process-federation-request",
|
||||||
|
# Cross-link back to the remote outgoing trace so we jump over
|
||||||
|
# there.
|
||||||
|
links=[Link(remote_parent_span.get_span_context())],
|
||||||
)
|
)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# just use our context as a parent
|
# Otherwise just use our local context as a parent
|
||||||
scope = start_active_span(
|
local_parent_span = start_active_span(
|
||||||
"incoming-federation-request",
|
"process-federation-request",
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
# Don't need to record anything for the remote
|
||||||
with scope:
|
remote_parent_span = create_non_recording_span()
|
||||||
|
|
||||||
|
with remote_parent_span, local_parent_span:
|
||||||
if origin and self.RATELIMIT:
|
if origin and self.RATELIMIT:
|
||||||
with ratelimiter.ratelimit(origin) as d:
|
with ratelimiter.ratelimit(origin) as d:
|
||||||
await d
|
await d
|
||||||
|
@ -352,16 +370,6 @@ class BaseFederationServlet:
|
||||||
response = await func(
|
response = await func(
|
||||||
origin, content, request.args, *args, **kwargs
|
origin, content, request.args, *args, **kwargs
|
||||||
)
|
)
|
||||||
finally:
|
|
||||||
# if we used the origin's context as the parent, add a new span using
|
|
||||||
# the servlet span as a parent, so that we have a link
|
|
||||||
if context:
|
|
||||||
scope2 = start_active_span_follows_from(
|
|
||||||
"process-federation_request",
|
|
||||||
contexts=(scope.span,),
|
|
||||||
start_time=processing_start_time,
|
|
||||||
)
|
|
||||||
scope2.close()
|
|
||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
|
@ -162,6 +162,7 @@ Gotchas
|
||||||
than one caller? Will all of those calling functions have be in a context
|
than one caller? Will all of those calling functions have be in a context
|
||||||
with an active span?
|
with an active span?
|
||||||
"""
|
"""
|
||||||
|
from abc import ABC
|
||||||
import contextlib
|
import contextlib
|
||||||
import enum
|
import enum
|
||||||
import inspect
|
import inspect
|
||||||
|
@ -204,7 +205,7 @@ if TYPE_CHECKING:
|
||||||
|
|
||||||
# Helper class
|
# Helper class
|
||||||
|
|
||||||
# Always returns the fixed value given for any accessed property
|
# This will always returns the fixed value given for any accessed property
|
||||||
class _DummyLookup(object):
|
class _DummyLookup(object):
|
||||||
def __init__(self, value):
|
def __init__(self, value):
|
||||||
self.value = value
|
self.value = value
|
||||||
|
@ -213,6 +214,21 @@ class _DummyLookup(object):
|
||||||
return object.__getattribute__(self, "value")
|
return object.__getattribute__(self, "value")
|
||||||
|
|
||||||
|
|
||||||
|
class DummyLink(ABC):
|
||||||
|
def __init__(self):
|
||||||
|
self.not_implemented_message = (
|
||||||
|
"opentelemetry wasn't imported so this is just a dummy link placeholder"
|
||||||
|
)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def context(self):
|
||||||
|
raise NotImplementedError(self.not_implemented_message)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def context(self):
|
||||||
|
raise NotImplementedError(self.not_implemented_message)
|
||||||
|
|
||||||
|
|
||||||
# These dependencies are optional so they can fail to import
|
# These dependencies are optional so they can fail to import
|
||||||
# and we
|
# and we
|
||||||
try:
|
try:
|
||||||
|
@ -229,12 +245,13 @@ try:
|
||||||
SpanKind = opentelemetry.trace.SpanKind
|
SpanKind = opentelemetry.trace.SpanKind
|
||||||
SpanAttributes = opentelemetry.semconv.trace.SpanAttributes
|
SpanAttributes = opentelemetry.semconv.trace.SpanAttributes
|
||||||
StatusCode = opentelemetry.trace.StatusCode
|
StatusCode = opentelemetry.trace.StatusCode
|
||||||
|
Link = opentelemetry.trace.Link
|
||||||
except ImportError:
|
except ImportError:
|
||||||
opentelemetry = None # type: ignore[assignment]
|
opentelemetry = None # type: ignore[assignment]
|
||||||
SpanKind = _DummyLookup(0)
|
SpanKind = _DummyLookup(0)
|
||||||
SpanAttributes = _DummyLookup("fake-attribute")
|
SpanAttributes = _DummyLookup("fake-attribute")
|
||||||
StatusCode = _DummyLookup(0)
|
StatusCode = _DummyLookup(0)
|
||||||
|
Link = DummyLink
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
@ -435,6 +452,15 @@ def whitelisted_homeserver(destination: str) -> bool:
|
||||||
# Start spans and scopes
|
# Start spans and scopes
|
||||||
|
|
||||||
|
|
||||||
|
def create_non_recording_span():
|
||||||
|
if opentelemetry is None:
|
||||||
|
return contextlib.nullcontext() # type: ignore[unreachable]
|
||||||
|
|
||||||
|
return opentelemetry.trace.NonRecordingSpan(
|
||||||
|
opentelemetry.trace.INVALID_SPAN_CONTEXT
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def start_active_span(
|
def start_active_span(
|
||||||
name: str,
|
name: str,
|
||||||
*,
|
*,
|
||||||
|
@ -446,11 +472,15 @@ def start_active_span(
|
||||||
record_exception: bool = True,
|
record_exception: bool = True,
|
||||||
set_status_on_exception: bool = True,
|
set_status_on_exception: bool = True,
|
||||||
end_on_exit: bool = True,
|
end_on_exit: bool = True,
|
||||||
|
# For testing only
|
||||||
|
tracer: Optional["opentelemetry.sdk.trace.TracerProvider"] = None,
|
||||||
) -> Iterator["opentelemetry.trace.span.Span"]:
|
) -> Iterator["opentelemetry.trace.span.Span"]:
|
||||||
if opentelemetry is None:
|
if opentelemetry is None:
|
||||||
return contextlib.nullcontext() # type: ignore[unreachable]
|
return contextlib.nullcontext() # type: ignore[unreachable]
|
||||||
|
|
||||||
|
if tracer is None:
|
||||||
tracer = opentelemetry.trace.get_tracer(__name__)
|
tracer = opentelemetry.trace.get_tracer(__name__)
|
||||||
|
|
||||||
return tracer.start_as_current_span(
|
return tracer.start_as_current_span(
|
||||||
name=name,
|
name=name,
|
||||||
context=context,
|
context=context,
|
||||||
|
@ -464,6 +494,8 @@ def start_active_span(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: I don't think we even need this function over the normal `start_active_span`.
|
||||||
|
# The only difference is the `inherit_force_tracing` stuff.
|
||||||
def start_active_span_follows_from(
|
def start_active_span_follows_from(
|
||||||
operation_name: str,
|
operation_name: str,
|
||||||
contexts: Collection,
|
contexts: Collection,
|
||||||
|
@ -491,8 +523,21 @@ def start_active_span_follows_from(
|
||||||
forced, the new span will also have tracing forced.
|
forced, the new span will also have tracing forced.
|
||||||
tracer: override the opentracing tracer. By default the global tracer is used.
|
tracer: override the opentracing tracer. By default the global tracer is used.
|
||||||
"""
|
"""
|
||||||
# TODO
|
if opentelemetry is None:
|
||||||
pass
|
return contextlib.nullcontext() # type: ignore[unreachable]
|
||||||
|
|
||||||
|
links = [opentelemetry.trace.Link(context) for context in contexts]
|
||||||
|
span = start_active_span(
|
||||||
|
name=operation_name,
|
||||||
|
links=links,
|
||||||
|
)
|
||||||
|
|
||||||
|
if inherit_force_tracing and any(
|
||||||
|
is_context_forced_tracing(ctx) for ctx in contexts
|
||||||
|
):
|
||||||
|
force_tracing(span)
|
||||||
|
|
||||||
|
return span
|
||||||
|
|
||||||
|
|
||||||
def start_active_span_from_edu(
|
def start_active_span_from_edu(
|
||||||
|
@ -529,7 +574,7 @@ def set_attribute(key: str, value: Union[str, bool, int, float]) -> None:
|
||||||
|
|
||||||
@ensure_active_span("set the status")
|
@ensure_active_span("set the status")
|
||||||
def set_status(
|
def set_status(
|
||||||
status: "opentelemetry.trace.StatusCode", exc: Optional(Exception)
|
status: "opentelemetry.trace.StatusCode", exc: Optional[Exception]
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Sets a tag on the active span"""
|
"""Sets a tag on the active span"""
|
||||||
active_span = get_active_span()
|
active_span = get_active_span()
|
||||||
|
@ -825,15 +870,16 @@ def trace_servlet(
|
||||||
}
|
}
|
||||||
|
|
||||||
request_name = request.request_metrics.name
|
request_name = request.request_metrics.name
|
||||||
context = span_context_from_request(request) if extract_context else None
|
span_context = span_context_from_request(request) if extract_context else None
|
||||||
|
|
||||||
# we configure the scope not to finish the span immediately on exit, and instead
|
# we configure the scope not to finish the span immediately on exit, and instead
|
||||||
# pass the span into the SynapseRequest, which will finish it once we've finished
|
# pass the span into the SynapseRequest, which will finish it once we've finished
|
||||||
# sending the response to the client.
|
# sending the response to the client.
|
||||||
|
|
||||||
with start_active_span(
|
with start_active_span(
|
||||||
request_name,
|
request_name,
|
||||||
kind=SpanKind.SERVER,
|
kind=SpanKind.SERVER,
|
||||||
context=context,
|
context=span_context,
|
||||||
end_on_exit=False,
|
end_on_exit=False,
|
||||||
) as span:
|
) as span:
|
||||||
request.set_tracing_span(span)
|
request.set_tracing_span(span)
|
||||||
|
|
Loading…
Reference in a new issue