From c9964ba6004903b0cfd76d245b121d9df6cb791c Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 24 Jul 2019 15:27:53 +0100 Subject: [PATCH 001/110] Return dicts from _fetch_event_list --- synapse/storage/events_worker.py | 42 ++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 79680ee856..5b606bec7c 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -17,6 +17,7 @@ from __future__ import division import itertools import logging +import operator from collections import namedtuple from canonicaljson import json @@ -421,28 +422,28 @@ class EventsWorkerStore(SQLBaseStore): The fetch requests. Each entry consists of a list of event ids to be fetched, and a deferred to be completed once the events have been fetched. + + The deferreds are callbacked with a dictionary mapping from event id + to event row. Note that it may well contain additional events that + were not part of this request. """ with Measure(self._clock, "_fetch_event_list"): try: - event_id_lists = list(zip(*event_list))[0] - event_ids = [item for sublist in event_id_lists for item in sublist] + events_to_fetch = set( + event_id for events, _ in event_list for event_id in events + ) row_dict = self._new_transaction( - conn, "do_fetch", [], [], self._fetch_event_rows, event_ids + conn, "do_fetch", [], [], self._fetch_event_rows, events_to_fetch ) # We only want to resolve deferreds from the main thread - def fire(lst, res): - for ids, d in lst: - if not d.called: - try: - with PreserveLoggingContext(): - d.callback([res[i] for i in ids if i in res]) - except Exception: - logger.exception("Failed to callback") + def fire(): + for _, d in event_list: + d.callback(row_dict) with PreserveLoggingContext(): - self.hs.get_reactor().callFromThread(fire, event_list, row_dict) + self.hs.get_reactor().callFromThread(fire) except Exception as e: logger.exception("do_fetch") @@ -461,6 +462,12 @@ class EventsWorkerStore(SQLBaseStore): """Fetches events from the database using the _event_fetch_list. This allows batch and bulk fetching of events - it allows us to fetch events without having to create a new transaction for each request for events. + + Args: + events (Iterable[str]): events to be fetched. + + Returns: + Deferred[Dict[str, _EventCacheEntry]]: map from event id to result. """ if not events: return {} @@ -484,11 +491,16 @@ class EventsWorkerStore(SQLBaseStore): logger.debug("Loading %d events", len(events)) with PreserveLoggingContext(): - rows = yield events_d - logger.debug("Loaded %d events (%d rows)", len(events), len(rows)) + row_map = yield events_d + logger.debug("Loaded %d events (%d rows)", len(events), len(row_map)) + + rows = (row_map.get(event_id) for event_id in events) + + # filter out absent rows + rows = filter(operator.truth, rows) if not allow_rejected: - rows[:] = [r for r in rows if r["rejected_reason"] is None] + rows = (r for r in rows if r["rejected_reason"] is None) res = yield make_deferred_yieldable( defer.gatherResults( From e6a6c4fbab8f409a20422659dddc3b7437a2cc07 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 24 Jul 2019 16:37:50 +0100 Subject: [PATCH 002/110] split _get_events_from_db out of _enqueue_events --- synapse/storage/events_worker.py | 83 ++++++++++++++++++++------------ 1 file changed, 51 insertions(+), 32 deletions(-) diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 5b606bec7c..6e5f1cf6ee 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -343,13 +343,12 @@ class EventsWorkerStore(SQLBaseStore): log_ctx = LoggingContext.current_context() log_ctx.record_event_fetch(len(missing_events_ids)) - # Note that _enqueue_events is also responsible for turning db rows + # Note that _get_events_from_db is also responsible for turning db rows # into FrozenEvents (via _get_event_from_row), which involves seeing if # the events have been redacted, and if so pulling the redaction event out # of the database to check it. # - # _enqueue_events is a bit of a rubbish name but naming is hard. - missing_events = yield self._enqueue_events( + missing_events = yield self._get_events_from_db( missing_events_ids, allow_rejected=allow_rejected ) @@ -458,43 +457,25 @@ class EventsWorkerStore(SQLBaseStore): self.hs.get_reactor().callFromThread(fire, event_list, e) @defer.inlineCallbacks - def _enqueue_events(self, events, allow_rejected=False): - """Fetches events from the database using the _event_fetch_list. This - allows batch and bulk fetching of events - it allows us to fetch events - without having to create a new transaction for each request for events. + def _get_events_from_db(self, event_ids, allow_rejected=False): + """Fetch a bunch of events from the database. + + Returned events will be added to the cache for future lookups. Args: - events (Iterable[str]): events to be fetched. + event_ids (Iterable[str]): The event_ids of the events to fetch + allow_rejected (bool): Whether to include rejected events Returns: - Deferred[Dict[str, _EventCacheEntry]]: map from event id to result. + Deferred[Dict[str, _EventCacheEntry]]: + map from event id to result. """ - if not events: + if not event_ids: return {} - events_d = defer.Deferred() - with self._event_fetch_lock: - self._event_fetch_list.append((events, events_d)) + row_map = yield self._enqueue_events(event_ids) - self._event_fetch_lock.notify() - - if self._event_fetch_ongoing < EVENT_QUEUE_THREADS: - self._event_fetch_ongoing += 1 - should_start = True - else: - should_start = False - - if should_start: - run_as_background_process( - "fetch_events", self.runWithConnection, self._do_fetch - ) - - logger.debug("Loading %d events", len(events)) - with PreserveLoggingContext(): - row_map = yield events_d - logger.debug("Loaded %d events (%d rows)", len(events), len(row_map)) - - rows = (row_map.get(event_id) for event_id in events) + rows = (row_map.get(event_id) for event_id in event_ids) # filter out absent rows rows = filter(operator.truth, rows) @@ -521,6 +502,44 @@ class EventsWorkerStore(SQLBaseStore): return {e.event.event_id: e for e in res if e} + @defer.inlineCallbacks + def _enqueue_events(self, events): + """Fetches events from the database using the _event_fetch_list. This + allows batch and bulk fetching of events - it allows us to fetch events + without having to create a new transaction for each request for events. + + Args: + events (Iterable[str]): events to be fetched. + + Returns: + Deferred[Dict[str, Dict]]: map from event id to row data from the database. + May contain events that weren't requested. + """ + + events_d = defer.Deferred() + with self._event_fetch_lock: + self._event_fetch_list.append((events, events_d)) + + self._event_fetch_lock.notify() + + if self._event_fetch_ongoing < EVENT_QUEUE_THREADS: + self._event_fetch_ongoing += 1 + should_start = True + else: + should_start = False + + if should_start: + run_as_background_process( + "fetch_events", self.runWithConnection, self._do_fetch + ) + + logger.debug("Loading %d events: %s", len(events), events) + with PreserveLoggingContext(): + row_map = yield events_d + logger.debug("Loaded %d events (%d rows)", len(events), len(row_map)) + + return row_map + def _fetch_event_rows(self, txn, event_ids): """Fetch event rows from the database From 448bcfd0f9975e7f26e1e493e269262140dd7dc7 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 24 Jul 2019 16:44:10 +0100 Subject: [PATCH 003/110] recursively fetch redactions --- synapse/storage/events_worker.py | 68 +++++++++++++++++--------------- 1 file changed, 36 insertions(+), 32 deletions(-) diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 6e5f1cf6ee..e15e7d86fe 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -17,7 +17,6 @@ from __future__ import division import itertools import logging -import operator from collections import namedtuple from canonicaljson import json @@ -30,12 +29,7 @@ from synapse.api.room_versions import EventFormatVersions from synapse.events import FrozenEvent, event_type_from_format_version # noqa: F401 from synapse.events.snapshot import EventContext # noqa: F401 from synapse.events.utils import prune_event -from synapse.logging.context import ( - LoggingContext, - PreserveLoggingContext, - make_deferred_yieldable, - run_in_background, -) +from synapse.logging.context import LoggingContext, PreserveLoggingContext from synapse.metrics.background_process_metrics import run_as_background_process from synapse.types import get_domain_from_id from synapse.util import batch_iter @@ -468,39 +462,49 @@ class EventsWorkerStore(SQLBaseStore): Returns: Deferred[Dict[str, _EventCacheEntry]]: - map from event id to result. + map from event id to result. May return extra events which + weren't asked for. """ - if not event_ids: - return {} + fetched_events = {} + events_to_fetch = event_ids - row_map = yield self._enqueue_events(event_ids) + while events_to_fetch: + row_map = yield self._enqueue_events(events_to_fetch) - rows = (row_map.get(event_id) for event_id in event_ids) + # we need to recursively fetch any redactions of those events + redaction_ids = set() + for event_id in events_to_fetch: + row = row_map.get(event_id) + fetched_events[event_id] = row + if row: + redaction_ids.update(row["redactions"]) - # filter out absent rows - rows = filter(operator.truth, rows) + events_to_fetch = redaction_ids.difference(fetched_events.keys()) + if events_to_fetch: + logger.debug("Also fetching redaction events %s", events_to_fetch) - if not allow_rejected: - rows = (r for r in rows if r["rejected_reason"] is None) + result_map = {} + for event_id, row in fetched_events.items(): + if not row: + continue + assert row["event_id"] == event_id - res = yield make_deferred_yieldable( - defer.gatherResults( - [ - run_in_background( - self._get_event_from_row, - row["internal_metadata"], - row["json"], - row["redactions"], - rejected_reason=row["rejected_reason"], - format_version=row["format_version"], - ) - for row in rows - ], - consumeErrors=True, + rejected_reason = row["rejected_reason"] + + if not allow_rejected and rejected_reason: + continue + + cache_entry = yield self._get_event_from_row( + row["internal_metadata"], + row["json"], + row["redactions"], + rejected_reason=row["rejected_reason"], + format_version=row["format_version"], ) - ) - return {e.event.event_id: e for e in res if e} + result_map[event_id] = cache_entry + + return result_map @defer.inlineCallbacks def _enqueue_events(self, events): From 4e97eb89e5ed4517e5967a49acf6db987bb96d51 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 24 Jul 2019 22:45:35 +0100 Subject: [PATCH 004/110] Handle loops in redaction events --- synapse/storage/events_worker.py | 96 ++++++++++++-------------------- tests/storage/test_redaction.py | 70 +++++++++++++++++++++++ 2 files changed, 106 insertions(+), 60 deletions(-) diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index e15e7d86fe..c6fa7f82fd 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -483,7 +483,8 @@ class EventsWorkerStore(SQLBaseStore): if events_to_fetch: logger.debug("Also fetching redaction events %s", events_to_fetch) - result_map = {} + # build a map from event_id to EventBase + event_map = {} for event_id, row in fetched_events.items(): if not row: continue @@ -494,14 +495,37 @@ class EventsWorkerStore(SQLBaseStore): if not allow_rejected and rejected_reason: continue - cache_entry = yield self._get_event_from_row( - row["internal_metadata"], - row["json"], - row["redactions"], - rejected_reason=row["rejected_reason"], - format_version=row["format_version"], + d = json.loads(row["json"]) + internal_metadata = json.loads(row["internal_metadata"]) + + format_version = row["format_version"] + if format_version is None: + # This means that we stored the event before we had the concept + # of a event format version, so it must be a V1 event. + format_version = EventFormatVersions.V1 + + original_ev = event_type_from_format_version(format_version)( + event_dict=d, + internal_metadata_dict=internal_metadata, + rejected_reason=rejected_reason, ) + event_map[event_id] = original_ev + + # finally, we can decide whether each one nededs redacting, and build + # the cache entries. + result_map = {} + for event_id, original_ev in event_map.items(): + redactions = fetched_events[event_id]["redactions"] + redacted_event = self._maybe_redact_event_row( + original_ev, redactions, event_map + ) + + cache_entry = _EventCacheEntry( + event=original_ev, redacted_event=redacted_event + ) + + self._get_event_cache.prefill((event_id,), cache_entry) result_map[event_id] = cache_entry return result_map @@ -615,50 +639,7 @@ class EventsWorkerStore(SQLBaseStore): return event_dict - @defer.inlineCallbacks - def _get_event_from_row( - self, internal_metadata, js, redactions, format_version, rejected_reason=None - ): - """Parse an event row which has been read from the database - - Args: - internal_metadata (str): json-encoded internal_metadata column - js (str): json-encoded event body from event_json - redactions (list[str]): a list of the events which claim to have redacted - this event, from the redactions table - format_version: (str): the 'format_version' column - rejected_reason (str|None): the reason this event was rejected, if any - - Returns: - _EventCacheEntry - """ - with Measure(self._clock, "_get_event_from_row"): - d = json.loads(js) - internal_metadata = json.loads(internal_metadata) - - if format_version is None: - # This means that we stored the event before we had the concept - # of a event format version, so it must be a V1 event. - format_version = EventFormatVersions.V1 - - original_ev = event_type_from_format_version(format_version)( - event_dict=d, - internal_metadata_dict=internal_metadata, - rejected_reason=rejected_reason, - ) - - redacted_event = yield self._maybe_redact_event_row(original_ev, redactions) - - cache_entry = _EventCacheEntry( - event=original_ev, redacted_event=redacted_event - ) - - self._get_event_cache.prefill((original_ev.event_id,), cache_entry) - - return cache_entry - - @defer.inlineCallbacks - def _maybe_redact_event_row(self, original_ev, redactions): + def _maybe_redact_event_row(self, original_ev, redactions, event_map): """Given an event object and a list of possible redacting event ids, determine whether to honour any of those redactions and if so return a redacted event. @@ -666,6 +647,8 @@ class EventsWorkerStore(SQLBaseStore): Args: original_ev (EventBase): redactions (iterable[str]): list of event ids of potential redaction events + event_map (dict[str, EventBase]): other events which have been fetched, in + which we can look up the redaaction events. Map from event id to event. Returns: Deferred[EventBase|None]: if the event should be redacted, a pruned @@ -675,15 +658,9 @@ class EventsWorkerStore(SQLBaseStore): # we choose to ignore redactions of m.room.create events. return None - if original_ev.type == "m.room.redaction": - # ... and redaction events - return None - - redaction_map = yield self._get_events_from_cache_or_db(redactions) - for redaction_id in redactions: - redaction_entry = redaction_map.get(redaction_id) - if not redaction_entry: + redaction_event = event_map.get(redaction_id) + if not redaction_event or redaction_event.rejected_reason: # we don't have the redaction event, or the redaction event was not # authorized. logger.debug( @@ -693,7 +670,6 @@ class EventsWorkerStore(SQLBaseStore): ) continue - redaction_event = redaction_entry.event if redaction_event.room_id != original_ev.room_id: logger.debug( "%s was redacted by %s but redaction was in a different room!", diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index 8488b6edc8..d961b81d48 100644 --- a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -17,6 +17,8 @@ from mock import Mock +from twisted.internet import defer + from synapse.api.constants import EventTypes, Membership from synapse.api.room_versions import RoomVersions from synapse.types import RoomID, UserID @@ -216,3 +218,71 @@ class RedactionTestCase(unittest.HomeserverTestCase): }, event.unsigned["redacted_because"], ) + + def test_circular_redaction(self): + redaction_event_id1 = "$redaction1_id:test" + redaction_event_id2 = "$redaction2_id:test" + + class EventIdManglingBuilder: + def __init__(self, base_builder, event_id): + self._base_builder = base_builder + self._event_id = event_id + + @defer.inlineCallbacks + def build(self, prev_event_ids): + built_event = yield self._base_builder.build(prev_event_ids) + built_event.event_id = self._event_id + built_event._event_dict["event_id"] = self._event_id + return built_event + + @property + def room_id(self): + return self._base_builder.room_id + + event_1, context_1 = self.get_success( + self.event_creation_handler.create_new_client_event( + EventIdManglingBuilder( + self.event_builder_factory.for_room_version( + RoomVersions.V1, + { + "type": EventTypes.Redaction, + "sender": self.u_alice.to_string(), + "room_id": self.room1.to_string(), + "content": {"reason": "test"}, + "redacts": redaction_event_id2, + }, + ), + redaction_event_id1, + ) + ) + ) + + self.get_success(self.store.persist_event(event_1, context_1)) + + event_2, context_2 = self.get_success( + self.event_creation_handler.create_new_client_event( + EventIdManglingBuilder( + self.event_builder_factory.for_room_version( + RoomVersions.V1, + { + "type": EventTypes.Redaction, + "sender": self.u_alice.to_string(), + "room_id": self.room1.to_string(), + "content": {"reason": "test"}, + "redacts": redaction_event_id1, + }, + ), + redaction_event_id2, + ) + ) + ) + self.get_success(self.store.persist_event(event_2, context_2)) + + # fetch one of the redactions + fetched = self.get_success(self.store.get_event(redaction_event_id1)) + + # it should have been redacted + self.assertEqual(fetched.unsigned["redacted_by"], redaction_event_id2) + self.assertEqual( + fetched.unsigned["redacted_because"].event_id, redaction_event_id2 + ) From 5c3eecc70f3777561253afd89adf9fb974a27e69 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Jul 2019 16:08:57 +0100 Subject: [PATCH 005/110] changelog --- changelog.d/5788.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5788.bugfix diff --git a/changelog.d/5788.bugfix b/changelog.d/5788.bugfix new file mode 100644 index 0000000000..5632f3cb99 --- /dev/null +++ b/changelog.d/5788.bugfix @@ -0,0 +1 @@ +Correctly handle redactions of redactions. From d02e41dcb299c7588bc9fa26bd0b5321fd7c5751 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Aug 2019 13:41:27 +0100 Subject: [PATCH 006/110] Handle pusher being deleted during processing. Instead of throwing a StoreError lets break out of processing loop and mark the pusher as stopped. --- synapse/push/emailpusher.py | 19 +++++++++++++------ synapse/push/httppusher.py | 27 ++++++++++++++++++++------- synapse/storage/pusher.py | 30 ++++++++++++++++++++++-------- 3 files changed, 55 insertions(+), 21 deletions(-) diff --git a/synapse/push/emailpusher.py b/synapse/push/emailpusher.py index 424ffa8b68..f688d4152d 100644 --- a/synapse/push/emailpusher.py +++ b/synapse/push/emailpusher.py @@ -234,13 +234,20 @@ class EmailPusher(object): return self.last_stream_ordering = last_stream_ordering - yield self.store.update_pusher_last_stream_ordering_and_success( - self.app_id, - self.email, - self.user_id, - last_stream_ordering, - self.clock.time_msec(), + pusher_still_exists = ( + yield self.store.update_pusher_last_stream_ordering_and_success( + self.app_id, + self.email, + self.user_id, + last_stream_ordering, + self.clock.time_msec(), + ) ) + if not pusher_still_exists: + # The pusher has been deleted while we were processing, so + # lets just stop and return. + self.on_stop() + return def seconds_until(self, ts_msec): secs = (ts_msec - self.clock.time_msec()) / 1000 diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index 5b15b0dbe7..bd5d53af91 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -199,13 +199,21 @@ class HttpPusher(object): http_push_processed_counter.inc() self.backoff_delay = HttpPusher.INITIAL_BACKOFF_SEC self.last_stream_ordering = push_action["stream_ordering"] - yield self.store.update_pusher_last_stream_ordering_and_success( - self.app_id, - self.pushkey, - self.user_id, - self.last_stream_ordering, - self.clock.time_msec(), + pusher_still_exists = ( + yield self.store.update_pusher_last_stream_ordering_and_success( + self.app_id, + self.pushkey, + self.user_id, + self.last_stream_ordering, + self.clock.time_msec(), + ) ) + if not pusher_still_exists: + # The pusher has been deleted while we were processing, so + # lets just stop and return. + self.on_stop() + return + if self.failing_since: self.failing_since = None yield self.store.update_pusher_failing_since( @@ -234,12 +242,17 @@ class HttpPusher(object): ) self.backoff_delay = HttpPusher.INITIAL_BACKOFF_SEC self.last_stream_ordering = push_action["stream_ordering"] - yield self.store.update_pusher_last_stream_ordering( + pusher_still_exists = yield self.store.update_pusher_last_stream_ordering( self.app_id, self.pushkey, self.user_id, self.last_stream_ordering, ) + if not pusher_still_exists: + # The pusher has been deleted while we were processing, so + # lets just stop and return. + self.on_stop() + return self.failing_since = None yield self.store.update_pusher_failing_since( diff --git a/synapse/storage/pusher.py b/synapse/storage/pusher.py index be3d4d9ded..888035fe86 100644 --- a/synapse/storage/pusher.py +++ b/synapse/storage/pusher.py @@ -308,22 +308,36 @@ class PusherStore(PusherWorkerStore): def update_pusher_last_stream_ordering_and_success( self, app_id, pushkey, user_id, last_stream_ordering, last_success ): - yield self._simple_update_one( - "pushers", - {"app_id": app_id, "pushkey": pushkey, "user_name": user_id}, - { + """Update the last stream ordering position we've processed up to for + the given pusher. + + Args: + app_id (str) + pushkey (str) + last_stream_ordering (int) + last_success (int) + + Returns: + Deferred[bool]: Whether the pusher stil exists or not. + """ + updated = yield self._simple_update( + table="pushers", + keyvalues={"app_id": app_id, "pushkey": pushkey, "user_name": user_id}, + updatevalues={ "last_stream_ordering": last_stream_ordering, "last_success": last_success, }, desc="update_pusher_last_stream_ordering_and_success", ) + return bool(updated) + @defer.inlineCallbacks def update_pusher_failing_since(self, app_id, pushkey, user_id, failing_since): - yield self._simple_update_one( - "pushers", - {"app_id": app_id, "pushkey": pushkey, "user_name": user_id}, - {"failing_since": failing_since}, + yield self._simple_update( + table="pushers", + keyvalues={"app_id": app_id, "pushkey": pushkey, "user_name": user_id}, + updatevalues={"failing_since": failing_since}, desc="update_pusher_failing_since", ) From 312cc48e2ba8bfb703ed9f55be76714179723f67 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Aug 2019 13:45:09 +0100 Subject: [PATCH 007/110] Newsfile --- changelog.d/5809.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5809.misc diff --git a/changelog.d/5809.misc b/changelog.d/5809.misc new file mode 100644 index 0000000000..82a812480e --- /dev/null +++ b/changelog.d/5809.misc @@ -0,0 +1 @@ +Handle pusher being deleted during processing rather than logging an exception. From bf4db429209aa8ed6b8926bc8a17cbd1489d97f8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 6 Aug 2019 13:27:22 +0100 Subject: [PATCH 008/110] Don't unnecessarily block notifying of new events. When persisting events we calculate new stream orderings up front. Before we notify about an event all events with lower stream orderings must have finished being persisted. This PR moves the assignment of stream ordering till *after* calculated the new current state and split the batch of events into separate chunks for persistence. This means that if it takes a long time to calculate new current state then it will not block events in other rooms being notified about. This should help reduce some global pauses in the events stream which can last for tens of seconds (if not longer), caused by some particularly expensive state resolutions. --- synapse/storage/events.py | 256 ++++++++++++++++++++------------------ 1 file changed, 135 insertions(+), 121 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 88c0180116..ac876287fc 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -364,147 +364,161 @@ class EventsStore( if not events_and_contexts: return - if backfilled: - stream_ordering_manager = self._backfill_id_gen.get_next_mult( - len(events_and_contexts) - ) - else: - stream_ordering_manager = self._stream_id_gen.get_next_mult( - len(events_and_contexts) - ) + chunks = [ + events_and_contexts[x : x + 100] + for x in range(0, len(events_and_contexts), 100) + ] - with stream_ordering_manager as stream_orderings: - for (event, context), stream in zip(events_and_contexts, stream_orderings): - event.internal_metadata.stream_ordering = stream + for chunk in chunks: + # We can't easily parallelize these since different chunks + # might contain the same event. :( - chunks = [ - events_and_contexts[x : x + 100] - for x in range(0, len(events_and_contexts), 100) - ] + # NB: Assumes that we are only persisting events for one room + # at a time. - for chunk in chunks: - # We can't easily parallelize these since different chunks - # might contain the same event. :( + # map room_id->list[event_ids] giving the new forward + # extremities in each room + new_forward_extremeties = {} - # NB: Assumes that we are only persisting events for one room - # at a time. + # map room_id->(type,state_key)->event_id tracking the full + # state in each room after adding these events. + # This is simply used to prefill the get_current_state_ids + # cache + current_state_for_room = {} - # map room_id->list[event_ids] giving the new forward - # extremities in each room - new_forward_extremeties = {} + # map room_id->(to_delete, to_insert) where to_delete is a list + # of type/state keys to remove from current state, and to_insert + # is a map (type,key)->event_id giving the state delta in each + # room + state_delta_for_room = {} - # map room_id->(type,state_key)->event_id tracking the full - # state in each room after adding these events. - # This is simply used to prefill the get_current_state_ids - # cache - current_state_for_room = {} + if not backfilled: + with Measure(self._clock, "_calculate_state_and_extrem"): + # Work out the new "current state" for each room. + # We do this by working out what the new extremities are and then + # calculating the state from that. + events_by_room = {} + for event, context in chunk: + events_by_room.setdefault(event.room_id, []).append( + (event, context) + ) - # map room_id->(to_delete, to_insert) where to_delete is a list - # of type/state keys to remove from current state, and to_insert - # is a map (type,key)->event_id giving the state delta in each - # room - state_delta_for_room = {} + for room_id, ev_ctx_rm in iteritems(events_by_room): + latest_event_ids = yield self.get_latest_event_ids_in_room( + room_id + ) + new_latest_event_ids = yield self._calculate_new_extremities( + room_id, ev_ctx_rm, latest_event_ids + ) - if not backfilled: - with Measure(self._clock, "_calculate_state_and_extrem"): - # Work out the new "current state" for each room. - # We do this by working out what the new extremities are and then - # calculating the state from that. - events_by_room = {} - for event, context in chunk: - events_by_room.setdefault(event.room_id, []).append( - (event, context) + latest_event_ids = set(latest_event_ids) + if new_latest_event_ids == latest_event_ids: + # No change in extremities, so no change in state + continue + + # there should always be at least one forward extremity. + # (except during the initial persistence of the send_join + # results, in which case there will be no existing + # extremities, so we'll `continue` above and skip this bit.) + assert new_latest_event_ids, "No forward extremities left!" + + new_forward_extremeties[room_id] = new_latest_event_ids + + len_1 = ( + len(latest_event_ids) == 1 + and len(new_latest_event_ids) == 1 + ) + if len_1: + all_single_prev_not_state = all( + len(event.prev_event_ids()) == 1 + and not event.is_state() + for event, ctx in ev_ctx_rm ) - - for room_id, ev_ctx_rm in iteritems(events_by_room): - latest_event_ids = yield self.get_latest_event_ids_in_room( - room_id - ) - new_latest_event_ids = yield self._calculate_new_extremities( - room_id, ev_ctx_rm, latest_event_ids - ) - - latest_event_ids = set(latest_event_ids) - if new_latest_event_ids == latest_event_ids: - # No change in extremities, so no change in state + # Don't bother calculating state if they're just + # a long chain of single ancestor non-state events. + if all_single_prev_not_state: continue - # there should always be at least one forward extremity. - # (except during the initial persistence of the send_join - # results, in which case there will be no existing - # extremities, so we'll `continue` above and skip this bit.) - assert new_latest_event_ids, "No forward extremities left!" + state_delta_counter.inc() + if len(new_latest_event_ids) == 1: + state_delta_single_event_counter.inc() - new_forward_extremeties[room_id] = new_latest_event_ids + # This is a fairly handwavey check to see if we could + # have guessed what the delta would have been when + # processing one of these events. + # What we're interested in is if the latest extremities + # were the same when we created the event as they are + # now. When this server creates a new event (as opposed + # to receiving it over federation) it will use the + # forward extremities as the prev_events, so we can + # guess this by looking at the prev_events and checking + # if they match the current forward extremities. + for ev, _ in ev_ctx_rm: + prev_event_ids = set(ev.prev_event_ids()) + if latest_event_ids == prev_event_ids: + state_delta_reuse_delta_counter.inc() + break - len_1 = ( - len(latest_event_ids) == 1 - and len(new_latest_event_ids) == 1 + logger.info("Calculating state delta for room %s", room_id) + with Measure( + self._clock, "persist_events.get_new_state_after_events" + ): + res = yield self._get_new_state_after_events( + room_id, + ev_ctx_rm, + latest_event_ids, + new_latest_event_ids, ) - if len_1: - all_single_prev_not_state = all( - len(event.prev_event_ids()) == 1 - and not event.is_state() - for event, ctx in ev_ctx_rm - ) - # Don't bother calculating state if they're just - # a long chain of single ancestor non-state events. - if all_single_prev_not_state: - continue + current_state, delta_ids = res - state_delta_counter.inc() - if len(new_latest_event_ids) == 1: - state_delta_single_event_counter.inc() - - # This is a fairly handwavey check to see if we could - # have guessed what the delta would have been when - # processing one of these events. - # What we're interested in is if the latest extremities - # were the same when we created the event as they are - # now. When this server creates a new event (as opposed - # to receiving it over federation) it will use the - # forward extremities as the prev_events, so we can - # guess this by looking at the prev_events and checking - # if they match the current forward extremities. - for ev, _ in ev_ctx_rm: - prev_event_ids = set(ev.prev_event_ids()) - if latest_event_ids == prev_event_ids: - state_delta_reuse_delta_counter.inc() - break - - logger.info("Calculating state delta for room %s", room_id) + # If either are not None then there has been a change, + # and we need to work out the delta (or use that + # given) + if delta_ids is not None: + # If there is a delta we know that we've + # only added or replaced state, never + # removed keys entirely. + state_delta_for_room[room_id] = ([], delta_ids) + elif current_state is not None: with Measure( - self._clock, "persist_events.get_new_state_after_events" + self._clock, "persist_events.calculate_state_delta" ): - res = yield self._get_new_state_after_events( - room_id, - ev_ctx_rm, - latest_event_ids, - new_latest_event_ids, + delta = yield self._calculate_state_delta( + room_id, current_state ) - current_state, delta_ids = res + state_delta_for_room[room_id] = delta - # If either are not None then there has been a change, - # and we need to work out the delta (or use that - # given) - if delta_ids is not None: - # If there is a delta we know that we've - # only added or replaced state, never - # removed keys entirely. - state_delta_for_room[room_id] = ([], delta_ids) - elif current_state is not None: - with Measure( - self._clock, "persist_events.calculate_state_delta" - ): - delta = yield self._calculate_state_delta( - room_id, current_state - ) - state_delta_for_room[room_id] = delta + # If we have the current_state then lets prefill + # the cache with it. + if current_state is not None: + current_state_for_room[room_id] = current_state - # If we have the current_state then lets prefill - # the cache with it. - if current_state is not None: - current_state_for_room[room_id] = current_state + # We want to calculate the stream orderings as late as possible, as + # we only notify after all events with a lesser stream ordering have + # been persisted. I.e. if we spend 10s inside the with block then + # that will delay all subsequent events from being notified about. + # Hence why we do it down here rather than wrapping the entire + # function. + # + # Its safe to do this after calculating the state deltas etc as we + # only need to protect the *persistence* of the events. This is to + # ensure that queries of the form "fetch events since X" don't + # return events and stream positions after events that are still in + # flight, as otherwise subsequent requests "fetch event since Y" + # will not return those events. + # + # Note: Multiple instances of this function cannot be in flight at + # the same time for the same room. + if backfilled: + stream_ordering_manager = self._backfill_id_gen.get_next_mult( + len(chunk) + ) + else: + stream_ordering_manager = self._stream_id_gen.get_next_mult(len(chunk)) + + with stream_ordering_manager as stream_orderings: + for (event, context), stream in zip(chunk, stream_orderings): + event.internal_metadata.stream_ordering = stream yield self.runInteraction( "persist_events", From c32d3590943542b2d433bb50a7b7fe1529c05acd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 6 Aug 2019 13:33:42 +0100 Subject: [PATCH 009/110] Newsfile --- changelog.d/5826.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5826.misc diff --git a/changelog.d/5826.misc b/changelog.d/5826.misc new file mode 100644 index 0000000000..9abed11bbe --- /dev/null +++ b/changelog.d/5826.misc @@ -0,0 +1 @@ +Reduce global pauses in the events stream caused by expensive state resolution during persistence. From af9f1c07646031bf267aa20f2629d5b96c6603b6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 6 Aug 2019 16:27:46 +0100 Subject: [PATCH 010/110] Add a lower bound for TTL on well known results. It costs both us and the remote server for us to fetch the well known for every single request we send, so we add a minimum cache period. This is set to 5m so that we still honour the basic premise of "refetch frequently". --- synapse/http/federation/matrix_federation_agent.py | 4 ++++ tests/http/federation/test_matrix_federation_agent.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index a0d5139839..79e488c942 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -47,6 +47,9 @@ WELL_KNOWN_INVALID_CACHE_PERIOD = 1 * 3600 # cap for .well-known cache period WELL_KNOWN_MAX_CACHE_PERIOD = 48 * 3600 +# lower bound for .well-known cache period +WELL_KNOWN_MIN_CACHE_PERIOD = 5 * 60 + logger = logging.getLogger(__name__) well_known_cache = TTLCache("well-known") @@ -356,6 +359,7 @@ class MatrixFederationAgent(object): cache_period += random.uniform(0, WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER) else: cache_period = min(cache_period, WELL_KNOWN_MAX_CACHE_PERIOD) + cache_period = max(cache_period, WELL_KNOWN_MIN_CACHE_PERIOD) return (result, cache_period) diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py index 4255add097..5e709c0c17 100644 --- a/tests/http/federation/test_matrix_federation_agent.py +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -953,7 +953,7 @@ class MatrixFederationAgentTests(TestCase): well_known_server = self._handle_well_known_connection( client_factory, expected_sni=b"testserv", - response_headers={b"Cache-Control": b"max-age=10"}, + response_headers={b"Cache-Control": b"max-age=1000"}, content=b'{ "m.server": "target-server" }', ) @@ -969,7 +969,7 @@ class MatrixFederationAgentTests(TestCase): self.assertEqual(r, b"target-server") # expire the cache - self.reactor.pump((10.0,)) + self.reactor.pump((1000.0,)) # now it should connect again fetch_d = self.do_get_well_known(b"testserv") From 107ad133fcf5b5a87e2aa1d53ab415aa39a01266 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 7 Aug 2019 15:36:38 +0100 Subject: [PATCH 011/110] Move well known lookup into a separate clas --- .../federation/matrix_federation_agent.py | 166 ++-------------- .../http/federation/well_known_resolver.py | 184 ++++++++++++++++++ .../test_matrix_federation_agent.py | 39 ++-- 3 files changed, 216 insertions(+), 173 deletions(-) create mode 100644 synapse/http/federation/well_known_resolver.py diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index 79e488c942..71a15f434d 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -12,10 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import json + import logging -import random -import time import attr from netaddr import IPAddress @@ -24,34 +22,16 @@ from zope.interface import implementer from twisted.internet import defer from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS from twisted.internet.interfaces import IStreamClientEndpoint -from twisted.web.client import URI, Agent, HTTPConnectionPool, RedirectAgent, readBody -from twisted.web.http import stringToDatetime +from twisted.web.client import URI, Agent, HTTPConnectionPool from twisted.web.http_headers import Headers from twisted.web.iweb import IAgent from synapse.http.federation.srv_resolver import SrvResolver, pick_server_from_list +from synapse.http.federation.well_known_resolver import WellKnownResolver from synapse.logging.context import make_deferred_yieldable from synapse.util import Clock -from synapse.util.caches.ttlcache import TTLCache -from synapse.util.metrics import Measure - -# period to cache .well-known results for by default -WELL_KNOWN_DEFAULT_CACHE_PERIOD = 24 * 3600 - -# jitter to add to the .well-known default cache ttl -WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER = 10 * 60 - -# period to cache failure to fetch .well-known for -WELL_KNOWN_INVALID_CACHE_PERIOD = 1 * 3600 - -# cap for .well-known cache period -WELL_KNOWN_MAX_CACHE_PERIOD = 48 * 3600 - -# lower bound for .well-known cache period -WELL_KNOWN_MIN_CACHE_PERIOD = 5 * 60 logger = logging.getLogger(__name__) -well_known_cache = TTLCache("well-known") @implementer(IAgent) @@ -81,7 +61,7 @@ class MatrixFederationAgent(object): reactor, tls_client_options_factory, _srv_resolver=None, - _well_known_cache=well_known_cache, + _well_known_cache=None, ): self._reactor = reactor self._clock = Clock(reactor) @@ -96,20 +76,15 @@ class MatrixFederationAgent(object): self._pool.maxPersistentPerHost = 5 self._pool.cachedConnectionTimeout = 2 * 60 - _well_known_agent = RedirectAgent( - Agent( + self._well_known_resolver = WellKnownResolver( + self._reactor, + agent=Agent( self._reactor, pool=self._pool, contextFactory=tls_client_options_factory, - ) + ), + well_known_cache=_well_known_cache, ) - self._well_known_agent = _well_known_agent - - # our cache of .well-known lookup results, mapping from server name - # to delegated name. The values can be: - # `bytes`: a valid server-name - # `None`: there is no (valid) .well-known here - self._well_known_cache = _well_known_cache @defer.inlineCallbacks def request(self, method, uri, headers=None, bodyProducer=None): @@ -220,7 +195,10 @@ class MatrixFederationAgent(object): if lookup_well_known: # try a .well-known lookup - well_known_server = yield self._get_well_known(parsed_uri.host) + well_known_result = yield self._well_known_resolver.get_well_known( + parsed_uri.host + ) + well_known_server = well_known_result.delegated_server if well_known_server: # if we found a .well-known, start again, but don't do another @@ -283,86 +261,6 @@ class MatrixFederationAgent(object): target_port=port, ) - @defer.inlineCallbacks - def _get_well_known(self, server_name): - """Attempt to fetch and parse a .well-known file for the given server - - Args: - server_name (bytes): name of the server, from the requested url - - Returns: - Deferred[bytes|None]: either the new server name, from the .well-known, or - None if there was no .well-known file. - """ - try: - result = self._well_known_cache[server_name] - except KeyError: - # TODO: should we linearise so that we don't end up doing two .well-known - # requests for the same server in parallel? - with Measure(self._clock, "get_well_known"): - result, cache_period = yield self._do_get_well_known(server_name) - - if cache_period > 0: - self._well_known_cache.set(server_name, result, cache_period) - - return result - - @defer.inlineCallbacks - def _do_get_well_known(self, server_name): - """Actually fetch and parse a .well-known, without checking the cache - - Args: - server_name (bytes): name of the server, from the requested url - - Returns: - Deferred[Tuple[bytes|None|object],int]: - result, cache period, where result is one of: - - the new server name from the .well-known (as a `bytes`) - - None if there was no .well-known file. - - INVALID_WELL_KNOWN if the .well-known was invalid - """ - uri = b"https://%s/.well-known/matrix/server" % (server_name,) - uri_str = uri.decode("ascii") - logger.info("Fetching %s", uri_str) - try: - response = yield make_deferred_yieldable( - self._well_known_agent.request(b"GET", uri) - ) - body = yield make_deferred_yieldable(readBody(response)) - if response.code != 200: - raise Exception("Non-200 response %s" % (response.code,)) - - parsed_body = json.loads(body.decode("utf-8")) - logger.info("Response from .well-known: %s", parsed_body) - if not isinstance(parsed_body, dict): - raise Exception("not a dict") - if "m.server" not in parsed_body: - raise Exception("Missing key 'm.server'") - except Exception as e: - logger.info("Error fetching %s: %s", uri_str, e) - - # add some randomness to the TTL to avoid a stampeding herd every hour - # after startup - cache_period = WELL_KNOWN_INVALID_CACHE_PERIOD - cache_period += random.uniform(0, WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER) - return (None, cache_period) - - result = parsed_body["m.server"].encode("ascii") - - cache_period = _cache_period_from_headers( - response.headers, time_now=self._reactor.seconds - ) - if cache_period is None: - cache_period = WELL_KNOWN_DEFAULT_CACHE_PERIOD - # add some randomness to the TTL to avoid a stampeding herd every 24 hours - # after startup - cache_period += random.uniform(0, WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER) - else: - cache_period = min(cache_period, WELL_KNOWN_MAX_CACHE_PERIOD) - cache_period = max(cache_period, WELL_KNOWN_MIN_CACHE_PERIOD) - - return (result, cache_period) - @implementer(IStreamClientEndpoint) class LoggingHostnameEndpoint(object): @@ -378,44 +276,6 @@ class LoggingHostnameEndpoint(object): return self.ep.connect(protocol_factory) -def _cache_period_from_headers(headers, time_now=time.time): - cache_controls = _parse_cache_control(headers) - - if b"no-store" in cache_controls: - return 0 - - if b"max-age" in cache_controls: - try: - max_age = int(cache_controls[b"max-age"]) - return max_age - except ValueError: - pass - - expires = headers.getRawHeaders(b"expires") - if expires is not None: - try: - expires_date = stringToDatetime(expires[-1]) - return expires_date - time_now() - except ValueError: - # RFC7234 says 'A cache recipient MUST interpret invalid date formats, - # especially the value "0", as representing a time in the past (i.e., - # "already expired"). - return 0 - - return None - - -def _parse_cache_control(headers): - cache_controls = {} - for hdr in headers.getRawHeaders(b"cache-control", []): - for directive in hdr.split(b","): - splits = [x.strip() for x in directive.split(b"=", 1)] - k = splits[0].lower() - v = splits[1] if len(splits) > 1 else None - cache_controls[k] = v - return cache_controls - - @attr.s class _RoutingResult(object): """The result returned by `_route_matrix_uri`. diff --git a/synapse/http/federation/well_known_resolver.py b/synapse/http/federation/well_known_resolver.py new file mode 100644 index 0000000000..bab4ab015e --- /dev/null +++ b/synapse/http/federation/well_known_resolver.py @@ -0,0 +1,184 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import logging +import random +import time + +import attr + +from twisted.internet import defer +from twisted.web.client import RedirectAgent, readBody +from twisted.web.http import stringToDatetime + +from synapse.logging.context import make_deferred_yieldable +from synapse.util import Clock +from synapse.util.caches.ttlcache import TTLCache +from synapse.util.metrics import Measure + +# period to cache .well-known results for by default +WELL_KNOWN_DEFAULT_CACHE_PERIOD = 24 * 3600 + +# jitter to add to the .well-known default cache ttl +WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER = 10 * 60 + +# period to cache failure to fetch .well-known for +WELL_KNOWN_INVALID_CACHE_PERIOD = 1 * 3600 + +# cap for .well-known cache period +WELL_KNOWN_MAX_CACHE_PERIOD = 48 * 3600 + +# lower bound for .well-known cache period +WELL_KNOWN_MIN_CACHE_PERIOD = 5 * 60 + +logger = logging.getLogger(__name__) + + +@attr.s(slots=True, frozen=True) +class WellKnownLookupResult(object): + delegated_server = attr.ib() + + +class WellKnownResolver(object): + """Handles well-known lookups for matrix servers. + """ + + def __init__(self, reactor, agent, well_known_cache=None): + self._reactor = reactor + self._clock = Clock(reactor) + + if well_known_cache is None: + well_known_cache = TTLCache("well-known") + + self._well_known_cache = well_known_cache + self._well_known_agent = RedirectAgent(agent) + + @defer.inlineCallbacks + def get_well_known(self, server_name): + """Attempt to fetch and parse a .well-known file for the given server + + Args: + server_name (bytes): name of the server, from the requested url + + Returns: + Deferred[WellKnownLookupResult]: The result of the lookup + """ + try: + result = self._well_known_cache[server_name] + except KeyError: + # TODO: should we linearise so that we don't end up doing two .well-known + # requests for the same server in parallel? + with Measure(self._clock, "get_well_known"): + result, cache_period = yield self._do_get_well_known(server_name) + + if cache_period > 0: + self._well_known_cache.set(server_name, result, cache_period) + + return WellKnownLookupResult(delegated_server=result) + + @defer.inlineCallbacks + def _do_get_well_known(self, server_name): + """Actually fetch and parse a .well-known, without checking the cache + + Args: + server_name (bytes): name of the server, from the requested url + + Returns: + Deferred[Tuple[bytes|None|object],int]: + result, cache period, where result is one of: + - the new server name from the .well-known (as a `bytes`) + - None if there was no .well-known file. + - INVALID_WELL_KNOWN if the .well-known was invalid + """ + uri = b"https://%s/.well-known/matrix/server" % (server_name,) + uri_str = uri.decode("ascii") + logger.info("Fetching %s", uri_str) + try: + response = yield make_deferred_yieldable( + self._well_known_agent.request(b"GET", uri) + ) + body = yield make_deferred_yieldable(readBody(response)) + if response.code != 200: + raise Exception("Non-200 response %s" % (response.code,)) + + parsed_body = json.loads(body.decode("utf-8")) + logger.info("Response from .well-known: %s", parsed_body) + if not isinstance(parsed_body, dict): + raise Exception("not a dict") + if "m.server" not in parsed_body: + raise Exception("Missing key 'm.server'") + except Exception as e: + logger.info("Error fetching %s: %s", uri_str, e) + + # add some randomness to the TTL to avoid a stampeding herd every hour + # after startup + cache_period = WELL_KNOWN_INVALID_CACHE_PERIOD + cache_period += random.uniform(0, WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER) + return (None, cache_period) + + result = parsed_body["m.server"].encode("ascii") + + cache_period = _cache_period_from_headers( + response.headers, time_now=self._reactor.seconds + ) + if cache_period is None: + cache_period = WELL_KNOWN_DEFAULT_CACHE_PERIOD + # add some randomness to the TTL to avoid a stampeding herd every 24 hours + # after startup + cache_period += random.uniform(0, WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER) + else: + cache_period = min(cache_period, WELL_KNOWN_MAX_CACHE_PERIOD) + cache_period = max(cache_period, WELL_KNOWN_MIN_CACHE_PERIOD) + + return (result, cache_period) + + +def _cache_period_from_headers(headers, time_now=time.time): + cache_controls = _parse_cache_control(headers) + + if b"no-store" in cache_controls: + return 0 + + if b"max-age" in cache_controls: + try: + max_age = int(cache_controls[b"max-age"]) + return max_age + except ValueError: + pass + + expires = headers.getRawHeaders(b"expires") + if expires is not None: + try: + expires_date = stringToDatetime(expires[-1]) + return expires_date - time_now() + except ValueError: + # RFC7234 says 'A cache recipient MUST interpret invalid date formats, + # especially the value "0", as representing a time in the past (i.e., + # "already expired"). + return 0 + + return None + + +def _parse_cache_control(headers): + cache_controls = {} + for hdr in headers.getRawHeaders(b"cache-control", []): + for directive in hdr.split(b","): + splits = [x.strip() for x in directive.split(b"=", 1)] + k = splits[0].lower() + v = splits[1] if len(splits) > 1 else None + cache_controls[k] = v + return cache_controls diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py index 5e709c0c17..1435baede2 100644 --- a/tests/http/federation/test_matrix_federation_agent.py +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -25,17 +25,19 @@ from twisted.internet._sslverify import ClientTLSOptions, OpenSSLCertificateOpti from twisted.internet.protocol import Factory from twisted.protocols.tls import TLSMemoryBIOFactory from twisted.web._newclient import ResponseNeverReceived +from twisted.web.client import Agent from twisted.web.http import HTTPChannel from twisted.web.http_headers import Headers from twisted.web.iweb import IPolicyForHTTPS from synapse.config.homeserver import HomeServerConfig from synapse.crypto.context_factory import ClientTLSOptionsFactory -from synapse.http.federation.matrix_federation_agent import ( - MatrixFederationAgent, +from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent +from synapse.http.federation.srv_resolver import Server +from synapse.http.federation.well_known_resolver import ( + WellKnownResolver, _cache_period_from_headers, ) -from synapse.http.federation.srv_resolver import Server from synapse.logging.context import LoggingContext from synapse.util.caches.ttlcache import TTLCache @@ -79,9 +81,10 @@ class MatrixFederationAgentTests(TestCase): self._config = config = HomeServerConfig() config.parse_config_dict(config_dict, "", "") + self.tls_factory = ClientTLSOptionsFactory(config) self.agent = MatrixFederationAgent( reactor=self.reactor, - tls_client_options_factory=ClientTLSOptionsFactory(config), + tls_client_options_factory=self.tls_factory, _srv_resolver=self.mock_resolver, _well_known_cache=self.well_known_cache, ) @@ -928,20 +931,16 @@ class MatrixFederationAgentTests(TestCase): self.reactor.pump((0.1,)) self.successResultOf(test_d) - @defer.inlineCallbacks - def do_get_well_known(self, serv): - try: - result = yield self.agent._get_well_known(serv) - logger.info("Result from well-known fetch: %s", result) - except Exception as e: - logger.warning("Error fetching well-known: %s", e) - raise - return result - def test_well_known_cache(self): + well_known_resolver = WellKnownResolver( + self.reactor, + Agent(self.reactor, contextFactory=self.tls_factory), + well_known_cache=self.well_known_cache, + ) + self.reactor.lookups["testserv"] = "1.2.3.4" - fetch_d = self.do_get_well_known(b"testserv") + fetch_d = well_known_resolver.get_well_known(b"testserv") # there should be an attempt to connect on port 443 for the .well-known clients = self.reactor.tcpClients @@ -958,21 +957,21 @@ class MatrixFederationAgentTests(TestCase): ) r = self.successResultOf(fetch_d) - self.assertEqual(r, b"target-server") + self.assertEqual(r.delegated_server, b"target-server") # close the tcp connection well_known_server.loseConnection() # repeat the request: it should hit the cache - fetch_d = self.do_get_well_known(b"testserv") + fetch_d = well_known_resolver.get_well_known(b"testserv") r = self.successResultOf(fetch_d) - self.assertEqual(r, b"target-server") + self.assertEqual(r.delegated_server, b"target-server") # expire the cache self.reactor.pump((1000.0,)) # now it should connect again - fetch_d = self.do_get_well_known(b"testserv") + fetch_d = well_known_resolver.get_well_known(b"testserv") self.assertEqual(len(clients), 1) (host, port, client_factory, _timeout, _bindAddress) = clients.pop(0) @@ -986,7 +985,7 @@ class MatrixFederationAgentTests(TestCase): ) r = self.successResultOf(fetch_d) - self.assertEqual(r, b"other-server") + self.assertEqual(r.delegated_server, b"other-server") class TestCachePeriodFromHeaders(TestCase): From 1016f303e58b1305ed5b3572fde002e1273e0fc0 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 8 Aug 2019 14:58:21 +0100 Subject: [PATCH 012/110] make user creation steps clearer --- INSTALL.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index 25343593d5..5728882460 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -419,12 +419,11 @@ If Synapse is not configured with an SMTP server, password reset via email will ## Registering a user -You will need at least one user on your server in order to use a Matrix -client. Users can be registered either via a Matrix client, or via a -commandline script. +The easiest way to create a new user is to do so from a client like [Riot](https://riot.im). -To get started, it is easiest to use the command line to register new -users. This can be done as follows: +Alternatively you can do so from the command line if you have installed via pip. + +This can be done as follows: ``` $ source ~/synapse/env/bin/activate From a7f0161276c06e0d46a86ced9cc9dc5aa1e36486 Mon Sep 17 00:00:00 2001 From: Thomas Citharel Date: Fri, 9 Aug 2019 18:33:15 +0200 Subject: [PATCH 013/110] Fix curl command typo in purge_remote_media.sh Was verbose option instead of -X, command didn't work Signed-off-by: Thomas Citharel --- changelog.d/5839.bugfix | 1 + contrib/purge_api/purge_remote_media.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/5839.bugfix diff --git a/changelog.d/5839.bugfix b/changelog.d/5839.bugfix new file mode 100644 index 0000000000..829aea4687 --- /dev/null +++ b/changelog.d/5839.bugfix @@ -0,0 +1 @@ +The purge_remote_media.sh script was fixed diff --git a/contrib/purge_api/purge_remote_media.sh b/contrib/purge_api/purge_remote_media.sh index 99c07c663d..77220d3bd5 100644 --- a/contrib/purge_api/purge_remote_media.sh +++ b/contrib/purge_api/purge_remote_media.sh @@ -51,4 +51,4 @@ TOKEN=$(sql "SELECT token FROM access_tokens WHERE user_id='$ADMIN' ORDER BY id # finally start pruning media: ############################################################################### set -x # for debugging the generated string -curl --header "Authorization: Bearer $TOKEN" -v POST "$API_URL/admin/purge_media_cache/?before_ts=$UNIX_TIMESTAMP" +curl --header "Authorization: Bearer $TOKEN" -X POST "$API_URL/admin/purge_media_cache/?before_ts=$UNIX_TIMESTAMP" From 41546f946e896ba6ca0ce2b98cf13d04ef516f13 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Aug 2019 09:56:58 +0100 Subject: [PATCH 014/110] Newsfile --- changelog.d/5836.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5836.misc diff --git a/changelog.d/5836.misc b/changelog.d/5836.misc new file mode 100644 index 0000000000..18f2488201 --- /dev/null +++ b/changelog.d/5836.misc @@ -0,0 +1 @@ +Add a lower bound to well-known lookup cache time to avoid repeated lookups. From c9456193d38ef83cef3bb76b30ef94ea28433e6d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Aug 2019 13:56:26 +0100 Subject: [PATCH 015/110] Whitelist history visbility sytests for worker mode --- .buildkite/worker-blacklist | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.buildkite/worker-blacklist b/.buildkite/worker-blacklist index 8ed8eef1a3..cda5c84e94 100644 --- a/.buildkite/worker-blacklist +++ b/.buildkite/worker-blacklist @@ -3,10 +3,6 @@ Message history can be paginated -m.room.history_visibility == "world_readable" allows/forbids appropriately for Guest users - -m.room.history_visibility == "world_readable" allows/forbids appropriately for Real users - Can re-join room if re-invited /upgrade creates a new room From 156a461cbd9e965fbc1ce386bb68f3c5237cc772 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Aug 2019 13:57:52 +0100 Subject: [PATCH 016/110] Newsfile --- changelog.d/5843.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5843.misc diff --git a/changelog.d/5843.misc b/changelog.d/5843.misc new file mode 100644 index 0000000000..e7e7d572b7 --- /dev/null +++ b/changelog.d/5843.misc @@ -0,0 +1 @@ +Whitelist history visbility sytests in worker mode tests. From 3de6cc245fb2eb950cd7bc5e6c50f462a06937fe Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Aug 2019 14:16:42 +0100 Subject: [PATCH 017/110] Changelogs should end in '.' or '!' --- changelog.d/5839.bugfix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/5839.bugfix b/changelog.d/5839.bugfix index 829aea4687..5775bfa653 100644 --- a/changelog.d/5839.bugfix +++ b/changelog.d/5839.bugfix @@ -1 +1 @@ -The purge_remote_media.sh script was fixed +The purge_remote_media.sh script was fixed. From f218705d2a9ce60b0b996ab29b7c85efb9236109 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Aug 2019 10:06:51 +0100 Subject: [PATCH 018/110] Make default well known cache global again. --- synapse/http/federation/well_known_resolver.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/http/federation/well_known_resolver.py b/synapse/http/federation/well_known_resolver.py index bab4ab015e..d2866ff67d 100644 --- a/synapse/http/federation/well_known_resolver.py +++ b/synapse/http/federation/well_known_resolver.py @@ -47,6 +47,9 @@ WELL_KNOWN_MIN_CACHE_PERIOD = 5 * 60 logger = logging.getLogger(__name__) +_well_known_cache = TTLCache("well-known") + + @attr.s(slots=True, frozen=True) class WellKnownLookupResult(object): delegated_server = attr.ib() @@ -61,7 +64,7 @@ class WellKnownResolver(object): self._clock = Clock(reactor) if well_known_cache is None: - well_known_cache = TTLCache("well-known") + well_known_cache = _well_known_cache self._well_known_cache = well_known_cache self._well_known_agent = RedirectAgent(agent) From fb3469f53ac86c4771caa9fdfc946eaa298977b9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Aug 2019 10:17:23 +0100 Subject: [PATCH 019/110] Clarify docstring Co-Authored-By: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/storage/pusher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/pusher.py b/synapse/storage/pusher.py index 888035fe86..b431d24b8a 100644 --- a/synapse/storage/pusher.py +++ b/synapse/storage/pusher.py @@ -318,7 +318,7 @@ class PusherStore(PusherWorkerStore): last_success (int) Returns: - Deferred[bool]: Whether the pusher stil exists or not. + Deferred[bool]: True if the pusher still exists; False if it has been deleted. """ updated = yield self._simple_update( table="pushers", From 0b6fbb28a858f56766c77eedede7d1dade9e9b1c Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 13 Aug 2019 21:49:28 +1000 Subject: [PATCH 020/110] Don't load the media repo when configured to use an external media repo (#5754) --- .gitignore | 1 + changelog.d/5754.feature | 1 + docs/sample_config.yaml | 7 ++ docs/workers.rst | 7 ++ synapse/app/media_repository.py | 9 ++ synapse/config/repository.py | 20 +++++ synapse/rest/admin/__init__.py | 102 +++------------------- synapse/rest/admin/_base.py | 25 ++++++ synapse/rest/admin/media.py | 101 +++++++++++++++++++++ synapse/rest/media/v1/media_repository.py | 6 +- 10 files changed, 188 insertions(+), 91 deletions(-) create mode 100644 changelog.d/5754.feature create mode 100644 synapse/rest/admin/media.py diff --git a/.gitignore b/.gitignore index a84c41b0c9..f6168a8819 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ _trial_temp*/ /*.log /*.log.config /*.pid +/.python-version /*.signing.key /env/ /homeserver*.yaml diff --git a/changelog.d/5754.feature b/changelog.d/5754.feature new file mode 100644 index 0000000000..c1a09a4dce --- /dev/null +++ b/changelog.d/5754.feature @@ -0,0 +1 @@ +Synapse will no longer serve any media repo admin endpoints when `enable_media_repo` is set to False in the configuration. If a media repo worker is used, the admin APIs relating to the media repo will be served from it instead. \ No newline at end of file diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 1b206fe6bf..0c6be30e51 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -565,6 +565,13 @@ log_config: "CONFDIR/SERVERNAME.log.config" +## Media Store ## + +# Enable the media store service in the Synapse master. Uncomment the +# following if you are using a separate media store worker. +# +#enable_media_repo: false + # Directory where uploaded images and attachments are stored. # media_store_path: "DATADIR/media_store" diff --git a/docs/workers.rst b/docs/workers.rst index 7b2d2db533..e11e117418 100644 --- a/docs/workers.rst +++ b/docs/workers.rst @@ -206,6 +206,13 @@ Handles the media repository. It can handle all endpoints starting with:: /_matrix/media/ +And the following regular expressions matching media-specific administration +APIs:: + + ^/_synapse/admin/v1/purge_media_cache$ + ^/_synapse/admin/v1/room/.*/media$ + ^/_synapse/admin/v1/quarantine_media/.*$ + You should also set ``enable_media_repo: False`` in the shared configuration file to stop the main synapse running background jobs related to managing the media repository. diff --git a/synapse/app/media_repository.py b/synapse/app/media_repository.py index ea26f29acb..3a168577c7 100644 --- a/synapse/app/media_repository.py +++ b/synapse/app/media_repository.py @@ -26,6 +26,7 @@ from synapse.app import _base from synapse.config._base import ConfigError from synapse.config.homeserver import HomeServerConfig from synapse.config.logger import setup_logging +from synapse.http.server import JsonResource from synapse.http.site import SynapseSite from synapse.logging.context import LoggingContext from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy @@ -35,6 +36,7 @@ from synapse.replication.slave.storage.client_ips import SlavedClientIpStore from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.replication.slave.storage.transactions import SlavedTransactionStore from synapse.replication.tcp.client import ReplicationClientHandler +from synapse.rest.admin import register_servlets_for_media_repo from synapse.rest.media.v0.content_repository import ContentRepoResource from synapse.server import HomeServer from synapse.storage.engines import create_engine @@ -71,6 +73,12 @@ class MediaRepositoryServer(HomeServer): resources[METRICS_PREFIX] = MetricsResource(RegistryProxy) elif name == "media": media_repo = self.get_media_repository_resource() + + # We need to serve the admin servlets for media on the + # worker. + admin_resource = JsonResource(self, canonical_json=False) + register_servlets_for_media_repo(self, admin_resource) + resources.update( { MEDIA_PREFIX: media_repo, @@ -78,6 +86,7 @@ class MediaRepositoryServer(HomeServer): CONTENT_REPO_PREFIX: ContentRepoResource( self, self.config.uploads_path ), + "/_synapse/admin": admin_resource, } ) diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 80a628d9b0..db39697e45 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import os from collections import namedtuple @@ -87,6 +88,18 @@ def parse_thumbnail_requirements(thumbnail_sizes): class ContentRepositoryConfig(Config): def read_config(self, config, **kwargs): + + # Only enable the media repo if either the media repo is enabled or the + # current worker app is the media repo. + if ( + self.enable_media_repo is False + and config.worker_app != "synapse.app.media_repository" + ): + self.can_load_media_repo = False + return + else: + self.can_load_media_repo = True + self.max_upload_size = self.parse_size(config.get("max_upload_size", "10M")) self.max_image_pixels = self.parse_size(config.get("max_image_pixels", "32M")) self.max_spider_size = self.parse_size(config.get("max_spider_size", "10M")) @@ -202,6 +215,13 @@ class ContentRepositoryConfig(Config): return ( r""" + ## Media Store ## + + # Enable the media store service in the Synapse master. Uncomment the + # following if you are using a separate media store worker. + # + #enable_media_repo: false + # Directory where uploaded images and attachments are stored. # media_store_path: "%(media_store)s" diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index 0a7d9b81b2..5720cab425 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -27,7 +27,7 @@ from twisted.internet import defer import synapse from synapse.api.constants import Membership, UserTypes -from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError +from synapse.api.errors import Codes, NotFoundError, SynapseError from synapse.http.server import JsonResource from synapse.http.servlet import ( RestServlet, @@ -36,7 +36,12 @@ from synapse.http.servlet import ( parse_json_object_from_request, parse_string, ) -from synapse.rest.admin._base import assert_requester_is_admin, assert_user_is_admin +from synapse.rest.admin._base import ( + assert_requester_is_admin, + assert_user_is_admin, + historical_admin_path_patterns, +) +from synapse.rest.admin.media import register_servlets_for_media_repo from synapse.rest.admin.server_notice_servlet import SendServerNoticeServlet from synapse.types import UserID, create_requester from synapse.util.versionstring import get_version_string @@ -44,28 +49,6 @@ from synapse.util.versionstring import get_version_string logger = logging.getLogger(__name__) -def historical_admin_path_patterns(path_regex): - """Returns the list of patterns for an admin endpoint, including historical ones - - This is a backwards-compatibility hack. Previously, the Admin API was exposed at - various paths under /_matrix/client. This function returns a list of patterns - matching those paths (as well as the new one), so that existing scripts which rely - on the endpoints being available there are not broken. - - Note that this should only be used for existing endpoints: new ones should just - register for the /_synapse/admin path. - """ - return list( - re.compile(prefix + path_regex) - for prefix in ( - "^/_synapse/admin/v1", - "^/_matrix/client/api/v1/admin", - "^/_matrix/client/unstable/admin", - "^/_matrix/client/r0/admin", - ) - ) - - class UsersRestServlet(RestServlet): PATTERNS = historical_admin_path_patterns("/users/(?P[^/]*)") @@ -255,25 +238,6 @@ class WhoisRestServlet(RestServlet): return (200, ret) -class PurgeMediaCacheRestServlet(RestServlet): - PATTERNS = historical_admin_path_patterns("/purge_media_cache") - - def __init__(self, hs): - self.media_repository = hs.get_media_repository() - self.auth = hs.get_auth() - - @defer.inlineCallbacks - def on_POST(self, request): - yield assert_requester_is_admin(self.auth, request) - - before_ts = parse_integer(request, "before_ts", required=True) - logger.info("before_ts: %r", before_ts) - - ret = yield self.media_repository.delete_old_remote_media(before_ts) - - return (200, ret) - - class PurgeHistoryRestServlet(RestServlet): PATTERNS = historical_admin_path_patterns( "/purge_history/(?P[^/]*)(/(?P[^/]+))?" @@ -542,50 +506,6 @@ class ShutdownRoomRestServlet(RestServlet): ) -class QuarantineMediaInRoom(RestServlet): - """Quarantines all media in a room so that no one can download it via - this server. - """ - - PATTERNS = historical_admin_path_patterns("/quarantine_media/(?P[^/]+)") - - def __init__(self, hs): - self.store = hs.get_datastore() - self.auth = hs.get_auth() - - @defer.inlineCallbacks - def on_POST(self, request, room_id): - requester = yield self.auth.get_user_by_req(request) - yield assert_user_is_admin(self.auth, requester.user) - - num_quarantined = yield self.store.quarantine_media_ids_in_room( - room_id, requester.user.to_string() - ) - - return (200, {"num_quarantined": num_quarantined}) - - -class ListMediaInRoom(RestServlet): - """Lists all of the media in a given room. - """ - - PATTERNS = historical_admin_path_patterns("/room/(?P[^/]+)/media") - - def __init__(self, hs): - self.store = hs.get_datastore() - - @defer.inlineCallbacks - def on_GET(self, request, room_id): - requester = yield self.auth.get_user_by_req(request) - is_admin = yield self.auth.is_server_admin(requester.user) - if not is_admin: - raise AuthError(403, "You are not a server admin") - - local_mxcs, remote_mxcs = yield self.store.get_media_mxcs_in_room(room_id) - - return (200, {"local": local_mxcs, "remote": remote_mxcs}) - - class ResetPasswordRestServlet(RestServlet): """Post request to allow an administrator reset password for a user. This needs user to have administrator access in Synapse. @@ -825,7 +745,6 @@ def register_servlets(hs, http_server): def register_servlets_for_client_rest_resource(hs, http_server): """Register only the servlets which need to be exposed on /_matrix/client/xxx""" WhoisRestServlet(hs).register(http_server) - PurgeMediaCacheRestServlet(hs).register(http_server) PurgeHistoryStatusRestServlet(hs).register(http_server) DeactivateAccountRestServlet(hs).register(http_server) PurgeHistoryRestServlet(hs).register(http_server) @@ -834,10 +753,13 @@ def register_servlets_for_client_rest_resource(hs, http_server): GetUsersPaginatedRestServlet(hs).register(http_server) SearchUsersRestServlet(hs).register(http_server) ShutdownRoomRestServlet(hs).register(http_server) - QuarantineMediaInRoom(hs).register(http_server) - ListMediaInRoom(hs).register(http_server) UserRegisterServlet(hs).register(http_server) DeleteGroupAdminRestServlet(hs).register(http_server) AccountValidityRenewServlet(hs).register(http_server) + + # Load the media repo ones if we're using them. + if hs.config.can_load_media_repo: + register_servlets_for_media_repo(hs, http_server) + # don't add more things here: new servlets should only be exposed on # /_synapse/admin so should not go here. Instead register them in AdminRestResource. diff --git a/synapse/rest/admin/_base.py b/synapse/rest/admin/_base.py index 881d67b89c..5a9b08d3ef 100644 --- a/synapse/rest/admin/_base.py +++ b/synapse/rest/admin/_base.py @@ -12,11 +12,36 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +import re + from twisted.internet import defer from synapse.api.errors import AuthError +def historical_admin_path_patterns(path_regex): + """Returns the list of patterns for an admin endpoint, including historical ones + + This is a backwards-compatibility hack. Previously, the Admin API was exposed at + various paths under /_matrix/client. This function returns a list of patterns + matching those paths (as well as the new one), so that existing scripts which rely + on the endpoints being available there are not broken. + + Note that this should only be used for existing endpoints: new ones should just + register for the /_synapse/admin path. + """ + return list( + re.compile(prefix + path_regex) + for prefix in ( + "^/_synapse/admin/v1", + "^/_matrix/client/api/v1/admin", + "^/_matrix/client/unstable/admin", + "^/_matrix/client/r0/admin", + ) + ) + + @defer.inlineCallbacks def assert_requester_is_admin(auth, request): """Verify that the requester is an admin user diff --git a/synapse/rest/admin/media.py b/synapse/rest/admin/media.py new file mode 100644 index 0000000000..824df919f2 --- /dev/null +++ b/synapse/rest/admin/media.py @@ -0,0 +1,101 @@ +# -*- coding: utf-8 -*- +# Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018-2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from twisted.internet import defer + +from synapse.api.errors import AuthError +from synapse.http.servlet import RestServlet, parse_integer +from synapse.rest.admin._base import ( + assert_requester_is_admin, + assert_user_is_admin, + historical_admin_path_patterns, +) + +logger = logging.getLogger(__name__) + + +class QuarantineMediaInRoom(RestServlet): + """Quarantines all media in a room so that no one can download it via + this server. + """ + + PATTERNS = historical_admin_path_patterns("/quarantine_media/(?P[^/]+)") + + def __init__(self, hs): + self.store = hs.get_datastore() + self.auth = hs.get_auth() + + @defer.inlineCallbacks + def on_POST(self, request, room_id): + requester = yield self.auth.get_user_by_req(request) + yield assert_user_is_admin(self.auth, requester.user) + + num_quarantined = yield self.store.quarantine_media_ids_in_room( + room_id, requester.user.to_string() + ) + + return (200, {"num_quarantined": num_quarantined}) + + +class ListMediaInRoom(RestServlet): + """Lists all of the media in a given room. + """ + + PATTERNS = historical_admin_path_patterns("/room/(?P[^/]+)/media") + + def __init__(self, hs): + self.store = hs.get_datastore() + + @defer.inlineCallbacks + def on_GET(self, request, room_id): + requester = yield self.auth.get_user_by_req(request) + is_admin = yield self.auth.is_server_admin(requester.user) + if not is_admin: + raise AuthError(403, "You are not a server admin") + + local_mxcs, remote_mxcs = yield self.store.get_media_mxcs_in_room(room_id) + + return (200, {"local": local_mxcs, "remote": remote_mxcs}) + + +class PurgeMediaCacheRestServlet(RestServlet): + PATTERNS = historical_admin_path_patterns("/purge_media_cache") + + def __init__(self, hs): + self.media_repository = hs.get_media_repository() + self.auth = hs.get_auth() + + @defer.inlineCallbacks + def on_POST(self, request): + yield assert_requester_is_admin(self.auth, request) + + before_ts = parse_integer(request, "before_ts", required=True) + logger.info("before_ts: %r", before_ts) + + ret = yield self.media_repository.delete_old_remote_media(before_ts) + + return (200, ret) + + +def register_servlets_for_media_repo(hs, http_server): + """ + Media repo specific APIs. + """ + PurgeMediaCacheRestServlet(hs).register(http_server) + QuarantineMediaInRoom(hs).register(http_server) + ListMediaInRoom(hs).register(http_server) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 92beefa176..cf5759e9a6 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -33,6 +33,7 @@ from synapse.api.errors import ( RequestSendFailed, SynapseError, ) +from synapse.config._base import ConfigError from synapse.logging.context import defer_to_thread from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util.async_helpers import Linearizer @@ -753,8 +754,11 @@ class MediaRepositoryResource(Resource): """ def __init__(self, hs): - Resource.__init__(self) + # If we're not configured to use it, raise if we somehow got here. + if not hs.config.can_load_media_repo: + raise ConfigError("Synapse is not configured to use a media repo.") + super().__init__() media_repo = hs.get_media_repository() self.putChild(b"upload", UploadResource(hs, media_repo)) From 96bdd661b88f373826ea25baa94199cf2b8d25f9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Aug 2019 12:50:36 +0100 Subject: [PATCH 021/110] Remove redundant return --- synapse/push/emailpusher.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/push/emailpusher.py b/synapse/push/emailpusher.py index f688d4152d..42e5b0c0a5 100644 --- a/synapse/push/emailpusher.py +++ b/synapse/push/emailpusher.py @@ -247,7 +247,6 @@ class EmailPusher(object): # The pusher has been deleted while we were processing, so # lets just stop and return. self.on_stop() - return def seconds_until(self, ts_msec): secs = (ts_msec - self.clock.time_msec()) / 1000 From 17e1e807264ce13774d9b343c96406795ea24c27 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Aug 2019 15:39:14 +0100 Subject: [PATCH 022/110] Retry well-known lookup before expiry. This gives a bit of a grace period where we can attempt to refetch a remote `well-known`, while still using the cached result if that fails. Hopefully this will make the well-known resolution a bit more torelant of failures, rather than it immediately treating failures as "no result" and caching that for an hour. --- .../http/federation/well_known_resolver.py | 82 ++++++++++++++----- synapse/util/caches/ttlcache.py | 8 +- .../test_matrix_federation_agent.py | 69 ++++++++++++++++ tests/util/caches/test_ttlcache.py | 4 +- 4 files changed, 136 insertions(+), 27 deletions(-) diff --git a/synapse/http/federation/well_known_resolver.py b/synapse/http/federation/well_known_resolver.py index d2866ff67d..bb250c6922 100644 --- a/synapse/http/federation/well_known_resolver.py +++ b/synapse/http/federation/well_known_resolver.py @@ -44,6 +44,12 @@ WELL_KNOWN_MAX_CACHE_PERIOD = 48 * 3600 # lower bound for .well-known cache period WELL_KNOWN_MIN_CACHE_PERIOD = 5 * 60 +# Attempt to refetch a cached well-known N% of the TTL before it expires. +# e.g. if set to 0.2 and we have a cached entry with a TTL of 5mins, then +# we'll start trying to refetch 1 minute before it expires. +WELL_KNOWN_GRACE_PERIOD_FACTOR = 0.2 + + logger = logging.getLogger(__name__) @@ -80,15 +86,38 @@ class WellKnownResolver(object): Deferred[WellKnownLookupResult]: The result of the lookup """ try: - result = self._well_known_cache[server_name] + prev_result, expiry, ttl = self._well_known_cache.get_with_expiry( + server_name + ) + + now = self._clock.time() + if now < expiry - WELL_KNOWN_GRACE_PERIOD_FACTOR * ttl: + return WellKnownLookupResult(delegated_server=prev_result) except KeyError: - # TODO: should we linearise so that we don't end up doing two .well-known - # requests for the same server in parallel? + prev_result = None + + # TODO: should we linearise so that we don't end up doing two .well-known + # requests for the same server in parallel? + try: with Measure(self._clock, "get_well_known"): result, cache_period = yield self._do_get_well_known(server_name) - if cache_period > 0: - self._well_known_cache.set(server_name, result, cache_period) + except _FetchWellKnownFailure as e: + if prev_result and e.temporary: + # This is a temporary failure and we have a still valid cached + # result, so lets return that. Hopefully the next time we ask + # the remote will be back up again. + return WellKnownLookupResult(delegated_server=prev_result) + + result = None + + # add some randomness to the TTL to avoid a stampeding herd every hour + # after startup + cache_period = WELL_KNOWN_INVALID_CACHE_PERIOD + cache_period += random.uniform(0, WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER) + + if cache_period > 0: + self._well_known_cache.set(server_name, result, cache_period) return WellKnownLookupResult(delegated_server=result) @@ -99,40 +128,42 @@ class WellKnownResolver(object): Args: server_name (bytes): name of the server, from the requested url + Raises: + _FetchWellKnownFailure if we fail to lookup a result + Returns: - Deferred[Tuple[bytes|None|object],int]: - result, cache period, where result is one of: - - the new server name from the .well-known (as a `bytes`) - - None if there was no .well-known file. - - INVALID_WELL_KNOWN if the .well-known was invalid + Deferred[Tuple[bytes,int]]: The lookup result and cache period. """ uri = b"https://%s/.well-known/matrix/server" % (server_name,) uri_str = uri.decode("ascii") logger.info("Fetching %s", uri_str) + + # We do this in two steps to differentiate between possibly transient + # errors (e.g. can't connect to host, 503 response) and more permenant + # errors (such as getting a 404 response). try: response = yield make_deferred_yieldable( self._well_known_agent.request(b"GET", uri) ) body = yield make_deferred_yieldable(readBody(response)) + + if 500 <= response.code < 600: + raise Exception("Non-200 response %s" % (response.code,)) + except Exception as e: + logger.info("Error fetching %s: %s", uri_str, e) + raise _FetchWellKnownFailure(temporary=True) + + try: if response.code != 200: raise Exception("Non-200 response %s" % (response.code,)) parsed_body = json.loads(body.decode("utf-8")) logger.info("Response from .well-known: %s", parsed_body) - if not isinstance(parsed_body, dict): - raise Exception("not a dict") - if "m.server" not in parsed_body: - raise Exception("Missing key 'm.server'") + + result = parsed_body["m.server"].encode("ascii") except Exception as e: logger.info("Error fetching %s: %s", uri_str, e) - - # add some randomness to the TTL to avoid a stampeding herd every hour - # after startup - cache_period = WELL_KNOWN_INVALID_CACHE_PERIOD - cache_period += random.uniform(0, WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER) - return (None, cache_period) - - result = parsed_body["m.server"].encode("ascii") + raise _FetchWellKnownFailure(temporary=False) cache_period = _cache_period_from_headers( response.headers, time_now=self._reactor.seconds @@ -185,3 +216,10 @@ def _parse_cache_control(headers): v = splits[1] if len(splits) > 1 else None cache_controls[k] = v return cache_controls + + +@attr.s() +class _FetchWellKnownFailure(Exception): + # True if we didn't get a non-5xx HTTP response, i.e. this may or may not be + # a temporary failure. + temporary = attr.ib() diff --git a/synapse/util/caches/ttlcache.py b/synapse/util/caches/ttlcache.py index 2af8ca43b1..99646c7cf0 100644 --- a/synapse/util/caches/ttlcache.py +++ b/synapse/util/caches/ttlcache.py @@ -55,7 +55,7 @@ class TTLCache(object): if e != SENTINEL: self._expiry_list.remove(e) - entry = _CacheEntry(expiry_time=expiry, key=key, value=value) + entry = _CacheEntry(expiry_time=expiry, ttl=ttl, key=key, value=value) self._data[key] = entry self._expiry_list.add(entry) @@ -87,7 +87,8 @@ class TTLCache(object): key: key to look up Returns: - Tuple[Any, float]: the value from the cache, and the expiry time + Tuple[Any, float, float]: the value from the cache, the expiry time + and the TTL Raises: KeyError if the entry is not found @@ -99,7 +100,7 @@ class TTLCache(object): self._metrics.inc_misses() raise self._metrics.inc_hits() - return e.value, e.expiry_time + return e.value, e.expiry_time, e.ttl def pop(self, key, default=SENTINEL): """Remove a value from the cache @@ -158,5 +159,6 @@ class _CacheEntry(object): # expiry_time is the first attribute, so that entries are sorted by expiry. expiry_time = attr.ib() + ttl = attr.ib() key = attr.ib() value = attr.ib() diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py index 1435baede2..2c568788b3 100644 --- a/tests/http/federation/test_matrix_federation_agent.py +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -987,6 +987,75 @@ class MatrixFederationAgentTests(TestCase): r = self.successResultOf(fetch_d) self.assertEqual(r.delegated_server, b"other-server") + def test_well_known_cache_with_temp_failure(self): + """Test that we refetch well-known before the cache expires, and that + it ignores transient errors. + """ + + well_known_resolver = WellKnownResolver( + self.reactor, + Agent(self.reactor, contextFactory=self.tls_factory), + well_known_cache=self.well_known_cache, + ) + + self.reactor.lookups["testserv"] = "1.2.3.4" + + fetch_d = well_known_resolver.get_well_known(b"testserv") + + # there should be an attempt to connect on port 443 for the .well-known + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients.pop(0) + self.assertEqual(host, "1.2.3.4") + self.assertEqual(port, 443) + + well_known_server = self._handle_well_known_connection( + client_factory, + expected_sni=b"testserv", + response_headers={b"Cache-Control": b"max-age=1000"}, + content=b'{ "m.server": "target-server" }', + ) + + r = self.successResultOf(fetch_d) + self.assertEqual(r.delegated_server, b"target-server") + + # close the tcp connection + well_known_server.loseConnection() + + # Get close to the cache expiry, this will cause the resolver to do + # another lookup. + self.reactor.pump((900.0,)) + + fetch_d = well_known_resolver.get_well_known(b"testserv") + clients = self.reactor.tcpClients + (host, port, client_factory, _timeout, _bindAddress) = clients.pop(0) + + # fonx the connection attempt, this will be treated as a temporary + # failure. + client_factory.clientConnectionFailed(None, Exception("nope")) + + # attemptdelay on the hostnameendpoint is 0.3, so takes that long before the + # .well-known request fails. + self.reactor.pump((0.4,)) + + # Resolver should return cached value, despite the lookup failing. + r = self.successResultOf(fetch_d) + self.assertEqual(r.delegated_server, b"target-server") + + # Expire the cache and repeat the request + self.reactor.pump((100.0,)) + + # Repated the request, this time it should fail if the lookup fails. + fetch_d = well_known_resolver.get_well_known(b"testserv") + + clients = self.reactor.tcpClients + (host, port, client_factory, _timeout, _bindAddress) = clients.pop(0) + client_factory.clientConnectionFailed(None, Exception("nope")) + self.reactor.pump((0.4,)) + + r = self.successResultOf(fetch_d) + self.assertEqual(r.delegated_server, None) + class TestCachePeriodFromHeaders(TestCase): def test_cache_control(self): diff --git a/tests/util/caches/test_ttlcache.py b/tests/util/caches/test_ttlcache.py index c94cbb662b..816795c136 100644 --- a/tests/util/caches/test_ttlcache.py +++ b/tests/util/caches/test_ttlcache.py @@ -36,7 +36,7 @@ class CacheTestCase(unittest.TestCase): self.assertTrue("one" in self.cache) self.assertEqual(self.cache.get("one"), "1") self.assertEqual(self.cache["one"], "1") - self.assertEqual(self.cache.get_with_expiry("one"), ("1", 110)) + self.assertEqual(self.cache.get_with_expiry("one"), ("1", 110, 10)) self.assertEqual(self.cache._metrics.hits, 3) self.assertEqual(self.cache._metrics.misses, 0) @@ -77,7 +77,7 @@ class CacheTestCase(unittest.TestCase): self.assertEqual(self.cache["two"], "2") self.assertEqual(self.cache["three"], "3") - self.assertEqual(self.cache.get_with_expiry("two"), ("2", 120)) + self.assertEqual(self.cache.get_with_expiry("two"), ("2", 120, 20)) self.assertEqual(self.cache._metrics.hits, 5) self.assertEqual(self.cache._metrics.misses, 0) From aedfec3ad75f9986e03ecd64dee2ce6f9af0ee13 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Aug 2019 12:40:09 +0100 Subject: [PATCH 023/110] Newsfile --- changelog.d/5844.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5844.misc diff --git a/changelog.d/5844.misc b/changelog.d/5844.misc new file mode 100644 index 0000000000..a0826af0d2 --- /dev/null +++ b/changelog.d/5844.misc @@ -0,0 +1 @@ +Retry well-known lookup before the cache expires, giving a grace period where the remote well-known can be down but we still use the old result. From 18bdac8ee4813005813a7021b1056ae83b44d6a2 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Wed, 14 Aug 2019 02:05:11 +1000 Subject: [PATCH 024/110] fix config being a dict, actually --- synapse/config/repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/config/repository.py b/synapse/config/repository.py index db39697e45..fdb1f246d0 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -93,7 +93,7 @@ class ContentRepositoryConfig(Config): # current worker app is the media repo. if ( self.enable_media_repo is False - and config.worker_app != "synapse.app.media_repository" + and config.get("worker_app") != "synapse.app.media_repository" ): self.can_load_media_repo = False return From 28bce1ac7c900d7f8bd8c12bb0558e28c337fddc Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Wed, 14 Aug 2019 02:08:24 +1000 Subject: [PATCH 025/110] changelog --- changelog.d/5848.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5848.feature diff --git a/changelog.d/5848.feature b/changelog.d/5848.feature new file mode 100644 index 0000000000..c1a09a4dce --- /dev/null +++ b/changelog.d/5848.feature @@ -0,0 +1 @@ +Synapse will no longer serve any media repo admin endpoints when `enable_media_repo` is set to False in the configuration. If a media repo worker is used, the admin APIs relating to the media repo will be served from it instead. \ No newline at end of file From f70d0a1dd998cf131ff3e6fc9f4319c51903b9f3 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Tue, 13 Aug 2019 18:17:47 +0100 Subject: [PATCH 026/110] 1.3.0rc1 --- CHANGES.md | 74 ++++++++++++++++++++++++++++++++++++++++ changelog.d/5678.removal | 1 - changelog.d/5686.feature | 1 - changelog.d/5693.bugfix | 1 - changelog.d/5694.misc | 1 - changelog.d/5695.misc | 1 - changelog.d/5706.misc | 1 - changelog.d/5713.misc | 1 - changelog.d/5715.misc | 1 - changelog.d/5717.misc | 1 - changelog.d/5719.misc | 1 - changelog.d/5720.misc | 1 - changelog.d/5722.misc | 1 - changelog.d/5724.bugfix | 1 - changelog.d/5725.bugfix | 1 - changelog.d/5729.removal | 1 - changelog.d/5730.misc | 1 - changelog.d/5731.misc | 1 - changelog.d/5732.feature | 1 - changelog.d/5733.misc | 1 - changelog.d/5736.misc | 1 - changelog.d/5738.misc | 1 - changelog.d/5740.misc | 1 - changelog.d/5743.bugfix | 1 - changelog.d/5746.misc | 1 - changelog.d/5749.misc | 1 - changelog.d/5750.misc | 1 - changelog.d/5752.misc | 1 - changelog.d/5753.misc | 1 - changelog.d/5754.feature | 1 - changelog.d/5768.misc | 1 - changelog.d/5770.misc | 1 - changelog.d/5774.misc | 1 - changelog.d/5775.bugfix | 1 - changelog.d/5780.misc | 1 - changelog.d/5782.removal | 1 - changelog.d/5783.feature | 1 - changelog.d/5785.misc | 1 - changelog.d/5787.misc | 1 - changelog.d/5788.bugfix | 1 - changelog.d/5789.bugfix | 1 - changelog.d/5790.misc | 1 - changelog.d/5792.misc | 1 - changelog.d/5793.misc | 1 - changelog.d/5794.misc | 1 - changelog.d/5796.misc | 1 - changelog.d/5798.bugfix | 1 - changelog.d/5801.misc | 1 - changelog.d/5802.misc | 1 - changelog.d/5804.bugfix | 1 - changelog.d/5805.misc | 1 - changelog.d/5806.bugfix | 1 - changelog.d/5807.feature | 1 - changelog.d/5808.misc | 1 - changelog.d/5809.misc | 1 - changelog.d/5810.misc | 1 - changelog.d/5825.bugfix | 1 - changelog.d/5826.misc | 1 - changelog.d/5836.misc | 1 - changelog.d/5839.bugfix | 1 - changelog.d/5843.misc | 1 - changelog.d/5848.feature | 1 - synapse/__init__.py | 2 +- 63 files changed, 75 insertions(+), 62 deletions(-) delete mode 100644 changelog.d/5678.removal delete mode 100644 changelog.d/5686.feature delete mode 100644 changelog.d/5693.bugfix delete mode 100644 changelog.d/5694.misc delete mode 100644 changelog.d/5695.misc delete mode 100644 changelog.d/5706.misc delete mode 100644 changelog.d/5713.misc delete mode 100644 changelog.d/5715.misc delete mode 100644 changelog.d/5717.misc delete mode 100644 changelog.d/5719.misc delete mode 100644 changelog.d/5720.misc delete mode 100644 changelog.d/5722.misc delete mode 100644 changelog.d/5724.bugfix delete mode 100644 changelog.d/5725.bugfix delete mode 100644 changelog.d/5729.removal delete mode 100644 changelog.d/5730.misc delete mode 100644 changelog.d/5731.misc delete mode 100644 changelog.d/5732.feature delete mode 100644 changelog.d/5733.misc delete mode 100644 changelog.d/5736.misc delete mode 100644 changelog.d/5738.misc delete mode 100644 changelog.d/5740.misc delete mode 100644 changelog.d/5743.bugfix delete mode 100644 changelog.d/5746.misc delete mode 100644 changelog.d/5749.misc delete mode 100644 changelog.d/5750.misc delete mode 100644 changelog.d/5752.misc delete mode 100644 changelog.d/5753.misc delete mode 100644 changelog.d/5754.feature delete mode 100644 changelog.d/5768.misc delete mode 100644 changelog.d/5770.misc delete mode 100644 changelog.d/5774.misc delete mode 100644 changelog.d/5775.bugfix delete mode 100644 changelog.d/5780.misc delete mode 100644 changelog.d/5782.removal delete mode 100644 changelog.d/5783.feature delete mode 100644 changelog.d/5785.misc delete mode 100644 changelog.d/5787.misc delete mode 100644 changelog.d/5788.bugfix delete mode 100644 changelog.d/5789.bugfix delete mode 100644 changelog.d/5790.misc delete mode 100644 changelog.d/5792.misc delete mode 100644 changelog.d/5793.misc delete mode 100644 changelog.d/5794.misc delete mode 100644 changelog.d/5796.misc delete mode 100644 changelog.d/5798.bugfix delete mode 100644 changelog.d/5801.misc delete mode 100644 changelog.d/5802.misc delete mode 100644 changelog.d/5804.bugfix delete mode 100644 changelog.d/5805.misc delete mode 100644 changelog.d/5806.bugfix delete mode 100644 changelog.d/5807.feature delete mode 100644 changelog.d/5808.misc delete mode 100644 changelog.d/5809.misc delete mode 100644 changelog.d/5810.misc delete mode 100644 changelog.d/5825.bugfix delete mode 100644 changelog.d/5826.misc delete mode 100644 changelog.d/5836.misc delete mode 100644 changelog.d/5839.bugfix delete mode 100644 changelog.d/5843.misc delete mode 100644 changelog.d/5848.feature diff --git a/CHANGES.md b/CHANGES.md index 7bdc7ae6cc..eca9c82f55 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,77 @@ +Synapse 1.3.0rc1 (2019-08-13) +========================== + +Features +-------- + +- Use `M_USER_DEACTIVATED` instead of `M_UNKNOWN` for errcode when a deactivated user attempts to login. ([\#5686](https://github.com/matrix-org/synapse/issues/5686)) +- Add sd_notify hooks to ease systemd integration and allows usage of Type=Notify. ([\#5732](https://github.com/matrix-org/synapse/issues/5732)) +- Synapse will no longer serve any media repo admin endpoints when `enable_media_repo` is set to False in the configuration. If a media repo worker is used, the admin APIs relating to the media repo will be served from it instead. ([\#5754](https://github.com/matrix-org/synapse/issues/5754), [\#5848](https://github.com/matrix-org/synapse/issues/5848)) +- Synapse can now be configured to not join remote rooms of a given "complexity" (currently, state events) over federation. This option can be used to prevent adverse performance on resource-constrained homeservers. ([\#5783](https://github.com/matrix-org/synapse/issues/5783)) +- Allow defining HTML templates to serve the user on account renewal attempt when using the account validity feature. ([\#5807](https://github.com/matrix-org/synapse/issues/5807)) + + +Bugfixes +-------- + +- Fix UISIs during homeserver outage. ([\#5693](https://github.com/matrix-org/synapse/issues/5693), [\#5789](https://github.com/matrix-org/synapse/issues/5789)) +- Fix stack overflow in server key lookup code. ([\#5724](https://github.com/matrix-org/synapse/issues/5724)) +- start.sh no longer uses deprecated cli option. ([\#5725](https://github.com/matrix-org/synapse/issues/5725)) +- Log when we receive an event receipt from an unexpected origin. ([\#5743](https://github.com/matrix-org/synapse/issues/5743)) +- Fix debian packaging scripts to correctly build sid packages. ([\#5775](https://github.com/matrix-org/synapse/issues/5775)) +- Correctly handle redactions of redactions. ([\#5788](https://github.com/matrix-org/synapse/issues/5788)) +- Return 404 instead of 403 when accessing /rooms/{roomId}/event/{eventId} for an event without the appropriate permissions. ([\#5798](https://github.com/matrix-org/synapse/issues/5798)) +- Fix check that tombstone is a state event in push rules. ([\#5804](https://github.com/matrix-org/synapse/issues/5804)) +- Fix error when trying to login as a deactivated user when using a worker to handle login. ([\#5806](https://github.com/matrix-org/synapse/issues/5806)) +- Fix bug where user `/sync` stream could get wedged in rare circumstances. ([\#5825](https://github.com/matrix-org/synapse/issues/5825)) +- The purge_remote_media.sh script was fixed. ([\#5839](https://github.com/matrix-org/synapse/issues/5839)) + + +Deprecations and Removals +------------------------- + +- Synapse now no longer accepts the `-v`/`--verbose`, `-f`/`--log-file`, or `--log-config` command line flags, and removes the deprecated `verbose` and `log_file` configuration file options. Users of these options should migrate their options into the dedicated log configuration. ([\#5678](https://github.com/matrix-org/synapse/issues/5678), [\#5729](https://github.com/matrix-org/synapse/issues/5729)) +- Remove non-functional 'expire_access_token' setting. ([\#5782](https://github.com/matrix-org/synapse/issues/5782)) + + +Internal Changes +---------------- + +- Make Jaeger fully configurable. ([\#5694](https://github.com/matrix-org/synapse/issues/5694)) +- Add precautionary measures to prevent future abuse of `window.opener` in default welcome page. ([\#5695](https://github.com/matrix-org/synapse/issues/5695)) +- Reduce database IO usage by optimising queries for current membership. ([\#5706](https://github.com/matrix-org/synapse/issues/5706), [\#5738](https://github.com/matrix-org/synapse/issues/5738), [\#5746](https://github.com/matrix-org/synapse/issues/5746), [\#5752](https://github.com/matrix-org/synapse/issues/5752), [\#5770](https://github.com/matrix-org/synapse/issues/5770), [\#5774](https://github.com/matrix-org/synapse/issues/5774), [\#5792](https://github.com/matrix-org/synapse/issues/5792), [\#5793](https://github.com/matrix-org/synapse/issues/5793)) +- Improve caching when fetching `get_filtered_current_state_ids`. ([\#5713](https://github.com/matrix-org/synapse/issues/5713)) +- Don't accept opentracing data from clients. ([\#5715](https://github.com/matrix-org/synapse/issues/5715)) +- Speed up PostgreSQL unit tests in CI. ([\#5717](https://github.com/matrix-org/synapse/issues/5717)) +- Update the coding style document. ([\#5719](https://github.com/matrix-org/synapse/issues/5719)) +- Improve database query performance when recording retry intervals for remote hosts. ([\#5720](https://github.com/matrix-org/synapse/issues/5720)) +- Add a set of opentracing utils. ([\#5722](https://github.com/matrix-org/synapse/issues/5722)) +- Cache result of get_version_string to reduce overhead of `/version` federation requests. ([\#5730](https://github.com/matrix-org/synapse/issues/5730)) +- Return 'user_type' in admin API user endpoints results. ([\#5731](https://github.com/matrix-org/synapse/issues/5731)) +- Don't package the sytest test blacklist file. ([\#5733](https://github.com/matrix-org/synapse/issues/5733)) +- Replace uses of returnValue with plain return, as returnValue is not needed on Python 3. ([\#5736](https://github.com/matrix-org/synapse/issues/5736)) +- Blacklist some flakey tests in worker mode. ([\#5740](https://github.com/matrix-org/synapse/issues/5740)) +- Fix some error cases in the caching layer. ([\#5749](https://github.com/matrix-org/synapse/issues/5749)) +- Add a prometheus metric for pending cache lookups. ([\#5750](https://github.com/matrix-org/synapse/issues/5750)) +- Stop trying to fetch events with event_id=None. ([\#5753](https://github.com/matrix-org/synapse/issues/5753)) +- Convert RedactionTestCase to modern test style. ([\#5768](https://github.com/matrix-org/synapse/issues/5768)) +- Allow looping calls to be given arguments. ([\#5780](https://github.com/matrix-org/synapse/issues/5780)) +- Set the logs emitted when checking typing and presence timeouts to DEBUG level, not INFO. ([\#5785](https://github.com/matrix-org/synapse/issues/5785)) +- Remove DelayedCall debugging from the test suite, as it is no longer required in the vast majority of Synapse's tests. ([\#5787](https://github.com/matrix-org/synapse/issues/5787)) +- Remove some spurious exceptions from the logs where we failed to talk to a remote server. ([\#5790](https://github.com/matrix-org/synapse/issues/5790)) +- Improve performance when making `.well-known` requests by sharing the SSL options between requests. ([\#5794](https://github.com/matrix-org/synapse/issues/5794)) +- Disable codecov GitHub comments on PRs. ([\#5796](https://github.com/matrix-org/synapse/issues/5796)) +- Don't allow clients to send tombstone events that reference the room it's sent in. ([\#5801](https://github.com/matrix-org/synapse/issues/5801)) +- Deny redactions of events sent in a different room. ([\#5802](https://github.com/matrix-org/synapse/issues/5802)) +- Deny sending well known state types as non-state events. ([\#5805](https://github.com/matrix-org/synapse/issues/5805)) +- Handle incorrectly encoded query params correctly by returning a 400. ([\#5808](https://github.com/matrix-org/synapse/issues/5808)) +- Handle pusher being deleted during processing rather than logging an exception. ([\#5809](https://github.com/matrix-org/synapse/issues/5809)) +- Return 502 not 500 when failing to reach any remote server. ([\#5810](https://github.com/matrix-org/synapse/issues/5810)) +- Reduce global pauses in the events stream caused by expensive state resolution during persistence. ([\#5826](https://github.com/matrix-org/synapse/issues/5826)) +- Add a lower bound to well-known lookup cache time to avoid repeated lookups. ([\#5836](https://github.com/matrix-org/synapse/issues/5836)) +- Whitelist history visbility sytests in worker mode tests. ([\#5843](https://github.com/matrix-org/synapse/issues/5843)) + + Synapse 1.2.1 (2019-07-26) ========================== diff --git a/changelog.d/5678.removal b/changelog.d/5678.removal deleted file mode 100644 index 085b84fda6..0000000000 --- a/changelog.d/5678.removal +++ /dev/null @@ -1 +0,0 @@ -Synapse now no longer accepts the `-v`/`--verbose`, `-f`/`--log-file`, or `--log-config` command line flags, and removes the deprecated `verbose` and `log_file` configuration file options. Users of these options should migrate their options into the dedicated log configuration. diff --git a/changelog.d/5686.feature b/changelog.d/5686.feature deleted file mode 100644 index 367aa1eca2..0000000000 --- a/changelog.d/5686.feature +++ /dev/null @@ -1 +0,0 @@ -Use `M_USER_DEACTIVATED` instead of `M_UNKNOWN` for errcode when a deactivated user attempts to login. diff --git a/changelog.d/5693.bugfix b/changelog.d/5693.bugfix deleted file mode 100644 index d6f4e590ae..0000000000 --- a/changelog.d/5693.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix UISIs during homeserver outage. diff --git a/changelog.d/5694.misc b/changelog.d/5694.misc deleted file mode 100644 index 3b12dcc849..0000000000 --- a/changelog.d/5694.misc +++ /dev/null @@ -1 +0,0 @@ -Make Jaeger fully configurable. diff --git a/changelog.d/5695.misc b/changelog.d/5695.misc deleted file mode 100644 index 4741d32e25..0000000000 --- a/changelog.d/5695.misc +++ /dev/null @@ -1 +0,0 @@ -Add precautionary measures to prevent future abuse of `window.opener` in default welcome page. diff --git a/changelog.d/5706.misc b/changelog.d/5706.misc deleted file mode 100644 index 5e15dfd5fa..0000000000 --- a/changelog.d/5706.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce database IO usage by optimising queries for current membership. diff --git a/changelog.d/5713.misc b/changelog.d/5713.misc deleted file mode 100644 index 01ea1cf8d7..0000000000 --- a/changelog.d/5713.misc +++ /dev/null @@ -1 +0,0 @@ -Improve caching when fetching `get_filtered_current_state_ids`. diff --git a/changelog.d/5715.misc b/changelog.d/5715.misc deleted file mode 100644 index a77366e0c0..0000000000 --- a/changelog.d/5715.misc +++ /dev/null @@ -1 +0,0 @@ -Don't accept opentracing data from clients. diff --git a/changelog.d/5717.misc b/changelog.d/5717.misc deleted file mode 100644 index 07dc3bca94..0000000000 --- a/changelog.d/5717.misc +++ /dev/null @@ -1 +0,0 @@ -Speed up PostgreSQL unit tests in CI. diff --git a/changelog.d/5719.misc b/changelog.d/5719.misc deleted file mode 100644 index 6d5294724c..0000000000 --- a/changelog.d/5719.misc +++ /dev/null @@ -1 +0,0 @@ -Update the coding style document. diff --git a/changelog.d/5720.misc b/changelog.d/5720.misc deleted file mode 100644 index 590f64f19d..0000000000 --- a/changelog.d/5720.misc +++ /dev/null @@ -1 +0,0 @@ -Improve database query performance when recording retry intervals for remote hosts. diff --git a/changelog.d/5722.misc b/changelog.d/5722.misc deleted file mode 100644 index f2d236188d..0000000000 --- a/changelog.d/5722.misc +++ /dev/null @@ -1 +0,0 @@ -Add a set of opentracing utils. diff --git a/changelog.d/5724.bugfix b/changelog.d/5724.bugfix deleted file mode 100644 index 1b3683daf6..0000000000 --- a/changelog.d/5724.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix stack overflow in server key lookup code. \ No newline at end of file diff --git a/changelog.d/5725.bugfix b/changelog.d/5725.bugfix deleted file mode 100644 index 73ef419727..0000000000 --- a/changelog.d/5725.bugfix +++ /dev/null @@ -1 +0,0 @@ -start.sh no longer uses deprecated cli option. diff --git a/changelog.d/5729.removal b/changelog.d/5729.removal deleted file mode 100644 index 3af5198e6b..0000000000 --- a/changelog.d/5729.removal +++ /dev/null @@ -1 +0,0 @@ - Synapse now no longer accepts the `-v`/`--verbose`, `-f`/`--log-file`, or `--log-config` command line flags, and removes the deprecated `verbose` and `log_file` configuration file options. Users of these options should migrate their options into the dedicated log configuration. diff --git a/changelog.d/5730.misc b/changelog.d/5730.misc deleted file mode 100644 index a99677f5e7..0000000000 --- a/changelog.d/5730.misc +++ /dev/null @@ -1 +0,0 @@ -Cache result of get_version_string to reduce overhead of `/version` federation requests. diff --git a/changelog.d/5731.misc b/changelog.d/5731.misc deleted file mode 100644 index dffae5d874..0000000000 --- a/changelog.d/5731.misc +++ /dev/null @@ -1 +0,0 @@ -Return 'user_type' in admin API user endpoints results. diff --git a/changelog.d/5732.feature b/changelog.d/5732.feature deleted file mode 100644 index 9021864350..0000000000 --- a/changelog.d/5732.feature +++ /dev/null @@ -1 +0,0 @@ -Add sd_notify hooks to ease systemd integration and allows usage of Type=Notify. diff --git a/changelog.d/5733.misc b/changelog.d/5733.misc deleted file mode 100644 index a2a8c26383..0000000000 --- a/changelog.d/5733.misc +++ /dev/null @@ -1 +0,0 @@ -Don't package the sytest test blacklist file. diff --git a/changelog.d/5736.misc b/changelog.d/5736.misc deleted file mode 100644 index 5713b8b32d..0000000000 --- a/changelog.d/5736.misc +++ /dev/null @@ -1 +0,0 @@ -Replace uses of returnValue with plain return, as returnValue is not needed on Python 3. diff --git a/changelog.d/5738.misc b/changelog.d/5738.misc deleted file mode 100644 index 5e15dfd5fa..0000000000 --- a/changelog.d/5738.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce database IO usage by optimising queries for current membership. diff --git a/changelog.d/5740.misc b/changelog.d/5740.misc deleted file mode 100644 index 97a476bef5..0000000000 --- a/changelog.d/5740.misc +++ /dev/null @@ -1 +0,0 @@ -Blacklist some flakey tests in worker mode. diff --git a/changelog.d/5743.bugfix b/changelog.d/5743.bugfix deleted file mode 100644 index 65728ff079..0000000000 --- a/changelog.d/5743.bugfix +++ /dev/null @@ -1 +0,0 @@ -Log when we receive an event receipt from an unexpected origin. diff --git a/changelog.d/5746.misc b/changelog.d/5746.misc deleted file mode 100644 index 5e15dfd5fa..0000000000 --- a/changelog.d/5746.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce database IO usage by optimising queries for current membership. diff --git a/changelog.d/5749.misc b/changelog.d/5749.misc deleted file mode 100644 index 48dd61f461..0000000000 --- a/changelog.d/5749.misc +++ /dev/null @@ -1 +0,0 @@ -Fix some error cases in the caching layer. diff --git a/changelog.d/5750.misc b/changelog.d/5750.misc deleted file mode 100644 index 6beaa460a5..0000000000 --- a/changelog.d/5750.misc +++ /dev/null @@ -1 +0,0 @@ -Add a prometheus metric for pending cache lookups. \ No newline at end of file diff --git a/changelog.d/5752.misc b/changelog.d/5752.misc deleted file mode 100644 index 5e15dfd5fa..0000000000 --- a/changelog.d/5752.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce database IO usage by optimising queries for current membership. diff --git a/changelog.d/5753.misc b/changelog.d/5753.misc deleted file mode 100644 index 22bba9ce3c..0000000000 --- a/changelog.d/5753.misc +++ /dev/null @@ -1 +0,0 @@ -Stop trying to fetch events with event_id=None. diff --git a/changelog.d/5754.feature b/changelog.d/5754.feature deleted file mode 100644 index c1a09a4dce..0000000000 --- a/changelog.d/5754.feature +++ /dev/null @@ -1 +0,0 @@ -Synapse will no longer serve any media repo admin endpoints when `enable_media_repo` is set to False in the configuration. If a media repo worker is used, the admin APIs relating to the media repo will be served from it instead. \ No newline at end of file diff --git a/changelog.d/5768.misc b/changelog.d/5768.misc deleted file mode 100644 index 7a9c88b4c2..0000000000 --- a/changelog.d/5768.misc +++ /dev/null @@ -1 +0,0 @@ -Convert RedactionTestCase to modern test style. diff --git a/changelog.d/5770.misc b/changelog.d/5770.misc deleted file mode 100644 index 5e15dfd5fa..0000000000 --- a/changelog.d/5770.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce database IO usage by optimising queries for current membership. diff --git a/changelog.d/5774.misc b/changelog.d/5774.misc deleted file mode 100644 index 5e15dfd5fa..0000000000 --- a/changelog.d/5774.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce database IO usage by optimising queries for current membership. diff --git a/changelog.d/5775.bugfix b/changelog.d/5775.bugfix deleted file mode 100644 index b124897d80..0000000000 --- a/changelog.d/5775.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix debian packaging scripts to correctly build sid packages. diff --git a/changelog.d/5780.misc b/changelog.d/5780.misc deleted file mode 100644 index b7eb56e625..0000000000 --- a/changelog.d/5780.misc +++ /dev/null @@ -1 +0,0 @@ -Allow looping calls to be given arguments. diff --git a/changelog.d/5782.removal b/changelog.d/5782.removal deleted file mode 100644 index 658bf923ab..0000000000 --- a/changelog.d/5782.removal +++ /dev/null @@ -1 +0,0 @@ -Remove non-functional 'expire_access_token' setting. diff --git a/changelog.d/5783.feature b/changelog.d/5783.feature deleted file mode 100644 index 18f5a3cb28..0000000000 --- a/changelog.d/5783.feature +++ /dev/null @@ -1 +0,0 @@ -Synapse can now be configured to not join remote rooms of a given "complexity" (currently, state events) over federation. This option can be used to prevent adverse performance on resource-constrained homeservers. diff --git a/changelog.d/5785.misc b/changelog.d/5785.misc deleted file mode 100644 index 0691222c42..0000000000 --- a/changelog.d/5785.misc +++ /dev/null @@ -1 +0,0 @@ -Set the logs emitted when checking typing and presence timeouts to DEBUG level, not INFO. diff --git a/changelog.d/5787.misc b/changelog.d/5787.misc deleted file mode 100644 index ead0b04b62..0000000000 --- a/changelog.d/5787.misc +++ /dev/null @@ -1 +0,0 @@ -Remove DelayedCall debugging from the test suite, as it is no longer required in the vast majority of Synapse's tests. diff --git a/changelog.d/5788.bugfix b/changelog.d/5788.bugfix deleted file mode 100644 index 5632f3cb99..0000000000 --- a/changelog.d/5788.bugfix +++ /dev/null @@ -1 +0,0 @@ -Correctly handle redactions of redactions. diff --git a/changelog.d/5789.bugfix b/changelog.d/5789.bugfix deleted file mode 100644 index d6f4e590ae..0000000000 --- a/changelog.d/5789.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix UISIs during homeserver outage. diff --git a/changelog.d/5790.misc b/changelog.d/5790.misc deleted file mode 100644 index 3e9e435d7a..0000000000 --- a/changelog.d/5790.misc +++ /dev/null @@ -1 +0,0 @@ -Remove some spurious exceptions from the logs where we failed to talk to a remote server. diff --git a/changelog.d/5792.misc b/changelog.d/5792.misc deleted file mode 100644 index 5e15dfd5fa..0000000000 --- a/changelog.d/5792.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce database IO usage by optimising queries for current membership. diff --git a/changelog.d/5793.misc b/changelog.d/5793.misc deleted file mode 100644 index 5e15dfd5fa..0000000000 --- a/changelog.d/5793.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce database IO usage by optimising queries for current membership. diff --git a/changelog.d/5794.misc b/changelog.d/5794.misc deleted file mode 100644 index 720e0ddcfb..0000000000 --- a/changelog.d/5794.misc +++ /dev/null @@ -1 +0,0 @@ -Improve performance when making `.well-known` requests by sharing the SSL options between requests. diff --git a/changelog.d/5796.misc b/changelog.d/5796.misc deleted file mode 100644 index be520946c7..0000000000 --- a/changelog.d/5796.misc +++ /dev/null @@ -1 +0,0 @@ -Disable codecov GitHub comments on PRs. diff --git a/changelog.d/5798.bugfix b/changelog.d/5798.bugfix deleted file mode 100644 index 7db2c37af5..0000000000 --- a/changelog.d/5798.bugfix +++ /dev/null @@ -1 +0,0 @@ -Return 404 instead of 403 when accessing /rooms/{roomId}/event/{eventId} for an event without the appropriate permissions. diff --git a/changelog.d/5801.misc b/changelog.d/5801.misc deleted file mode 100644 index e19854de82..0000000000 --- a/changelog.d/5801.misc +++ /dev/null @@ -1 +0,0 @@ -Don't allow clients to send tombstone events that reference the room it's sent in. diff --git a/changelog.d/5802.misc b/changelog.d/5802.misc deleted file mode 100644 index de31192652..0000000000 --- a/changelog.d/5802.misc +++ /dev/null @@ -1 +0,0 @@ -Deny redactions of events sent in a different room. diff --git a/changelog.d/5804.bugfix b/changelog.d/5804.bugfix deleted file mode 100644 index 75c17b460d..0000000000 --- a/changelog.d/5804.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix check that tombstone is a state event in push rules. diff --git a/changelog.d/5805.misc b/changelog.d/5805.misc deleted file mode 100644 index 352cb3db04..0000000000 --- a/changelog.d/5805.misc +++ /dev/null @@ -1 +0,0 @@ -Deny sending well known state types as non-state events. diff --git a/changelog.d/5806.bugfix b/changelog.d/5806.bugfix deleted file mode 100644 index c5ca0f5629..0000000000 --- a/changelog.d/5806.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix error when trying to login as a deactivated user when using a worker to handle login. diff --git a/changelog.d/5807.feature b/changelog.d/5807.feature deleted file mode 100644 index 8b7d29a23c..0000000000 --- a/changelog.d/5807.feature +++ /dev/null @@ -1 +0,0 @@ -Allow defining HTML templates to serve the user on account renewal attempt when using the account validity feature. diff --git a/changelog.d/5808.misc b/changelog.d/5808.misc deleted file mode 100644 index cac3fd34d1..0000000000 --- a/changelog.d/5808.misc +++ /dev/null @@ -1 +0,0 @@ -Handle incorrectly encoded query params correctly by returning a 400. diff --git a/changelog.d/5809.misc b/changelog.d/5809.misc deleted file mode 100644 index 82a812480e..0000000000 --- a/changelog.d/5809.misc +++ /dev/null @@ -1 +0,0 @@ -Handle pusher being deleted during processing rather than logging an exception. diff --git a/changelog.d/5810.misc b/changelog.d/5810.misc deleted file mode 100644 index 0a5ccbbb3f..0000000000 --- a/changelog.d/5810.misc +++ /dev/null @@ -1 +0,0 @@ -Return 502 not 500 when failing to reach any remote server. diff --git a/changelog.d/5825.bugfix b/changelog.d/5825.bugfix deleted file mode 100644 index fb2c6f821d..0000000000 --- a/changelog.d/5825.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix bug where user `/sync` stream could get wedged in rare circumstances. diff --git a/changelog.d/5826.misc b/changelog.d/5826.misc deleted file mode 100644 index 9abed11bbe..0000000000 --- a/changelog.d/5826.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce global pauses in the events stream caused by expensive state resolution during persistence. diff --git a/changelog.d/5836.misc b/changelog.d/5836.misc deleted file mode 100644 index 18f2488201..0000000000 --- a/changelog.d/5836.misc +++ /dev/null @@ -1 +0,0 @@ -Add a lower bound to well-known lookup cache time to avoid repeated lookups. diff --git a/changelog.d/5839.bugfix b/changelog.d/5839.bugfix deleted file mode 100644 index 5775bfa653..0000000000 --- a/changelog.d/5839.bugfix +++ /dev/null @@ -1 +0,0 @@ -The purge_remote_media.sh script was fixed. diff --git a/changelog.d/5843.misc b/changelog.d/5843.misc deleted file mode 100644 index e7e7d572b7..0000000000 --- a/changelog.d/5843.misc +++ /dev/null @@ -1 +0,0 @@ -Whitelist history visbility sytests in worker mode tests. diff --git a/changelog.d/5848.feature b/changelog.d/5848.feature deleted file mode 100644 index c1a09a4dce..0000000000 --- a/changelog.d/5848.feature +++ /dev/null @@ -1 +0,0 @@ -Synapse will no longer serve any media repo admin endpoints when `enable_media_repo` is set to False in the configuration. If a media repo worker is used, the admin APIs relating to the media repo will be served from it instead. \ No newline at end of file diff --git a/synapse/__init__.py b/synapse/__init__.py index 8301a13d8f..d2316c7df9 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -35,4 +35,4 @@ try: except ImportError: pass -__version__ = "1.2.1" +__version__ = "1.3.0rc1" From 1b63ccd8483e99aa7ea72b91f99f4cd94ded2e36 Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" Date: Wed, 14 Aug 2019 14:05:50 +0100 Subject: [PATCH 027/110] Wrap `get_local_public_room_list` call in `maybeDeferred` because it is cached and so does not always return a `Deferred`. `await` does not silently pass-through non-Deferreds like `yield` used to. Signed-off-by: Olivier Wilkinson (reivilibre) --- synapse/federation/transport/server.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index ea4e1b6d0f..9a86bd0263 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -19,6 +19,8 @@ import functools import logging import re +from twisted.internet.defer import maybeDeferred + import synapse import synapse.logging.opentracing as opentracing from synapse.api.errors import Codes, FederationDeniedError, SynapseError @@ -745,8 +747,12 @@ class PublicRoomList(BaseFederationServlet): else: network_tuple = ThirdPartyInstanceID(None, None) - data = await self.handler.get_local_public_room_list( - limit, since_token, network_tuple=network_tuple, from_federation=True + data = await maybeDeferred( + self.handler.get_local_public_room_list, + limit, + since_token, + network_tuple=network_tuple, + from_federation=True, ) return 200, data From 3ad24ab3865ab0e52bab7cbb7bb50f10c3cab7d8 Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" Date: Wed, 14 Aug 2019 14:09:31 +0100 Subject: [PATCH 028/110] Newsfile Signed-off-by: Olivier Wilkinson (reivilibre) --- changelog.d/5851.bugfix | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 changelog.d/5851.bugfix diff --git a/changelog.d/5851.bugfix b/changelog.d/5851.bugfix new file mode 100644 index 0000000000..6da40e6e38 --- /dev/null +++ b/changelog.d/5851.bugfix @@ -0,0 +1,2 @@ +Fixes 500 Internal Server Error on `publicRooms` when the public room list was +cached. \ No newline at end of file From d6de55bce967e89c7f8ffdbbe04ba655f969845c Mon Sep 17 00:00:00 2001 From: reivilibre <38398653+reivilibre@users.noreply.github.com> Date: Wed, 14 Aug 2019 14:48:18 +0100 Subject: [PATCH 029/110] Update changelog.d/5851.bugfix Use imperative Co-Authored-By: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- changelog.d/5851.bugfix | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/changelog.d/5851.bugfix b/changelog.d/5851.bugfix index 6da40e6e38..58f7c0c1b8 100644 --- a/changelog.d/5851.bugfix +++ b/changelog.d/5851.bugfix @@ -1,2 +1,2 @@ -Fixes 500 Internal Server Error on `publicRooms` when the public room list was -cached. \ No newline at end of file +Fix 500 Internal Server Error on `publicRooms` when the public room list was +cached. From b7f7cc7ace5977d6494581e64669a0bb68208cc1 Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Wed, 14 Aug 2019 17:14:40 -0700 Subject: [PATCH 030/110] add the version field to the index for e2e_room_keys --- .../schema/delta/56/fix_room_keys_index.sql | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 synapse/storage/schema/delta/56/fix_room_keys_index.sql diff --git a/synapse/storage/schema/delta/56/fix_room_keys_index.sql b/synapse/storage/schema/delta/56/fix_room_keys_index.sql new file mode 100644 index 0000000000..014cb3b538 --- /dev/null +++ b/synapse/storage/schema/delta/56/fix_room_keys_index.sql @@ -0,0 +1,18 @@ +/* Copyright 2019 Matrix.org Foundation CIC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- version is supposed to be part of the room keys index +CREATE UNIQUE INDEX e2e_room_keys_with_version_idx ON e2e_room_keys(user_id, version, room_id, session_id); +DROP INDEX IF EXISTS e2e_room_keys_idx; From 81b8080acd9796e8a01ed5e7259eb2115bd0eb52 Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Wed, 14 Aug 2019 17:53:33 -0700 Subject: [PATCH 031/110] add changelog --- changelog.d/5857.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5857.bugfix diff --git a/changelog.d/5857.bugfix b/changelog.d/5857.bugfix new file mode 100644 index 0000000000..bfeae6a6f2 --- /dev/null +++ b/changelog.d/5857.bugfix @@ -0,0 +1 @@ +Add missing version field to e2e_room_keys database index. \ No newline at end of file From c058aeb88da3e4a4fb414183e442c1a832470398 Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Wed, 14 Aug 2019 18:02:58 -0700 Subject: [PATCH 032/110] update set_e2e_room_key to agree with fixed index --- synapse/storage/e2e_room_keys.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/e2e_room_keys.py b/synapse/storage/e2e_room_keys.py index 99128f2df7..b1901404af 100644 --- a/synapse/storage/e2e_room_keys.py +++ b/synapse/storage/e2e_room_keys.py @@ -82,11 +82,11 @@ class EndToEndRoomKeyStore(SQLBaseStore): table="e2e_room_keys", keyvalues={ "user_id": user_id, + "version": version, "room_id": room_id, "session_id": session_id, }, values={ - "version": version, "first_message_index": room_key["first_message_index"], "forwarded_count": room_key["forwarded_count"], "is_verified": room_key["is_verified"], From e6e136decca12648933f974e4151fb936ad9e1fa Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Aug 2019 18:04:46 +0100 Subject: [PATCH 033/110] Retry well known on fail. If we have recently seen a valid well-known for a domain we want to retry on (non-final) errors a few times, to handle temporary blips in networking/etc. --- .../federation/matrix_federation_agent.py | 26 ++-- .../http/federation/well_known_resolver.py | 122 ++++++++++++++---- .../test_matrix_federation_agent.py | 79 +++++++----- 3 files changed, 160 insertions(+), 67 deletions(-) diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index 71a15f434d..64f62aaeec 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -51,9 +51,9 @@ class MatrixFederationAgent(object): SRVResolver impl to use for looking up SRV records. None to use a default implementation. - _well_known_cache (TTLCache|None): - TTLCache impl for storing cached well-known lookups. None to use a default - implementation. + _well_known_resolver (WellKnownResolver|None): + WellKnownResolver to use to perform well-known lookups. None to use a + default implementation. """ def __init__( @@ -61,7 +61,7 @@ class MatrixFederationAgent(object): reactor, tls_client_options_factory, _srv_resolver=None, - _well_known_cache=None, + _well_known_resolver=None, ): self._reactor = reactor self._clock = Clock(reactor) @@ -76,15 +76,17 @@ class MatrixFederationAgent(object): self._pool.maxPersistentPerHost = 5 self._pool.cachedConnectionTimeout = 2 * 60 - self._well_known_resolver = WellKnownResolver( - self._reactor, - agent=Agent( + if _well_known_resolver is None: + _well_known_resolver = WellKnownResolver( self._reactor, - pool=self._pool, - contextFactory=tls_client_options_factory, - ), - well_known_cache=_well_known_cache, - ) + agent=Agent( + self._reactor, + pool=self._pool, + contextFactory=tls_client_options_factory, + ), + ) + + self._well_known_resolver = _well_known_resolver @defer.inlineCallbacks def request(self, method, uri, headers=None, bodyProducer=None): diff --git a/synapse/http/federation/well_known_resolver.py b/synapse/http/federation/well_known_resolver.py index bb250c6922..d59864e298 100644 --- a/synapse/http/federation/well_known_resolver.py +++ b/synapse/http/federation/well_known_resolver.py @@ -38,6 +38,13 @@ WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER = 10 * 60 # period to cache failure to fetch .well-known for WELL_KNOWN_INVALID_CACHE_PERIOD = 1 * 3600 +# period to cache failure to fetch .well-known if there has recently been a +# valid well-known for that domain. +WELL_KNOWN_DOWN_CACHE_PERIOD = 2 * 60 + +# period to remember there was a valid well-known after valid record expires +WELL_KNOWN_REMEMBER_DOMAIN_HAD_VALID = 2 * 3600 + # cap for .well-known cache period WELL_KNOWN_MAX_CACHE_PERIOD = 48 * 3600 @@ -49,11 +56,16 @@ WELL_KNOWN_MIN_CACHE_PERIOD = 5 * 60 # we'll start trying to refetch 1 minute before it expires. WELL_KNOWN_GRACE_PERIOD_FACTOR = 0.2 +# Number of times we retry fetching a well-known for a domain we know recently +# had a valid entry. +WELL_KNOWN_RETRY_ATTEMPTS = 3 + logger = logging.getLogger(__name__) _well_known_cache = TTLCache("well-known") +_had_valid_well_known_cache = TTLCache("had-valid-well-known") @attr.s(slots=True, frozen=True) @@ -65,14 +77,20 @@ class WellKnownResolver(object): """Handles well-known lookups for matrix servers. """ - def __init__(self, reactor, agent, well_known_cache=None): + def __init__( + self, reactor, agent, well_known_cache=None, had_well_known_cache=None + ): self._reactor = reactor self._clock = Clock(reactor) if well_known_cache is None: well_known_cache = _well_known_cache + if had_well_known_cache is None: + had_well_known_cache = _had_valid_well_known_cache + self._well_known_cache = well_known_cache + self._had_valid_well_known_cache = had_well_known_cache self._well_known_agent = RedirectAgent(agent) @defer.inlineCallbacks @@ -100,7 +118,7 @@ class WellKnownResolver(object): # requests for the same server in parallel? try: with Measure(self._clock, "get_well_known"): - result, cache_period = yield self._do_get_well_known(server_name) + result, cache_period = yield self._fetch_well_known(server_name) except _FetchWellKnownFailure as e: if prev_result and e.temporary: @@ -111,10 +129,20 @@ class WellKnownResolver(object): result = None - # add some randomness to the TTL to avoid a stampeding herd every hour - # after startup - cache_period = WELL_KNOWN_INVALID_CACHE_PERIOD - cache_period += random.uniform(0, WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER) + if self._had_valid_well_known_cache.get(server_name, False): + # We have recently seen a valid well-known record for this + # server, so we cache the lack of well-known for a shorter time. + cache_period = WELL_KNOWN_DOWN_CACHE_PERIOD + cache_period += random.uniform( + 0, WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER + ) + else: + # add some randomness to the TTL to avoid a stampeding herd every hour + # after startup + cache_period = WELL_KNOWN_INVALID_CACHE_PERIOD + cache_period += random.uniform( + 0, WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER + ) if cache_period > 0: self._well_known_cache.set(server_name, result, cache_period) @@ -122,7 +150,7 @@ class WellKnownResolver(object): return WellKnownLookupResult(delegated_server=result) @defer.inlineCallbacks - def _do_get_well_known(self, server_name): + def _fetch_well_known(self, server_name): """Actually fetch and parse a .well-known, without checking the cache Args: @@ -134,24 +162,17 @@ class WellKnownResolver(object): Returns: Deferred[Tuple[bytes,int]]: The lookup result and cache period. """ - uri = b"https://%s/.well-known/matrix/server" % (server_name,) - uri_str = uri.decode("ascii") - logger.info("Fetching %s", uri_str) + + had_valid_well_known = bool( + self._had_valid_well_known_cache.get(server_name, False) + ) # We do this in two steps to differentiate between possibly transient # errors (e.g. can't connect to host, 503 response) and more permenant # errors (such as getting a 404 response). - try: - response = yield make_deferred_yieldable( - self._well_known_agent.request(b"GET", uri) - ) - body = yield make_deferred_yieldable(readBody(response)) - - if 500 <= response.code < 600: - raise Exception("Non-200 response %s" % (response.code,)) - except Exception as e: - logger.info("Error fetching %s: %s", uri_str, e) - raise _FetchWellKnownFailure(temporary=True) + response, body = yield self._make_well_known_request( + server_name, retry=had_valid_well_known + ) try: if response.code != 200: @@ -161,8 +182,11 @@ class WellKnownResolver(object): logger.info("Response from .well-known: %s", parsed_body) result = parsed_body["m.server"].encode("ascii") + except defer.CancelledError: + # Bail if we've been cancelled + raise except Exception as e: - logger.info("Error fetching %s: %s", uri_str, e) + logger.info("Error parsing well-known for %s: %s", server_name, e) raise _FetchWellKnownFailure(temporary=False) cache_period = _cache_period_from_headers( @@ -177,8 +201,62 @@ class WellKnownResolver(object): cache_period = min(cache_period, WELL_KNOWN_MAX_CACHE_PERIOD) cache_period = max(cache_period, WELL_KNOWN_MIN_CACHE_PERIOD) + # We got a success, mark as such in the cache + self._had_valid_well_known_cache.set( + server_name, + bool(result), + cache_period + WELL_KNOWN_REMEMBER_DOMAIN_HAD_VALID, + ) + return (result, cache_period) + @defer.inlineCallbacks + def _make_well_known_request(self, server_name, retry): + """Make the well known request. + + This will retry the request if requested and it fails (with unable + to connect or receives a 5xx error). + + Args: + server_name (bytes) + retry (bool): Whether to retry the request if it fails. + + Returns: + Deferred[tuple[IResponse, bytes]] Returns the response object and + body. Response may be a non-200 response. + """ + uri = b"https://%s/.well-known/matrix/server" % (server_name,) + uri_str = uri.decode("ascii") + + i = 0 + while True: + i += 1 + + logger.info("Fetching %s", uri_str) + try: + response = yield make_deferred_yieldable( + self._well_known_agent.request(b"GET", uri) + ) + body = yield make_deferred_yieldable(readBody(response)) + + if 500 <= response.code < 600: + raise Exception("Non-200 response %s" % (response.code,)) + + return response, body + except defer.CancelledError: + # Bail if we've been cancelled + raise + except Exception as e: + logger.info("Retry: %s", retry) + if not retry or i >= WELL_KNOWN_RETRY_ATTEMPTS: + logger.info("Error fetching %s: %s", uri_str, e) + raise _FetchWellKnownFailure(temporary=True) + + logger.info("Error fetching %s: %s. Retrying", uri_str, e) + + # Sleep briefly in the hopes that they come back up + yield self._clock.sleep(0.5) + def _cache_period_from_headers(headers, time_now=time.time): cache_controls = _parse_cache_control(headers) diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py index 2c568788b3..4d3f31d18c 100644 --- a/tests/http/federation/test_matrix_federation_agent.py +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -73,8 +73,6 @@ class MatrixFederationAgentTests(TestCase): self.mock_resolver = Mock() - self.well_known_cache = TTLCache("test_cache", timer=self.reactor.seconds) - config_dict = default_config("test", parse=False) config_dict["federation_custom_ca_list"] = [get_test_ca_cert_file()] @@ -82,11 +80,21 @@ class MatrixFederationAgentTests(TestCase): config.parse_config_dict(config_dict, "", "") self.tls_factory = ClientTLSOptionsFactory(config) + + self.well_known_cache = TTLCache("test_cache", timer=self.reactor.seconds) + self.had_well_known_cache = TTLCache("test_cache", timer=self.reactor.seconds) + self.well_known_resolver = WellKnownResolver( + self.reactor, + Agent(self.reactor, contextFactory=self.tls_factory), + well_known_cache=self.well_known_cache, + had_well_known_cache=self.had_well_known_cache, + ) + self.agent = MatrixFederationAgent( reactor=self.reactor, tls_client_options_factory=self.tls_factory, _srv_resolver=self.mock_resolver, - _well_known_cache=self.well_known_cache, + _well_known_resolver=self.well_known_resolver, ) def _make_connection(self, client_factory, expected_sni): @@ -701,11 +709,18 @@ class MatrixFederationAgentTests(TestCase): config = default_config("test", parse=True) + # Build a new agent and WellKnownResolver with a different tls factory + tls_factory = ClientTLSOptionsFactory(config) agent = MatrixFederationAgent( reactor=self.reactor, - tls_client_options_factory=ClientTLSOptionsFactory(config), + tls_client_options_factory=tls_factory, _srv_resolver=self.mock_resolver, - _well_known_cache=self.well_known_cache, + _well_known_resolver=WellKnownResolver( + self.reactor, + Agent(self.reactor, contextFactory=tls_factory), + well_known_cache=self.well_known_cache, + had_well_known_cache=self.had_well_known_cache, + ), ) test_d = agent.request(b"GET", b"matrix://testserv/foo/bar") @@ -932,15 +947,9 @@ class MatrixFederationAgentTests(TestCase): self.successResultOf(test_d) def test_well_known_cache(self): - well_known_resolver = WellKnownResolver( - self.reactor, - Agent(self.reactor, contextFactory=self.tls_factory), - well_known_cache=self.well_known_cache, - ) - self.reactor.lookups["testserv"] = "1.2.3.4" - fetch_d = well_known_resolver.get_well_known(b"testserv") + fetch_d = self.well_known_resolver.get_well_known(b"testserv") # there should be an attempt to connect on port 443 for the .well-known clients = self.reactor.tcpClients @@ -963,7 +972,7 @@ class MatrixFederationAgentTests(TestCase): well_known_server.loseConnection() # repeat the request: it should hit the cache - fetch_d = well_known_resolver.get_well_known(b"testserv") + fetch_d = self.well_known_resolver.get_well_known(b"testserv") r = self.successResultOf(fetch_d) self.assertEqual(r.delegated_server, b"target-server") @@ -971,7 +980,7 @@ class MatrixFederationAgentTests(TestCase): self.reactor.pump((1000.0,)) # now it should connect again - fetch_d = well_known_resolver.get_well_known(b"testserv") + fetch_d = self.well_known_resolver.get_well_known(b"testserv") self.assertEqual(len(clients), 1) (host, port, client_factory, _timeout, _bindAddress) = clients.pop(0) @@ -992,15 +1001,9 @@ class MatrixFederationAgentTests(TestCase): it ignores transient errors. """ - well_known_resolver = WellKnownResolver( - self.reactor, - Agent(self.reactor, contextFactory=self.tls_factory), - well_known_cache=self.well_known_cache, - ) - self.reactor.lookups["testserv"] = "1.2.3.4" - fetch_d = well_known_resolver.get_well_known(b"testserv") + fetch_d = self.well_known_resolver.get_well_known(b"testserv") # there should be an attempt to connect on port 443 for the .well-known clients = self.reactor.tcpClients @@ -1026,27 +1029,37 @@ class MatrixFederationAgentTests(TestCase): # another lookup. self.reactor.pump((900.0,)) - fetch_d = well_known_resolver.get_well_known(b"testserv") - clients = self.reactor.tcpClients - (host, port, client_factory, _timeout, _bindAddress) = clients.pop(0) + fetch_d = self.well_known_resolver.get_well_known(b"testserv") - # fonx the connection attempt, this will be treated as a temporary - # failure. - client_factory.clientConnectionFailed(None, Exception("nope")) + # The resolver may retry a few times, so fonx all requests that come along + attempts = 0 + while self.reactor.tcpClients: + clients = self.reactor.tcpClients + (host, port, client_factory, _timeout, _bindAddress) = clients.pop(0) - # attemptdelay on the hostnameendpoint is 0.3, so takes that long before the - # .well-known request fails. - self.reactor.pump((0.4,)) + attempts += 1 + + # fonx the connection attempt, this will be treated as a temporary + # failure. + client_factory.clientConnectionFailed(None, Exception("nope")) + + # There's a few sleeps involved, so we have to pump the reactor a + # bit. + self.reactor.pump((1.0, 1.0)) + + # We expect to see more than one attempt as there was previously a valid + # well known. + self.assertGreater(attempts, 1) # Resolver should return cached value, despite the lookup failing. r = self.successResultOf(fetch_d) self.assertEqual(r.delegated_server, b"target-server") - # Expire the cache and repeat the request - self.reactor.pump((100.0,)) + # Expire both caches and repeat the request + self.reactor.pump((10000.0,)) # Repated the request, this time it should fail if the lookup fails. - fetch_d = well_known_resolver.get_well_known(b"testserv") + fetch_d = self.well_known_resolver.get_well_known(b"testserv") clients = self.reactor.tcpClients (host, port, client_factory, _timeout, _bindAddress) = clients.pop(0) From 1771f0045d035b8057ba8766ebd5deab230725d3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 14 Aug 2019 10:54:26 +0100 Subject: [PATCH 034/110] Newsfile --- changelog.d/5850.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5850.misc diff --git a/changelog.d/5850.misc b/changelog.d/5850.misc new file mode 100644 index 0000000000..c4f879ca2f --- /dev/null +++ b/changelog.d/5850.misc @@ -0,0 +1 @@ +Retry well-known lookups if we have recently seen a valid well-known record for the server. From baee288fb462d4a29ea953261d83312129b1d487 Mon Sep 17 00:00:00 2001 From: Michael Telatynski <7t3chguy@gmail.com> Date: Thu, 15 Aug 2019 09:45:57 +0100 Subject: [PATCH 035/110] Don't create broken room when power_level_content_override.users does not contain creator_id. (#5633) --- changelog.d/5633.bugfix | 1 + synapse/handlers/room.py | 14 +++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 changelog.d/5633.bugfix diff --git a/changelog.d/5633.bugfix b/changelog.d/5633.bugfix new file mode 100644 index 0000000000..b2ff803b9d --- /dev/null +++ b/changelog.d/5633.bugfix @@ -0,0 +1 @@ +Don't create broken room when power_level_content_override.users does not contain creator_id. \ No newline at end of file diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 5caa90c3b7..6e47fe7867 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -560,6 +560,18 @@ class RoomCreationHandler(BaseHandler): yield self.event_creation_handler.assert_accepted_privacy_policy(requester) + power_level_content_override = config.get("power_level_content_override") + if ( + power_level_content_override + and "users" in power_level_content_override + and user_id not in power_level_content_override["users"] + ): + raise SynapseError( + 400, + "Not a valid power_level_content_override: 'users' did not contain %s" + % (user_id,), + ) + invite_3pid_list = config.get("invite_3pid", []) visibility = config.get("visibility", None) @@ -604,7 +616,7 @@ class RoomCreationHandler(BaseHandler): initial_state=initial_state, creation_content=creation_content, room_alias=room_alias, - power_level_content_override=config.get("power_level_content_override"), + power_level_content_override=power_level_content_override, creator_join_profile=creator_join_profile, ) From 6fadb560fcdc92466b0b5cac02551cb41ca9f148 Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" Date: Wed, 14 Aug 2019 13:30:36 +0100 Subject: [PATCH 036/110] Support MSC2197 outbound with unstable prefix Signed-off-by: Olivier Wilkinson (reivilibre) --- synapse/federation/transport/client.py | 40 ++++++++++++++++++-------- synapse/handlers/room_list.py | 29 ++++++++++++++++++- 2 files changed, 56 insertions(+), 13 deletions(-) diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 0cea0d2a10..2e99f77eb1 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -327,21 +327,37 @@ class TransportLayerClient(object): include_all_networks=False, third_party_instance_id=None, ): - path = _create_v1_path("/publicRooms") + if search_filter: + # TODO(MSC2197): Move to V1 prefix + path = _create_path(FEDERATION_UNSTABLE_PREFIX, "/publicRooms") - args = {"include_all_networks": "true" if include_all_networks else "false"} - if third_party_instance_id: - args["third_party_instance_id"] = (third_party_instance_id,) - if limit: - args["limit"] = [str(limit)] - if since_token: - args["since"] = [since_token] + data = {"include_all_networks": "true" if include_all_networks else "false"} + if third_party_instance_id: + data["third_party_instance_id"] = third_party_instance_id + if limit: + data["limit"] = str(limit) + if since_token: + data["since"] = since_token - # TODO(erikj): Actually send the search_filter across federation. + data["filter"] = search_filter - response = yield self.client.get_json( - destination=remote_server, path=path, args=args, ignore_backoff=True - ) + response = yield self.client.post_json( + destination=remote_server, path=path, data=data, ignore_backoff=True + ) + else: + path = _create_v1_path("/publicRooms") + + args = {"include_all_networks": "true" if include_all_networks else "false"} + if third_party_instance_id: + args["third_party_instance_id"] = (third_party_instance_id,) + if limit: + args["limit"] = [str(limit)] + if since_token: + args["since"] = [since_token] + + response = yield self.client.get_json( + destination=remote_server, path=path, args=args, ignore_backoff=True + ) return response diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index e9094ad02b..a7e55f00e5 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -25,6 +25,7 @@ from unpaddedbase64 import decode_base64, encode_base64 from twisted.internet import defer from synapse.api.constants import EventTypes, JoinRules +from synapse.api.errors import Codes, HttpResponseException from synapse.types import ThirdPartyInstanceID from synapse.util.async_helpers import concurrently_execute from synapse.util.caches.descriptors import cachedInlineCallbacks @@ -485,7 +486,33 @@ class RoomListHandler(BaseHandler): return {"chunk": [], "total_room_count_estimate": 0} if search_filter: - # We currently don't support searching across federation, so we have + # Searching across federation is defined in MSC2197. + # However, the remote homeserver may or may not actually support it. + # So we first try an MSC2197 remote-filtered search, then fall back + # to a locally-filtered search if we must. + + try: + res = yield self._get_remote_list_cached( + server_name, + limit=limit, + since_token=since_token, + include_all_networks=include_all_networks, + third_party_instance_id=third_party_instance_id, + search_filter=search_filter, + ) + return res + except HttpResponseException as hre: + syn_err = hre.to_synapse_error() + if hre.code in (404, 405) or syn_err.errcode in ( + Codes.UNRECOGNIZED, + Codes.NOT_FOUND, + ): + logger.debug("Falling back to locally-filtered /publicRooms") + else: + raise # Not an error that should trigger a fallback. + + # if we reach this point, then we fall back to the situation where + # we currently don't support searching across federation, so we have # to do it manually without pagination limit = None since_token = None From 2253b083d9b3cc0aba89aba98298214cf960acd7 Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" Date: Thu, 15 Aug 2019 11:06:21 +0100 Subject: [PATCH 037/110] Add support for inbound MSC2197 requests on unstable Federation API Signed-off-by: Olivier Wilkinson (reivilibre) --- synapse/federation/transport/server.py | 60 +++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index ea4e1b6d0f..e17555c4cf 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -751,6 +751,64 @@ class PublicRoomList(BaseFederationServlet): return 200, data +class UnstablePublicRoomList(BaseFederationServlet): + """ + Fetch the public room list for this server. + + This API returns information in the same format as /publicRooms on the + client API, but will only ever include local public rooms and hence is + intended for consumption by other home servers. + + This is the unstable-prefixed version which adds support for MSC2197, which + is still undergoing review. + """ + + PATH = "/publicRooms" + PREFIX = FEDERATION_UNSTABLE_PREFIX + + def __init__(self, handler, authenticator, ratelimiter, server_name, allow_access): + super(UnstablePublicRoomList, self).__init__( + handler, authenticator, ratelimiter, server_name + ) + self.allow_access = allow_access + + # TODO(MSC2197): Move away from Unstable prefix and back to normal prefix + async def on_POST(self, origin, content, query): + if not self.allow_access: + raise FederationDeniedError(origin) + + limit = int(content.get("limit", 100)) + since_token = content.get("since", None) + search_filter = content.get("filter", None) + + include_all_networks = content.get("include_all_networks", False) + third_party_instance_id = content.get("third_party_instance_id", None) + + if include_all_networks: + network_tuple = None + if third_party_instance_id is not None: + raise SynapseError( + 400, "Can't use include_all_networks with an explicit network" + ) + elif third_party_instance_id is None: + network_tuple = ThirdPartyInstanceID(None, None) + else: + network_tuple = ThirdPartyInstanceID.from_string(third_party_instance_id) + + if search_filter is None: + logger.warning("Nonefilter") + + data = await self.handler.get_local_public_room_list( + limit=limit, + since_token=since_token, + search_filter=search_filter, + network_tuple=network_tuple, + from_federation=True, + ) + + return 200, data + + class FederationVersionServlet(BaseFederationServlet): PATH = "/version" @@ -1315,7 +1373,7 @@ FEDERATION_SERVLET_CLASSES = ( OPENID_SERVLET_CLASSES = (OpenIdUserInfo,) -ROOM_LIST_CLASSES = (PublicRoomList,) +ROOM_LIST_CLASSES = (PublicRoomList, UnstablePublicRoomList) GROUP_SERVER_SERVLET_CLASSES = ( FederationGroupsProfileServlet, From a3df04a899f9feccf145e167e7ddc5228fa927e2 Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" Date: Thu, 15 Aug 2019 11:08:54 +0100 Subject: [PATCH 038/110] Newsfile Signed-off-by: Olivier Wilkinson (reivilibre) --- changelog.d/5859.feature | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 changelog.d/5859.feature diff --git a/changelog.d/5859.feature b/changelog.d/5859.feature new file mode 100644 index 0000000000..c897c66037 --- /dev/null +++ b/changelog.d/5859.feature @@ -0,0 +1,2 @@ +Add unstable support for MSC2197 (filtered search requests over federation), in +order to allow upcoming room directory query performance improvements. From 8cf7fbbce089d03d01e69fa52f035904d929a3bf Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 15 Aug 2019 11:32:23 +0100 Subject: [PATCH 039/110] Remove libsqlite3-dev from required build dependencies. (#5766) --- changelog.d/5766.misc | 1 + debian/changelog | 7 +++---- debian/control | 1 - 3 files changed, 4 insertions(+), 5 deletions(-) create mode 100644 changelog.d/5766.misc diff --git a/changelog.d/5766.misc b/changelog.d/5766.misc new file mode 100644 index 0000000000..163ca2f0d4 --- /dev/null +++ b/changelog.d/5766.misc @@ -0,0 +1 @@ +Remove libsqlite3-dev from required build dependencies. diff --git a/debian/changelog b/debian/changelog index 6634c1085a..55c28be853 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,8 +1,7 @@ -matrix-synapse-py3 (1.2.1) stable; urgency=medium +matrix-synapse-py3 (1.3.0) UNRELEASED; urgency=medium - * New synapse release 1.2.1. - - -- Synapse Packaging team Fri, 26 Jul 2019 11:32:47 +0100 + [ Andrew Morgan ] + * Remove libsqlite3-dev from required build dependencies. matrix-synapse-py3 (1.2.0) stable; urgency=medium diff --git a/debian/control b/debian/control index 9e679c9d42..bae14b41e4 100644 --- a/debian/control +++ b/debian/control @@ -15,7 +15,6 @@ Build-Depends: python3-setuptools, python3-pip, python3-venv, - libsqlite3-dev, tar, Standards-Version: 3.9.8 Homepage: https://github.com/matrix-org/synapse From 748aa38378887006da6e9bc5e7330dbc6a3fc692 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Aug 2019 12:02:18 +0100 Subject: [PATCH 040/110] Remove logging for #5407 and update comments --- synapse/handlers/sync.py | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 98da2318a0..ef7f2ca980 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -786,9 +786,8 @@ class SyncHandler(object): batch.events[0].event_id, state_filter=state_filter ) else: - # Its not clear how we get here, but empirically we do - # (#5407). Logging has been added elsewhere to try and - # figure out where this state comes from. + # We can get here if the user has ignored the senders of all + # the recent events. state_at_timeline_start = yield self.get_state_at( room_id, stream_position=now_token, state_filter=state_filter ) @@ -1771,20 +1770,9 @@ class SyncHandler(object): newly_joined_room=newly_joined, ) - if not batch and batch.limited: - # This resulted in #5407, which is weird, so lets log! We do it - # here as we have the maximum amount of information. - user_id = sync_result_builder.sync_config.user.to_string() - logger.info( - "Issue #5407: Found limited batch with no events. user %s, room %s," - " sync_config %s, newly_joined %s, events %s, batch %s.", - user_id, - room_id, - sync_config, - newly_joined, - events, - batch, - ) + # Note: `batch` can be both empty and limited here in the case where + # `_load_filtered_recents` can't find any events the user should see + # (e.g. due to having ignored the sender of the last 50 events). if newly_joined: # debug for https://github.com/matrix-org/synapse/issues/4422 From fb5acd703973249063f2a78266eed65c8dfaaf84 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 15 Aug 2019 12:05:24 +0100 Subject: [PATCH 041/110] 1.3.0 --- CHANGES.md | 10 ++++++++++ changelog.d/5766.misc | 1 - changelog.d/5851.bugfix | 2 -- debian/changelog | 5 +++-- synapse/__init__.py | 2 +- 5 files changed, 14 insertions(+), 6 deletions(-) delete mode 100644 changelog.d/5766.misc delete mode 100644 changelog.d/5851.bugfix diff --git a/CHANGES.md b/CHANGES.md index eca9c82f55..d13dcb717e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,13 @@ +Synapse 1.3.0 (2019-08-15) +========================== + +Bugfixes +-------- + +- Fix 500 Internal Server Error on `publicRooms` when the public room list was + cached. ([\#5851](https://github.com/matrix-org/synapse/issues/5851)) + + Synapse 1.3.0rc1 (2019-08-13) ========================== diff --git a/changelog.d/5766.misc b/changelog.d/5766.misc deleted file mode 100644 index 163ca2f0d4..0000000000 --- a/changelog.d/5766.misc +++ /dev/null @@ -1 +0,0 @@ -Remove libsqlite3-dev from required build dependencies. diff --git a/changelog.d/5851.bugfix b/changelog.d/5851.bugfix deleted file mode 100644 index 58f7c0c1b8..0000000000 --- a/changelog.d/5851.bugfix +++ /dev/null @@ -1,2 +0,0 @@ -Fix 500 Internal Server Error on `publicRooms` when the public room list was -cached. diff --git a/debian/changelog b/debian/changelog index 55c28be853..83232a0bad 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,4 +1,4 @@ -matrix-synapse-py3 (1.3.0) UNRELEASED; urgency=medium +matrix-synapse-py3 (1.3.0) stable; urgency=medium [ Andrew Morgan ] * Remove libsqlite3-dev from required build dependencies. @@ -13,8 +13,9 @@ matrix-synapse-py3 (1.2.0) stable; urgency=medium [ Synapse Packaging team ] * New synapse release 1.2.0. + * New synapse release 1.3.0. - -- Synapse Packaging team Thu, 25 Jul 2019 14:10:07 +0100 + -- Synapse Packaging team Thu, 15 Aug 2019 12:04:23 +0100 matrix-synapse-py3 (1.1.0) stable; urgency=medium diff --git a/synapse/__init__.py b/synapse/__init__.py index d2316c7df9..02ae90b072 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -35,4 +35,4 @@ try: except ImportError: pass -__version__ = "1.3.0rc1" +__version__ = "1.3.0" From f299c5414c2dd300103b0e11e7114123d8eb58a1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 8 Aug 2019 15:30:04 +0100 Subject: [PATCH 042/110] Refactor MatrixFederationAgent to retry SRV. This refactors MatrixFederationAgent to move the SRV lookup into the endpoint code, this has two benefits: 1. Its easier to retry different host/ports in the same way as HostnameEndpoint. 2. We avoid SRV lookups if we have a free connection in the pool --- .../federation/matrix_federation_agent.py | 370 +++++++++--------- synapse/http/federation/srv_resolver.py | 35 +- .../test_matrix_federation_agent.py | 63 ++- tests/http/federation/test_srv_resolver.py | 8 +- 4 files changed, 275 insertions(+), 201 deletions(-) diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index 71a15f434d..c208185791 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -14,21 +14,21 @@ # limitations under the License. import logging +import urllib -import attr -from netaddr import IPAddress +from netaddr import AddrFormatError, IPAddress from zope.interface import implementer from twisted.internet import defer from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS from twisted.internet.interfaces import IStreamClientEndpoint -from twisted.web.client import URI, Agent, HTTPConnectionPool +from twisted.web.client import Agent, HTTPConnectionPool from twisted.web.http_headers import Headers -from twisted.web.iweb import IAgent +from twisted.web.iweb import IAgent, IAgentEndpointFactory -from synapse.http.federation.srv_resolver import SrvResolver, pick_server_from_list +from synapse.http.federation.srv_resolver import Server, SrvResolver from synapse.http.federation.well_known_resolver import WellKnownResolver -from synapse.logging.context import make_deferred_yieldable +from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable from synapse.util import Clock logger = logging.getLogger(__name__) @@ -36,8 +36,9 @@ logger = logging.getLogger(__name__) @implementer(IAgent) class MatrixFederationAgent(object): - """An Agent-like thing which provides a `request` method which will look up a matrix - server and send an HTTP request to it. + """An Agent-like thing which provides a `request` method which correctly + handles resolving matrix server names when using matrix://. Handles standard + https URIs as normal. Doesn't implement any retries. (Those are done in MatrixFederationHttpClient.) @@ -65,23 +66,25 @@ class MatrixFederationAgent(object): ): self._reactor = reactor self._clock = Clock(reactor) - - self._tls_client_options_factory = tls_client_options_factory - if _srv_resolver is None: - _srv_resolver = SrvResolver() - self._srv_resolver = _srv_resolver - self._pool = HTTPConnectionPool(reactor) self._pool.retryAutomatically = False self._pool.maxPersistentPerHost = 5 self._pool.cachedConnectionTimeout = 2 * 60 + self._agent = Agent.usingEndpointFactory( + self._reactor, + MatrixHostnameEndpointFactory( + reactor, tls_client_options_factory, _srv_resolver + ), + pool=self._pool, + ) + self._well_known_resolver = WellKnownResolver( self._reactor, agent=Agent( self._reactor, - pool=self._pool, contextFactory=tls_client_options_factory, + pool=self._pool, ), well_known_cache=_well_known_cache, ) @@ -91,19 +94,15 @@ class MatrixFederationAgent(object): """ Args: method (bytes): HTTP method: GET/POST/etc - uri (bytes): Absolute URI to be retrieved - headers (twisted.web.http_headers.Headers|None): HTTP headers to send with the request, or None to send no extra headers. - bodyProducer (twisted.web.iweb.IBodyProducer|None): An object which can generate bytes to make up the body of this request (for example, the properly encoded contents of a file for a file upload). Or None if the request is to have no body. - Returns: Deferred[twisted.web.iweb.IResponse]: fires when the header of the response has been received (regardless of the @@ -111,210 +110,195 @@ class MatrixFederationAgent(object): response from being received (including problems that prevent the request from being sent). """ - parsed_uri = URI.fromBytes(uri, defaultPort=-1) - res = yield self._route_matrix_uri(parsed_uri) + # We use urlparse as that will set `port` to None if there is no + # explicit port. + parsed_uri = urllib.parse.urlparse(uri) - # set up the TLS connection params + # If this is a matrix:// URI check if the server has delegated matrix + # traffic using well-known delegation. # - # XXX disabling TLS is really only supported here for the benefit of the - # unit tests. We should make the UTs cope with TLS rather than having to make - # the code support the unit tests. - if self._tls_client_options_factory is None: - tls_options = None - else: - tls_options = self._tls_client_options_factory.get_options( - res.tls_server_name.decode("ascii") + # We have to do this here and not in the endpoint as we need to rewrite + # the host header with the delegated server name. + delegated_server = None + if ( + parsed_uri.scheme == b"matrix" + and not _is_ip_literal(parsed_uri.hostname) + and not parsed_uri.port + ): + well_known_result = yield self._well_known_resolver.get_well_known( + parsed_uri.hostname ) + delegated_server = well_known_result.delegated_server - # make sure that the Host header is set correctly + if delegated_server: + # Ok, the server has delegated matrix traffic to somewhere else, so + # lets rewrite the URL to replace the server with the delegated + # server name. + uri = urllib.parse.urlunparse( + ( + parsed_uri.scheme, + delegated_server, + parsed_uri.path, + parsed_uri.params, + parsed_uri.query, + parsed_uri.fragment, + ) + ) + parsed_uri = urllib.parse.urlparse(uri) + + # We need to make sure the host header is set to the netloc of the + # server. if headers is None: headers = Headers() else: headers = headers.copy() if not headers.hasHeader(b"host"): - headers.addRawHeader(b"host", res.host_header) + headers.addRawHeader(b"host", parsed_uri.netloc) - class EndpointFactory(object): - @staticmethod - def endpointForURI(_uri): - ep = LoggingHostnameEndpoint( - self._reactor, res.target_host, res.target_port - ) - if tls_options is not None: - ep = wrapClientTLS(tls_options, ep) - return ep + with PreserveLoggingContext(): + res = yield self._agent.request(method, uri, headers, bodyProducer) - agent = Agent.usingEndpointFactory(self._reactor, EndpointFactory(), self._pool) - res = yield make_deferred_yieldable( - agent.request(method, uri, headers, bodyProducer) - ) return res - @defer.inlineCallbacks - def _route_matrix_uri(self, parsed_uri, lookup_well_known=True): - """Helper for `request`: determine the routing for a Matrix URI - Args: - parsed_uri (twisted.web.client.URI): uri to route. Note that it should be - parsed with URI.fromBytes(uri, defaultPort=-1) to set the `port` to -1 - if there is no explicit port given. +@implementer(IAgentEndpointFactory) +class MatrixHostnameEndpointFactory(object): + """Factory for MatrixHostnameEndpoint for parsing to an Agent. + """ - lookup_well_known (bool): True if we should look up the .well-known file if - there is no SRV record. + def __init__(self, reactor, tls_client_options_factory, srv_resolver): + self._reactor = reactor + self._tls_client_options_factory = tls_client_options_factory - Returns: - Deferred[_RoutingResult] - """ - # check for an IP literal - try: - ip_address = IPAddress(parsed_uri.host.decode("ascii")) - except Exception: - # not an IP address - ip_address = None + if srv_resolver is None: + srv_resolver = SrvResolver() - if ip_address: - port = parsed_uri.port - if port == -1: - port = 8448 - return _RoutingResult( - host_header=parsed_uri.netloc, - tls_server_name=parsed_uri.host, - target_host=parsed_uri.host, - target_port=port, - ) + self._srv_resolver = srv_resolver - if parsed_uri.port != -1: - # there is an explicit port - return _RoutingResult( - host_header=parsed_uri.netloc, - tls_server_name=parsed_uri.host, - target_host=parsed_uri.host, - target_port=parsed_uri.port, - ) - - if lookup_well_known: - # try a .well-known lookup - well_known_result = yield self._well_known_resolver.get_well_known( - parsed_uri.host - ) - well_known_server = well_known_result.delegated_server - - if well_known_server: - # if we found a .well-known, start again, but don't do another - # .well-known lookup. - - # parse the server name in the .well-known response into host/port. - # (This code is lifted from twisted.web.client.URI.fromBytes). - if b":" in well_known_server: - well_known_host, well_known_port = well_known_server.rsplit(b":", 1) - try: - well_known_port = int(well_known_port) - except ValueError: - # the part after the colon could not be parsed as an int - # - we assume it is an IPv6 literal with no port (the closing - # ']' stops it being parsed as an int) - well_known_host, well_known_port = well_known_server, -1 - else: - well_known_host, well_known_port = well_known_server, -1 - - new_uri = URI( - scheme=parsed_uri.scheme, - netloc=well_known_server, - host=well_known_host, - port=well_known_port, - path=parsed_uri.path, - params=parsed_uri.params, - query=parsed_uri.query, - fragment=parsed_uri.fragment, - ) - - res = yield self._route_matrix_uri(new_uri, lookup_well_known=False) - return res - - # try a SRV lookup - service_name = b"_matrix._tcp.%s" % (parsed_uri.host,) - server_list = yield self._srv_resolver.resolve_service(service_name) - - if not server_list: - target_host = parsed_uri.host - port = 8448 - logger.debug( - "No SRV record for %s, using %s:%i", - parsed_uri.host.decode("ascii"), - target_host.decode("ascii"), - port, - ) - else: - target_host, port = pick_server_from_list(server_list) - logger.debug( - "Picked %s:%i from SRV records for %s", - target_host.decode("ascii"), - port, - parsed_uri.host.decode("ascii"), - ) - - return _RoutingResult( - host_header=parsed_uri.netloc, - tls_server_name=parsed_uri.host, - target_host=target_host, - target_port=port, + def endpointForURI(self, parsed_uri): + return MatrixHostnameEndpoint( + self._reactor, + self._tls_client_options_factory, + self._srv_resolver, + parsed_uri, ) @implementer(IStreamClientEndpoint) -class LoggingHostnameEndpoint(object): - """A wrapper for HostnameEndpint which logs when it connects""" +class MatrixHostnameEndpoint(object): + """An endpoint that resolves matrix:// URLs using Matrix server name + resolution (i.e. via SRV). Does not check for well-known delegation. + """ - def __init__(self, reactor, host, port, *args, **kwargs): - self.host = host - self.port = port - self.ep = HostnameEndpoint(reactor, host, port, *args, **kwargs) + def __init__(self, reactor, tls_client_options_factory, srv_resolver, parsed_uri): + self._reactor = reactor + # We reparse the URI so that defaultPort is -1 rather than 80 + self._parsed_uri = parsed_uri + + # set up the TLS connection params + # + # XXX disabling TLS is really only supported here for the benefit of the + # unit tests. We should make the UTs cope with TLS rather than having to make + # the code support the unit tests. + + if tls_client_options_factory is None: + self._tls_options = None + else: + self._tls_options = tls_client_options_factory.get_options( + self._parsed_uri.host.decode("ascii") + ) + + self._srv_resolver = srv_resolver + + @defer.inlineCallbacks def connect(self, protocol_factory): - logger.info("Connecting to %s:%i", self.host.decode("ascii"), self.port) - return self.ep.connect(protocol_factory) + """Implements IStreamClientEndpoint interface + """ + + first_exception = None + + server_list = yield self._resolve_server() + + for server in server_list: + host = server.host + port = server.port + + try: + logger.info("Connecting to %s:%i", host.decode("ascii"), port) + endpoint = HostnameEndpoint(self._reactor, host, port) + if self._tls_options: + endpoint = wrapClientTLS(self._tls_options, endpoint) + result = yield make_deferred_yieldable( + endpoint.connect(protocol_factory) + ) + + return result + except Exception as e: + logger.info( + "Failed to connect to %s:%i: %s", host.decode("ascii"), port, e + ) + if not first_exception: + first_exception = e + + # We return the first failure because that's probably the most interesting. + if first_exception: + raise first_exception + + # This shouldn't happen as we should always have at least one host/port + # to try and if that doesn't work then we'll have an exception. + raise Exception("Failed to resolve server %r" % (self._parsed_uri.netloc,)) + + @defer.inlineCallbacks + def _resolve_server(self): + """Resolves the server name to a list of hosts and ports to attempt to + connect to. + + Returns: + Deferred[list[Server]] + """ + + if self._parsed_uri.scheme != b"matrix": + return [Server(host=self._parsed_uri.host, port=self._parsed_uri.port)] + + # Note: We don't do well-known lookup as that needs to have happened + # before now, due to needing to rewrite the Host header of the HTTP + # request. + + parsed_uri = urllib.parse.urlparse(self._parsed_uri.toBytes()) + + host = parsed_uri.hostname + port = parsed_uri.port + + # If there is an explicit port or the host is an IP address we bypass + # SRV lookups and just use the given host/port. + if port or _is_ip_literal(host): + return [Server(host, port or 8448)] + + server_list = yield self._srv_resolver.resolve_service(b"_matrix._tcp." + host) + + if server_list: + return server_list + + # No SRV records, so we fallback to host and 8448 + return [Server(host, 8448)] -@attr.s -class _RoutingResult(object): - """The result returned by `_route_matrix_uri`. +def _is_ip_literal(host): + """Test if the given host name is either an IPv4 or IPv6 literal. - Contains the parameters needed to direct a federation connection to a particular - server. + Args: + host (bytes) - Where a SRV record points to several servers, this object contains a single server - chosen from the list. + Returns: + bool """ - host_header = attr.ib() - """ - The value we should assign to the Host header (host:port from the matrix - URI, or .well-known). + host = host.decode("ascii") - :type: bytes - """ - - tls_server_name = attr.ib() - """ - The server name we should set in the SNI (typically host, without port, from the - matrix URI or .well-known) - - :type: bytes - """ - - target_host = attr.ib() - """ - The hostname (or IP literal) we should route the TCP connection to (the target of the - SRV record, or the hostname from the URL/.well-known) - - :type: bytes - """ - - target_port = attr.ib() - """ - The port we should route the TCP connection to (the target of the SRV record, or - the port from the URL/.well-known, or 8448) - - :type: int - """ + try: + IPAddress(host) + return True + except AddrFormatError: + return False diff --git a/synapse/http/federation/srv_resolver.py b/synapse/http/federation/srv_resolver.py index b32188766d..bbda0a23f4 100644 --- a/synapse/http/federation/srv_resolver.py +++ b/synapse/http/federation/srv_resolver.py @@ -32,7 +32,7 @@ logger = logging.getLogger(__name__) SERVER_CACHE = {} -@attr.s +@attr.s(slots=True, frozen=True) class Server(object): """ Our record of an individual server which can be tried to reach a destination. @@ -83,6 +83,35 @@ def pick_server_from_list(server_list): raise RuntimeError("pick_server_from_list got to end of eligible server list.") +def _sort_server_list(server_list): + """Given a list of SRV records sort them into priority order and shuffle + each priority with the given weight. + """ + priority_map = {} + + for server in server_list: + priority_map.setdefault(server.priority, []).append(server) + + results = [] + for priority in sorted(priority_map): + servers = priority_map.pop(priority) + + while servers: + total_weight = sum(s.weight for s in servers) + target_weight = random.randint(0, total_weight) + + for s in servers: + target_weight -= s.weight + + if target_weight <= 0: + break + + results.append(s) + servers.remove(s) + + return results + + class SrvResolver(object): """Interface to the dns client to do SRV lookups, with result caching. @@ -120,7 +149,7 @@ class SrvResolver(object): if cache_entry: if all(s.expires > now for s in cache_entry): servers = list(cache_entry) - return servers + return _sort_server_list(servers) try: answers, _, _ = yield make_deferred_yieldable( @@ -169,4 +198,4 @@ class SrvResolver(object): ) self._cache[service_name] = list(servers) - return servers + return _sort_server_list(servers) diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py index 2c568788b3..f97c8a59f6 100644 --- a/tests/http/federation/test_matrix_federation_agent.py +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -41,9 +41,9 @@ from synapse.http.federation.well_known_resolver import ( from synapse.logging.context import LoggingContext from synapse.util.caches.ttlcache import TTLCache +from tests import unittest from tests.http import TestServerTLSConnectionFactory, get_test_ca_cert_file from tests.server import FakeTransport, ThreadedMemoryReactorClock -from tests.unittest import TestCase from tests.utils import default_config logger = logging.getLogger(__name__) @@ -67,7 +67,8 @@ def get_connection_factory(): return test_server_connection_factory -class MatrixFederationAgentTests(TestCase): +@unittest.DEBUG +class MatrixFederationAgentTests(unittest.TestCase): def setUp(self): self.reactor = ThreadedMemoryReactorClock() @@ -1056,8 +1057,64 @@ class MatrixFederationAgentTests(TestCase): r = self.successResultOf(fetch_d) self.assertEqual(r.delegated_server, None) + def test_srv_fallbacks(self): + """Test that other SRV results are tried if the first one fails. + """ -class TestCachePeriodFromHeaders(TestCase): + self.mock_resolver.resolve_service.side_effect = lambda _: [ + Server(host=b"target.com", port=8443), + Server(host=b"target.com", port=8444), + ] + self.reactor.lookups["target.com"] = "1.2.3.4" + + test_d = self._make_get_request(b"matrix://testserv/foo/bar") + + # Nothing happened yet + self.assertNoResult(test_d) + + self.mock_resolver.resolve_service.assert_called_once_with( + b"_matrix._tcp.testserv" + ) + + # We should see an attempt to connect to the first server + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients.pop(0) + self.assertEqual(host, "1.2.3.4") + self.assertEqual(port, 8443) + + # Fonx the connection + client_factory.clientConnectionFailed(None, Exception("nope")) + + # There's a 300ms delay in HostnameEndpoint + self.reactor.pump((0.4,)) + + # Hasn't failed yet + self.assertNoResult(test_d) + + # We shouldnow see an attempt to connect to the second server + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients.pop(0) + self.assertEqual(host, "1.2.3.4") + self.assertEqual(port, 8444) + + # make a test server, and wire up the client + http_server = self._make_connection(client_factory, expected_sni=b"testserv") + + self.assertEqual(len(http_server.requests), 1) + request = http_server.requests[0] + self.assertEqual(request.method, b"GET") + self.assertEqual(request.path, b"/foo/bar") + self.assertEqual(request.requestHeaders.getRawHeaders(b"host"), [b"testserv"]) + + # finish the request + request.finish() + self.reactor.pump((0.1,)) + self.successResultOf(test_d) + + +class TestCachePeriodFromHeaders(unittest.TestCase): def test_cache_control(self): # uppercase self.assertEqual( diff --git a/tests/http/federation/test_srv_resolver.py b/tests/http/federation/test_srv_resolver.py index 3b885ef64b..df034ab237 100644 --- a/tests/http/federation/test_srv_resolver.py +++ b/tests/http/federation/test_srv_resolver.py @@ -83,8 +83,10 @@ class SrvResolverTestCase(unittest.TestCase): service_name = b"test_service.example.com" - entry = Mock(spec_set=["expires"]) + entry = Mock(spec_set=["expires", "priority", "weight"]) entry.expires = 0 + entry.priority = 0 + entry.weight = 0 cache = {service_name: [entry]} resolver = SrvResolver(dns_client=dns_client_mock, cache=cache) @@ -105,8 +107,10 @@ class SrvResolverTestCase(unittest.TestCase): service_name = b"test_service.example.com" - entry = Mock(spec_set=["expires"]) + entry = Mock(spec_set=["expires", "priority", "weight"]) entry.expires = 999999999 + entry.priority = 0 + entry.weight = 0 cache = {service_name: [entry]} resolver = SrvResolver( From c03e3e83010d0147515e3771353af6b89bf8cf03 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Aug 2019 15:33:22 +0100 Subject: [PATCH 043/110] Newsfile --- changelog.d/5864.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5864.misc diff --git a/changelog.d/5864.misc b/changelog.d/5864.misc new file mode 100644 index 0000000000..40ac11db64 --- /dev/null +++ b/changelog.d/5864.misc @@ -0,0 +1 @@ +Correctly retry all hosts returned from SRV when we fail to connect. From b13cac896d4f9bfd946517e96676394e2975e54b Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 15 Aug 2019 16:27:11 +0100 Subject: [PATCH 044/110] Fix up password reset template config names (#5863) Fixes #5833 The emailconfig code was attempting to pull incorrect config file names. This corrects that, while also marking a difference between a config file variable that's a filepath versus a str containing HTML. --- changelog.d/5863.bugfix | 1 + synapse/config/emailconfig.py | 16 ++++++++-------- synapse/rest/client/v2_alpha/account.py | 4 ++-- 3 files changed, 11 insertions(+), 10 deletions(-) create mode 100644 changelog.d/5863.bugfix diff --git a/changelog.d/5863.bugfix b/changelog.d/5863.bugfix new file mode 100644 index 0000000000..bceae5be67 --- /dev/null +++ b/changelog.d/5863.bugfix @@ -0,0 +1 @@ +Fix Synapse looking for config options `password_reset_failure_template` and `password_reset_success_template`, when they are actually `password_reset_template_failure_html`, `password_reset_template_success_html`. diff --git a/synapse/config/emailconfig.py b/synapse/config/emailconfig.py index 8381b8eb29..36d01a10af 100644 --- a/synapse/config/emailconfig.py +++ b/synapse/config/emailconfig.py @@ -132,21 +132,21 @@ class EmailConfig(Config): self.email_password_reset_template_text = email_config.get( "password_reset_template_text", "password_reset.txt" ) - self.email_password_reset_failure_template = email_config.get( - "password_reset_failure_template", "password_reset_failure.html" + self.email_password_reset_template_failure_html = email_config.get( + "password_reset_template_failure_html", "password_reset_failure.html" ) # This template does not support any replaceable variables, so we will # read it from the disk once during setup - email_password_reset_success_template = email_config.get( - "password_reset_success_template", "password_reset_success.html" + email_password_reset_template_success_html = email_config.get( + "password_reset_template_success_html", "password_reset_success.html" ) # Check templates exist for f in [ self.email_password_reset_template_html, self.email_password_reset_template_text, - self.email_password_reset_failure_template, - email_password_reset_success_template, + self.email_password_reset_template_failure_html, + email_password_reset_template_success_html, ]: p = os.path.join(self.email_template_dir, f) if not os.path.isfile(p): @@ -154,9 +154,9 @@ class EmailConfig(Config): # Retrieve content of web templates filepath = os.path.join( - self.email_template_dir, email_password_reset_success_template + self.email_template_dir, email_password_reset_template_success_html ) - self.email_password_reset_success_html_content = self.read_file( + self.email_password_reset_template_success_html_content = self.read_file( filepath, "email.password_reset_template_success_html" ) diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index 7ac456812a..934ed5d16d 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -282,13 +282,13 @@ class PasswordResetSubmitTokenServlet(RestServlet): return None # Otherwise show the success template - html = self.config.email_password_reset_success_html_content + html = self.config.email_password_reset_template_success_html_content request.setResponseCode(200) except ThreepidValidationError as e: # Show a failure page with a reason html = self.load_jinja2_template( self.config.email_template_dir, - self.config.email_password_reset_failure_template, + self.config.email_password_reset_template_failure_html, template_vars={"failure_reason": e.msg}, ) request.setResponseCode(e.code) From e132ba79aece7cd6683ba7b3d593d772de24d95c Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Thu, 15 Aug 2019 21:02:40 -0700 Subject: [PATCH 045/110] fix changelog --- changelog.d/5857.bugfix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/5857.bugfix b/changelog.d/5857.bugfix index bfeae6a6f2..008799ccbb 100644 --- a/changelog.d/5857.bugfix +++ b/changelog.d/5857.bugfix @@ -1 +1 @@ -Add missing version field to e2e_room_keys database index. \ No newline at end of file +Fix database index so that different backup versions can have the same sessions. From 861d663c15a8103f5599f0bdda7d1d3ae764fd8f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Aug 2019 13:15:26 +0100 Subject: [PATCH 046/110] Fixup changelog and remove debug logging --- changelog.d/5850.feature | 1 + changelog.d/5850.misc | 1 - synapse/http/federation/well_known_resolver.py | 5 +---- 3 files changed, 2 insertions(+), 5 deletions(-) create mode 100644 changelog.d/5850.feature delete mode 100644 changelog.d/5850.misc diff --git a/changelog.d/5850.feature b/changelog.d/5850.feature new file mode 100644 index 0000000000..b565929a54 --- /dev/null +++ b/changelog.d/5850.feature @@ -0,0 +1 @@ +Add retry to well-known lookups if we have recently seen a valid well-known record for the server. diff --git a/changelog.d/5850.misc b/changelog.d/5850.misc deleted file mode 100644 index c4f879ca2f..0000000000 --- a/changelog.d/5850.misc +++ /dev/null @@ -1 +0,0 @@ -Retry well-known lookups if we have recently seen a valid well-known record for the server. diff --git a/synapse/http/federation/well_known_resolver.py b/synapse/http/federation/well_known_resolver.py index d59864e298..c846003886 100644 --- a/synapse/http/federation/well_known_resolver.py +++ b/synapse/http/federation/well_known_resolver.py @@ -163,9 +163,7 @@ class WellKnownResolver(object): Deferred[Tuple[bytes,int]]: The lookup result and cache period. """ - had_valid_well_known = bool( - self._had_valid_well_known_cache.get(server_name, False) - ) + had_valid_well_known = self._had_valid_well_known_cache.get(server_name, False) # We do this in two steps to differentiate between possibly transient # errors (e.g. can't connect to host, 503 response) and more permenant @@ -247,7 +245,6 @@ class WellKnownResolver(object): # Bail if we've been cancelled raise except Exception as e: - logger.info("Retry: %s", retry) if not retry or i >= WELL_KNOWN_RETRY_ATTEMPTS: logger.info("Error fetching %s: %s", uri_str, e) raise _FetchWellKnownFailure(temporary=True) From ebba15ee7f00f2aad2a6a2a3b2e2b4810f83282c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Aug 2019 13:29:41 +0100 Subject: [PATCH 047/110] Newsfile --- changelog.d/5860.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5860.misc diff --git a/changelog.d/5860.misc b/changelog.d/5860.misc new file mode 100644 index 0000000000..f9960b17b4 --- /dev/null +++ b/changelog.d/5860.misc @@ -0,0 +1 @@ +Remove log line for debugging issue #5407. From 87fa26006b50e3f7d80952fb8e0ee45ecfdc9ae5 Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Fri, 16 Aug 2019 16:13:25 +0100 Subject: [PATCH 048/110] Opentracing misc (#5856) Add authenticated_entity and servlet_names tags. Functionally: - Add a tag for authenticated_entity - Add a tag for servlet_names Stylistically: Moved to importing methods directly from opentracing. --- changelog.d/5856.feature | 1 + synapse/api/auth.py | 4 ++++ synapse/federation/transport/server.py | 13 +++++++------ synapse/http/matrixfederationclient.py | 23 +++++++++++++---------- 4 files changed, 25 insertions(+), 16 deletions(-) create mode 100644 changelog.d/5856.feature diff --git a/changelog.d/5856.feature b/changelog.d/5856.feature new file mode 100644 index 0000000000..f4310b9244 --- /dev/null +++ b/changelog.d/5856.feature @@ -0,0 +1 @@ +Add a tag recording a request's authenticated entity and corresponding servlet in opentracing. diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 179644852a..7b3a5a8221 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -22,6 +22,7 @@ from netaddr import IPAddress from twisted.internet import defer +import synapse.logging.opentracing as opentracing import synapse.types from synapse import event_auth from synapse.api.constants import EventTypes, JoinRules, Membership @@ -178,6 +179,7 @@ class Auth(object): def get_public_keys(self, invite_event): return event_auth.get_public_keys(invite_event) + @opentracing.trace @defer.inlineCallbacks def get_user_by_req( self, request, allow_guest=False, rights="access", allow_expired=False @@ -209,6 +211,7 @@ class Auth(object): user_id, app_service = yield self._get_appservice_user_id(request) if user_id: request.authenticated_entity = user_id + opentracing.set_tag("authenticated_entity", user_id) if ip_addr and self.hs.config.track_appservice_user_ips: yield self.store.insert_client_ip( @@ -259,6 +262,7 @@ class Auth(object): ) request.authenticated_entity = user.to_string() + opentracing.set_tag("authenticated_entity", user.to_string()) return synapse.types.create_requester( user, token_id, is_guest, device_id, app_service=app_service diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 9a86bd0263..a17148fc3c 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -22,7 +22,6 @@ import re from twisted.internet.defer import maybeDeferred import synapse -import synapse.logging.opentracing as opentracing from synapse.api.errors import Codes, FederationDeniedError, SynapseError from synapse.api.room_versions import RoomVersions from synapse.api.urls import ( @@ -39,6 +38,7 @@ from synapse.http.servlet import ( parse_string_from_args, ) from synapse.logging.context import run_in_background +from synapse.logging.opentracing import start_active_span_from_context, tags from synapse.types import ThirdPartyInstanceID, get_domain_from_id from synapse.util.ratelimitutils import FederationRateLimiter from synapse.util.versionstring import get_version_string @@ -289,16 +289,17 @@ class BaseFederationServlet(object): raise # Start an opentracing span - with opentracing.start_active_span_from_context( + with start_active_span_from_context( request.requestHeaders, "incoming-federation-request", tags={ "request_id": request.get_request_id(), - opentracing.tags.SPAN_KIND: opentracing.tags.SPAN_KIND_RPC_SERVER, - opentracing.tags.HTTP_METHOD: request.get_method(), - opentracing.tags.HTTP_URL: request.get_redacted_uri(), - opentracing.tags.PEER_HOST_IPV6: request.getClientIP(), + tags.SPAN_KIND: tags.SPAN_KIND_RPC_SERVER, + tags.HTTP_METHOD: request.get_method(), + tags.HTTP_URL: request.get_redacted_uri(), + tags.PEER_HOST_IPV6: request.getClientIP(), "authenticated_entity": origin, + "servlet_name": request.request_metrics.name, }, ): if origin: diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index d07d356464..4326e98a28 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -36,7 +36,6 @@ from twisted.internet.task import _EPSILON, Cooperator from twisted.web._newclient import ResponseDone from twisted.web.http_headers import Headers -import synapse.logging.opentracing as opentracing import synapse.metrics import synapse.util.retryutils from synapse.api.errors import ( @@ -50,6 +49,12 @@ from synapse.http import QuieterFileBodyProducer from synapse.http.client import BlacklistingAgentWrapper, IPBlacklistingResolver from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent from synapse.logging.context import make_deferred_yieldable +from synapse.logging.opentracing import ( + inject_active_span_byte_dict, + set_tag, + start_active_span, + tags, +) from synapse.util.async_helpers import timeout_deferred from synapse.util.metrics import Measure @@ -341,20 +346,20 @@ class MatrixFederationHttpClient(object): query_bytes = b"" # Retreive current span - scope = opentracing.start_active_span( + scope = start_active_span( "outgoing-federation-request", tags={ - opentracing.tags.SPAN_KIND: opentracing.tags.SPAN_KIND_RPC_CLIENT, - opentracing.tags.PEER_ADDRESS: request.destination, - opentracing.tags.HTTP_METHOD: request.method, - opentracing.tags.HTTP_URL: request.path, + tags.SPAN_KIND: tags.SPAN_KIND_RPC_CLIENT, + tags.PEER_ADDRESS: request.destination, + tags.HTTP_METHOD: request.method, + tags.HTTP_URL: request.path, }, finish_on_close=True, ) # Inject the span into the headers headers_dict = {} - opentracing.inject_active_span_byte_dict(headers_dict, request.destination) + inject_active_span_byte_dict(headers_dict, request.destination) headers_dict[b"User-Agent"] = [self.version_string_bytes] @@ -436,9 +441,7 @@ class MatrixFederationHttpClient(object): response.phrase.decode("ascii", errors="replace"), ) - opentracing.set_tag( - opentracing.tags.HTTP_STATUS_CODE, response.code - ) + set_tag(tags.HTTP_STATUS_CODE, response.code) if 200 <= response.code < 300: pass From 6d86df73f18803c148c320c3b3089cb0d7e2f30a Mon Sep 17 00:00:00 2001 From: Chris Moos Date: Thu, 15 Aug 2019 18:16:27 -0700 Subject: [PATCH 049/110] Fix issue with Synapse not starting up. Fixes #5866. Signed-off-by: Chris Moos --- synapse/app/homeserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 7d6b51b5bc..8233905844 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -447,7 +447,7 @@ def setup(config_options): reactor.stop() sys.exit(1) - reactor.addSystemEventTrigger("before", "startup", start) + reactor.callWhenRunning(start) return hs From 20402aa128d69626608a5cf0ab4b35fd67928fa0 Mon Sep 17 00:00:00 2001 From: Chris Moos Date: Fri, 16 Aug 2019 09:25:36 -0700 Subject: [PATCH 050/110] Add changelog entry. --- changelog.d/5867.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5867.bugfix diff --git a/changelog.d/5867.bugfix b/changelog.d/5867.bugfix new file mode 100644 index 0000000000..ece89adae3 --- /dev/null +++ b/changelog.d/5867.bugfix @@ -0,0 +1 @@ +Fix startup issue (hang on ACME provisioning) due to ordering of Twisted reactor startup. \ No newline at end of file From c188bd2c12f222d37de85ce8c469e73adaf81d3c Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 16 Aug 2019 23:19:23 +0100 Subject: [PATCH 051/110] add attribution --- changelog.d/5867.bugfix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/5867.bugfix b/changelog.d/5867.bugfix index ece89adae3..d68b152762 100644 --- a/changelog.d/5867.bugfix +++ b/changelog.d/5867.bugfix @@ -1 +1 @@ -Fix startup issue (hang on ACME provisioning) due to ordering of Twisted reactor startup. \ No newline at end of file +Fix startup issue (hang on ACME provisioning) due to ordering of Twisted reactor startup. Thanks to @chrismoos for supplying the fix. \ No newline at end of file From 412c6e21a8d564706b67b99fd6399242707c40ea Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Sat, 17 Aug 2019 09:09:52 +0100 Subject: [PATCH 052/110] Drop dependency on sdnotify (#5871) ... to save OSes which don't use it from having to maintain a port. Fixes #5865. --- changelog.d/5871.feature | 1 + synapse/app/_base.py | 47 ++++++++++++++++++++++++++-------- synapse/python_dependencies.py | 1 - 3 files changed, 38 insertions(+), 11 deletions(-) create mode 100644 changelog.d/5871.feature diff --git a/changelog.d/5871.feature b/changelog.d/5871.feature new file mode 100644 index 0000000000..8805607bf1 --- /dev/null +++ b/changelog.d/5871.feature @@ -0,0 +1 @@ +Drop hard dependency on `sdnotify` python package. \ No newline at end of file diff --git a/synapse/app/_base.py b/synapse/app/_base.py index c010e70955..69dcf3523f 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -17,10 +17,10 @@ import gc import logging import os import signal +import socket import sys import traceback -import sdnotify from daemonize import Daemonize from twisted.internet import defer, error, reactor @@ -246,13 +246,12 @@ def start(hs, listeners=None): def handle_sighup(*args, **kwargs): # Tell systemd our state, if we're using it. This will silently fail if # we're not using systemd. - sd_channel = sdnotify.SystemdNotifier() - sd_channel.notify("RELOADING=1") + sdnotify(b"RELOADING=1") for i in _sighup_callbacks: i(hs) - sd_channel.notify("READY=1") + sdnotify(b"READY=1") signal.signal(signal.SIGHUP, handle_sighup) @@ -308,16 +307,12 @@ def setup_sdnotify(hs): # Tell systemd our state, if we're using it. This will silently fail if # we're not using systemd. - sd_channel = sdnotify.SystemdNotifier() - hs.get_reactor().addSystemEventTrigger( - "after", - "startup", - lambda: sd_channel.notify("READY=1\nMAINPID=%s" % (os.getpid())), + "after", "startup", sdnotify, b"READY=1\nMAINPID=%i" % (os.getpid(),) ) hs.get_reactor().addSystemEventTrigger( - "before", "shutdown", lambda: sd_channel.notify("STOPPING=1") + "before", "shutdown", sdnotify, b"STOPPING=1" ) @@ -414,3 +409,35 @@ class _DeferredResolutionReceiver(object): def resolutionComplete(self): self._deferred.callback(()) self._receiver.resolutionComplete() + + +sdnotify_sockaddr = os.getenv("NOTIFY_SOCKET") + + +def sdnotify(state): + """ + Send a notification to systemd, if the NOTIFY_SOCKET env var is set. + + This function is based on the sdnotify python package, but since it's only a few + lines of code, it's easier to duplicate it here than to add a dependency on a + package which many OSes don't include as a matter of principle. + + Args: + state (bytes): notification to send + """ + if not isinstance(state, bytes): + raise TypeError("sdnotify should be called with a bytes") + if not sdnotify_sockaddr: + return + addr = sdnotify_sockaddr + if addr[0] == "@": + addr = "\0" + addr[1:] + + try: + with socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM) as sock: + sock.connect(addr) + sock.sendall(state) + except Exception as e: + # this is a bit surprising, since we don't expect to have a NOTIFY_SOCKET + # unless systemd is expecting us to notify it. + logger.warning("Unable to send notification to systemd: %s", e) diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 195a7a70c8..c6465c0386 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -72,7 +72,6 @@ REQUIREMENTS = [ "netaddr>=0.7.18", "Jinja2>=2.9", "bleach>=1.4.3", - "sdnotify>=0.3", ] CONDITIONAL_REQUIREMENTS = { From 74fb72921352a6aaa3736c9b45a9d0e3cdfe0d21 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 17 Aug 2019 09:16:17 +0100 Subject: [PATCH 053/110] 1.3.1 --- CHANGES.md | 15 +++++++++++++++ changelog.d/5867.bugfix | 1 - changelog.d/5871.feature | 1 - debian/changelog | 14 ++++++++++++-- synapse/__init__.py | 2 +- 5 files changed, 28 insertions(+), 5 deletions(-) delete mode 100644 changelog.d/5867.bugfix delete mode 100644 changelog.d/5871.feature diff --git a/CHANGES.md b/CHANGES.md index d13dcb717e..f25c7d0c1a 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,18 @@ +Synapse 1.3.1 (2019-08-17) +========================== + +Features +-------- + +- Drop hard dependency on `sdnotify` python package. ([\#5871](https://github.com/matrix-org/synapse/issues/5871)) + + +Bugfixes +-------- + +- Fix startup issue (hang on ACME provisioning) due to ordering of Twisted reactor startup. Thanks to @chrismoos for supplying the fix. ([\#5867](https://github.com/matrix-org/synapse/issues/5867)) + + Synapse 1.3.0 (2019-08-15) ========================== diff --git a/changelog.d/5867.bugfix b/changelog.d/5867.bugfix deleted file mode 100644 index d68b152762..0000000000 --- a/changelog.d/5867.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix startup issue (hang on ACME provisioning) due to ordering of Twisted reactor startup. Thanks to @chrismoos for supplying the fix. \ No newline at end of file diff --git a/changelog.d/5871.feature b/changelog.d/5871.feature deleted file mode 100644 index 8805607bf1..0000000000 --- a/changelog.d/5871.feature +++ /dev/null @@ -1 +0,0 @@ -Drop hard dependency on `sdnotify` python package. \ No newline at end of file diff --git a/debian/changelog b/debian/changelog index 83232a0bad..76efc442d7 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,8 +1,19 @@ +matrix-synapse-py3 (1.3.1) stable; urgency=medium + + * New synapse release 1.3.1. + + -- Synapse Packaging team Sat, 17 Aug 2019 09:15:49 +0100 + matrix-synapse-py3 (1.3.0) stable; urgency=medium [ Andrew Morgan ] * Remove libsqlite3-dev from required build dependencies. + [ Synapse Packaging team ] + * New synapse release 1.3.0. + + -- Synapse Packaging team Thu, 15 Aug 2019 12:04:23 +0100 + matrix-synapse-py3 (1.2.0) stable; urgency=medium [ Amber Brown ] @@ -13,9 +24,8 @@ matrix-synapse-py3 (1.2.0) stable; urgency=medium [ Synapse Packaging team ] * New synapse release 1.2.0. - * New synapse release 1.3.0. - -- Synapse Packaging team Thu, 15 Aug 2019 12:04:23 +0100 + -- Synapse Packaging team Thu, 25 Jul 2019 14:10:07 +0100 matrix-synapse-py3 (1.1.0) stable; urgency=medium diff --git a/synapse/__init__.py b/synapse/__init__.py index 02ae90b072..6766ef445c 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -35,4 +35,4 @@ try: except ImportError: pass -__version__ = "1.3.0" +__version__ = "1.3.1" From bb29bc29374d10d151ebff13c4e95e07c0ef3a29 Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" Date: Tue, 20 Aug 2019 08:49:31 +0100 Subject: [PATCH 054/110] Use MSC2197 on stable prefix as it has almost finished FCP Signed-off-by: Olivier Wilkinson (reivilibre) --- synapse/federation/transport/client.py | 4 ++-- synapse/federation/transport/server.py | 26 ++------------------------ 2 files changed, 4 insertions(+), 26 deletions(-) diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 2e99f77eb1..482a101c09 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -328,8 +328,8 @@ class TransportLayerClient(object): third_party_instance_id=None, ): if search_filter: - # TODO(MSC2197): Move to V1 prefix - path = _create_path(FEDERATION_UNSTABLE_PREFIX, "/publicRooms") + # this uses MSC2197 (Search Filtering over Federation) + path = _create_v1_path("/publicRooms") data = {"include_all_networks": "true" if include_all_networks else "false"} if third_party_instance_id: diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index e17555c4cf..027b33f67e 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -750,30 +750,8 @@ class PublicRoomList(BaseFederationServlet): ) return 200, data - -class UnstablePublicRoomList(BaseFederationServlet): - """ - Fetch the public room list for this server. - - This API returns information in the same format as /publicRooms on the - client API, but will only ever include local public rooms and hence is - intended for consumption by other home servers. - - This is the unstable-prefixed version which adds support for MSC2197, which - is still undergoing review. - """ - - PATH = "/publicRooms" - PREFIX = FEDERATION_UNSTABLE_PREFIX - - def __init__(self, handler, authenticator, ratelimiter, server_name, allow_access): - super(UnstablePublicRoomList, self).__init__( - handler, authenticator, ratelimiter, server_name - ) - self.allow_access = allow_access - - # TODO(MSC2197): Move away from Unstable prefix and back to normal prefix async def on_POST(self, origin, content, query): + # This implements MSC2197 (Search Filtering over Federation) if not self.allow_access: raise FederationDeniedError(origin) @@ -1373,7 +1351,7 @@ FEDERATION_SERVLET_CLASSES = ( OPENID_SERVLET_CLASSES = (OpenIdUserInfo,) -ROOM_LIST_CLASSES = (PublicRoomList, UnstablePublicRoomList) +ROOM_LIST_CLASSES = (PublicRoomList,) GROUP_SERVER_SERVLET_CLASSES = ( FederationGroupsProfileServlet, From 502728777c00e3242f7436c18b8d918b6613d377 Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" Date: Tue, 20 Aug 2019 08:49:53 +0100 Subject: [PATCH 055/110] Newsfile on one line Signed-off-by: Olivier Wilkinson (reivilibre) --- changelog.d/5859.feature | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/changelog.d/5859.feature b/changelog.d/5859.feature index c897c66037..52df7fc81b 100644 --- a/changelog.d/5859.feature +++ b/changelog.d/5859.feature @@ -1,2 +1 @@ -Add unstable support for MSC2197 (filtered search requests over federation), in -order to allow upcoming room directory query performance improvements. +Add unstable support for MSC2197 (filtered search requests over federation), in order to allow upcoming room directory query performance improvements. From 1dec31560e5712306e368a0adc6d9f84f924bdc9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Aug 2019 11:46:00 +0100 Subject: [PATCH 056/110] Change jitter to be a factor rather than absolute value --- .../http/federation/well_known_resolver.py | 23 ++++++++++--------- .../test_matrix_federation_agent.py | 4 ++-- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/synapse/http/federation/well_known_resolver.py b/synapse/http/federation/well_known_resolver.py index c846003886..5e9b0befb0 100644 --- a/synapse/http/federation/well_known_resolver.py +++ b/synapse/http/federation/well_known_resolver.py @@ -32,8 +32,8 @@ from synapse.util.metrics import Measure # period to cache .well-known results for by default WELL_KNOWN_DEFAULT_CACHE_PERIOD = 24 * 3600 -# jitter to add to the .well-known default cache ttl -WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER = 10 * 60 +# jitter factor to add to the .well-known default cache ttls +WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER = 0.1 # period to cache failure to fetch .well-known for WELL_KNOWN_INVALID_CACHE_PERIOD = 1 * 3600 @@ -133,16 +133,14 @@ class WellKnownResolver(object): # We have recently seen a valid well-known record for this # server, so we cache the lack of well-known for a shorter time. cache_period = WELL_KNOWN_DOWN_CACHE_PERIOD - cache_period += random.uniform( - 0, WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER - ) else: - # add some randomness to the TTL to avoid a stampeding herd every hour - # after startup cache_period = WELL_KNOWN_INVALID_CACHE_PERIOD - cache_period += random.uniform( - 0, WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER - ) + + # add some randomness to the TTL to avoid a stampeding herd + cache_period *= random.uniform( + 1 - WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER, + 1 + WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER, + ) if cache_period > 0: self._well_known_cache.set(server_name, result, cache_period) @@ -194,7 +192,10 @@ class WellKnownResolver(object): cache_period = WELL_KNOWN_DEFAULT_CACHE_PERIOD # add some randomness to the TTL to avoid a stampeding herd every 24 hours # after startup - cache_period += random.uniform(0, WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER) + cache_period *= random.uniform( + 1 - WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER, + 1 + WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER, + ) else: cache_period = min(cache_period, WELL_KNOWN_MAX_CACHE_PERIOD) cache_period = max(cache_period, WELL_KNOWN_MIN_CACHE_PERIOD) diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py index 4d3f31d18c..c55aad8e11 100644 --- a/tests/http/federation/test_matrix_federation_agent.py +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -551,7 +551,7 @@ class MatrixFederationAgentTests(TestCase): self.assertEqual(self.well_known_cache[b"testserv"], b"target-server") # check the cache expires - self.reactor.pump((25 * 3600,)) + self.reactor.pump((48 * 3600,)) self.well_known_cache.expire() self.assertNotIn(b"testserv", self.well_known_cache) @@ -639,7 +639,7 @@ class MatrixFederationAgentTests(TestCase): self.assertEqual(self.well_known_cache[b"testserv"], b"target-server") # check the cache expires - self.reactor.pump((25 * 3600,)) + self.reactor.pump((48 * 3600,)) self.well_known_cache.expire() self.assertNotIn(b"testserv", self.well_known_cache) From 7777d353bfffc840b79391da107e593338a1a2fe Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Aug 2019 11:46:54 +0100 Subject: [PATCH 057/110] Remove test debugs --- tests/federation/test_federation_server.py | 1 - tests/http/federation/test_matrix_federation_agent.py | 1 - tests/test_visibility.py | 1 - 3 files changed, 3 deletions(-) diff --git a/tests/federation/test_federation_server.py b/tests/federation/test_federation_server.py index af15f4cc5a..b08be451aa 100644 --- a/tests/federation/test_federation_server.py +++ b/tests/federation/test_federation_server.py @@ -20,7 +20,6 @@ from synapse.federation.federation_server import server_matches_acl_event from tests import unittest -@unittest.DEBUG class ServerACLsTestCase(unittest.TestCase): def test_blacklisted_server(self): e = _create_acl_event({"allow": ["*"], "deny": ["evil.com"]}) diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py index f97c8a59f6..445a0e76ab 100644 --- a/tests/http/federation/test_matrix_federation_agent.py +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -67,7 +67,6 @@ def get_connection_factory(): return test_server_connection_factory -@unittest.DEBUG class MatrixFederationAgentTests(unittest.TestCase): def setUp(self): self.reactor = ThreadedMemoryReactorClock() diff --git a/tests/test_visibility.py b/tests/test_visibility.py index e0605dac2f..18f1a0035d 100644 --- a/tests/test_visibility.py +++ b/tests/test_visibility.py @@ -74,7 +74,6 @@ class FilterEventsForServerTestCase(tests.unittest.TestCase): self.assertEqual(events_to_filter[i].event_id, filtered[i].event_id) self.assertEqual(filtered[i].content["a"], "b") - @tests.unittest.DEBUG @defer.inlineCallbacks def test_erased_user(self): # 4 message events, from erased and unerased users, with a membership From 501994582899ad9d790029b3d7c48ba32f5720a9 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 20 Aug 2019 11:20:10 +0100 Subject: [PATCH 058/110] Refactor the Appservice scheduler code Get rid of the labyrinthine `recoverer_fn` code, and clean up the startup code (it seemed to be previously inexplicably split between `ApplicationServiceScheduler.start` and `_Recoverer.start`). Add some docstrings too. --- changelog.d/5886.misc | 1 + synapse/appservice/scheduler.py | 110 +++++++++++++++++------------ tests/appservice/test_scheduler.py | 6 +- 3 files changed, 68 insertions(+), 49 deletions(-) create mode 100644 changelog.d/5886.misc diff --git a/changelog.d/5886.misc b/changelog.d/5886.misc new file mode 100644 index 0000000000..22adba3d85 --- /dev/null +++ b/changelog.d/5886.misc @@ -0,0 +1 @@ +Refactor the Appservice scheduler code. diff --git a/synapse/appservice/scheduler.py b/synapse/appservice/scheduler.py index 42a350bff8..03a14402c5 100644 --- a/synapse/appservice/scheduler.py +++ b/synapse/appservice/scheduler.py @@ -70,35 +70,37 @@ class ApplicationServiceScheduler(object): self.store = hs.get_datastore() self.as_api = hs.get_application_service_api() - def create_recoverer(service, callback): - return _Recoverer(self.clock, self.store, self.as_api, service, callback) - - self.txn_ctrl = _TransactionController( - self.clock, self.store, self.as_api, create_recoverer - ) + self.txn_ctrl = _TransactionController(self.clock, self.store, self.as_api) self.queuer = _ServiceQueuer(self.txn_ctrl, self.clock) @defer.inlineCallbacks def start(self): logger.info("Starting appservice scheduler") + # check for any DOWN ASes and start recoverers for them. - recoverers = yield _Recoverer.start( - self.clock, self.store, self.as_api, self.txn_ctrl.on_recovered + services = yield self.store.get_appservices_by_state( + ApplicationServiceState.DOWN ) - self.txn_ctrl.add_recoverers(recoverers) + + for service in services: + self.txn_ctrl.start_recoverer(service) def submit_event_for_as(self, service, event): self.queuer.enqueue(service, event) class _ServiceQueuer(object): - """Queues events for the same application service together, sending - transactions as soon as possible. Once a transaction is sent successfully, - this schedules any other events in the queue to run. + """Queue of events waiting to be sent to appservices. + + Groups events into transactions per-appservice, and sends them on to the + TransactionController. Makes sure that we only have one transaction in flight per + appservice at a given time. """ def __init__(self, txn_ctrl, clock): self.queued_events = {} # dict of {service_id: [events]} + + # the appservices which currently have a transaction in flight self.requests_in_flight = set() self.txn_ctrl = txn_ctrl self.clock = clock @@ -136,13 +138,29 @@ class _ServiceQueuer(object): class _TransactionController(object): - def __init__(self, clock, store, as_api, recoverer_fn): + """Transaction manager. + + Builds AppServiceTransactions and runs their lifecycle. Also starts a Recoverer + if a transaction fails. + + (Note we have only have one of these in the homeserver.) + + Args: + clock (synapse.util.Clock): + store (synapse.storage.DataStore): + as_api (synapse.appservice.api.ApplicationServiceApi): + """ + + def __init__(self, clock, store, as_api): self.clock = clock self.store = store self.as_api = as_api - self.recoverer_fn = recoverer_fn - # keep track of how many recoverers there are - self.recoverers = [] + + # map from service id to recoverer instance + self.recoverers = {} + + # for UTs + self.RECOVERER_CLASS = _Recoverer @defer.inlineCallbacks def send(self, service, events): @@ -154,42 +172,45 @@ class _TransactionController(object): if sent: yield txn.complete(self.store) else: - run_in_background(self._start_recoverer, service) + run_in_background(self._on_txn_fail, service) except Exception: logger.exception("Error creating appservice transaction") - run_in_background(self._start_recoverer, service) + run_in_background(self._on_txn_fail, service) @defer.inlineCallbacks def on_recovered(self, recoverer): - self.recoverers.remove(recoverer) logger.info( "Successfully recovered application service AS ID %s", recoverer.service.id ) + self.recoverers.pop(recoverer.service.id) logger.info("Remaining active recoverers: %s", len(self.recoverers)) yield self.store.set_appservice_state( recoverer.service, ApplicationServiceState.UP ) - def add_recoverers(self, recoverers): - for r in recoverers: - self.recoverers.append(r) - if len(recoverers) > 0: - logger.info("New active recoverers: %s", len(self.recoverers)) - @defer.inlineCallbacks - def _start_recoverer(self, service): + def _on_txn_fail(self, service): try: yield self.store.set_appservice_state(service, ApplicationServiceState.DOWN) - logger.info( - "Application service falling behind. Starting recoverer. AS ID %s", - service.id, - ) - recoverer = self.recoverer_fn(service, self.on_recovered) - self.add_recoverers([recoverer]) - recoverer.recover() + self.start_recoverer(service) except Exception: logger.exception("Error starting AS recoverer") + def start_recoverer(self, service): + """Start a Recoverer for the given service + + Args: + service (synapse.appservice.ApplicationService): + """ + logger.info("Starting recoverer for AS ID %s", service.id) + assert service.id not in self.recoverers + recoverer = self.RECOVERER_CLASS( + self.clock, self.store, self.as_api, service, self.on_recovered + ) + self.recoverers[service.id] = recoverer + recoverer.recover() + logger.info("Now %i active recoverers", len(self.recoverers)) + @defer.inlineCallbacks def _is_service_up(self, service): state = yield self.store.get_appservice_state(service) @@ -197,18 +218,17 @@ class _TransactionController(object): class _Recoverer(object): - @staticmethod - @defer.inlineCallbacks - def start(clock, store, as_api, callback): - services = yield store.get_appservices_by_state(ApplicationServiceState.DOWN) - recoverers = [_Recoverer(clock, store, as_api, s, callback) for s in services] - for r in recoverers: - logger.info( - "Starting recoverer for AS ID %s which was marked as " "DOWN", - r.service.id, - ) - r.recover() - return recoverers + """Manages retries and backoff for a DOWN appservice. + + We have one of these for each appservice which is currently considered DOWN. + + Args: + clock (synapse.util.Clock): + store (synapse.storage.DataStore): + as_api (synapse.appservice.api.ApplicationServiceApi): + service (synapse.appservice.ApplicationService): the service we are managing + callback (callable[_Recoverer]): called once the service recovers. + """ def __init__(self, clock, store, as_api, service, callback): self.clock = clock diff --git a/tests/appservice/test_scheduler.py b/tests/appservice/test_scheduler.py index 04b8c2c07c..52f89d3f83 100644 --- a/tests/appservice/test_scheduler.py +++ b/tests/appservice/test_scheduler.py @@ -37,11 +37,9 @@ class ApplicationServiceSchedulerTransactionCtrlTestCase(unittest.TestCase): self.recoverer = Mock() self.recoverer_fn = Mock(return_value=self.recoverer) self.txnctrl = _TransactionController( - clock=self.clock, - store=self.store, - as_api=self.as_api, - recoverer_fn=self.recoverer_fn, + clock=self.clock, store=self.store, as_api=self.as_api ) + self.txnctrl.RECOVERER_CLASS = self.recoverer_fn def test_single_service_up_txn_sent(self): # Test: The AS is up and the txn is successfully sent. From 1f9df1cc7ba7027aef3a38d01909a928ecf2a8c5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Aug 2019 11:49:44 +0100 Subject: [PATCH 059/110] Fixup _sort_server_list to be slightly more efficient Also document that we are using the algorithm described in RFC2782 and ensure we handle zero weight correctly. --- synapse/http/federation/srv_resolver.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/synapse/http/federation/srv_resolver.py b/synapse/http/federation/srv_resolver.py index bbda0a23f4..110b112e85 100644 --- a/synapse/http/federation/srv_resolver.py +++ b/synapse/http/federation/srv_resolver.py @@ -94,10 +94,18 @@ def _sort_server_list(server_list): results = [] for priority in sorted(priority_map): - servers = priority_map.pop(priority) + servers = priority_map[priority] + # This algorithms follows the algorithm described in RFC2782. + # + # N.B. Weights can be zero, which means that you should pick that server + # last *or* that its the only server in this priority. + + # We sort to ensure zero weighted items are first. + servers.sort(key=lambda s: s.weight) + + total_weight = sum(s.weight for s in servers) while servers: - total_weight = sum(s.weight for s in servers) target_weight = random.randint(0, total_weight) for s in servers: @@ -108,6 +116,7 @@ def _sort_server_list(server_list): results.append(s) servers.remove(s) + total_weight -= s.weight return results From 74f016d343fe270ab3affe79cc82266d94120e5c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Aug 2019 11:50:12 +0100 Subject: [PATCH 060/110] Remove now unused pick_server_from_list --- synapse/http/federation/srv_resolver.py | 30 ------------------------- 1 file changed, 30 deletions(-) diff --git a/synapse/http/federation/srv_resolver.py b/synapse/http/federation/srv_resolver.py index 110b112e85..c8ca3fd0e9 100644 --- a/synapse/http/federation/srv_resolver.py +++ b/synapse/http/federation/srv_resolver.py @@ -53,36 +53,6 @@ class Server(object): expires = attr.ib(default=0) -def pick_server_from_list(server_list): - """Randomly choose a server from the server list - - Args: - server_list (list[Server]): list of candidate servers - - Returns: - Tuple[bytes, int]: (host, port) pair for the chosen server - """ - if not server_list: - raise RuntimeError("pick_server_from_list called with empty list") - - # TODO: currently we only use the lowest-priority servers. We should maintain a - # cache of servers known to be "down" and filter them out - - min_priority = min(s.priority for s in server_list) - eligible_servers = list(s for s in server_list if s.priority == min_priority) - total_weight = sum(s.weight for s in eligible_servers) - target_weight = random.randint(0, total_weight) - - for s in eligible_servers: - target_weight -= s.weight - - if target_weight <= 0: - return s.host, s.port - - # this should be impossible. - raise RuntimeError("pick_server_from_list got to end of eligible server list.") - - def _sort_server_list(server_list): """Given a list of SRV records sort them into priority order and shuffle each priority with the given weight. From 29763f01c63f1c5d5053dad413b69f1980208131 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Aug 2019 12:38:06 +0100 Subject: [PATCH 061/110] Make changelog entry be a feature --- changelog.d/{5864.misc => 5864.feature} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename changelog.d/{5864.misc => 5864.feature} (100%) diff --git a/changelog.d/5864.misc b/changelog.d/5864.feature similarity index 100% rename from changelog.d/5864.misc rename to changelog.d/5864.feature From c886f976e0ba8bc6d55c8be8f0f1241ac5b80ebc Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Tue, 20 Aug 2019 13:56:03 +0100 Subject: [PATCH 062/110] Opentracing doc update (#5776) Update opentracing docs to use the unified 'trace' method --- changelog.d/5776.misc | 1 + synapse/logging/opentracing.py | 67 ++++++++++++++++++++-------------- 2 files changed, 41 insertions(+), 27 deletions(-) create mode 100644 changelog.d/5776.misc diff --git a/changelog.d/5776.misc b/changelog.d/5776.misc new file mode 100644 index 0000000000..1fb1b9c152 --- /dev/null +++ b/changelog.d/5776.misc @@ -0,0 +1 @@ +Update opentracing docs to use the unified `trace` method. diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index d2c209c471..6b706e1892 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -43,6 +43,9 @@ OpenTracing to be easily disabled in Synapse and thereby have OpenTracing as an optional dependency. This does however limit the number of modifiable spans at any point in the code to one. From here out references to `opentracing` in the code snippets refer to the Synapses module. +Most methods provided in the module have a direct correlation to those provided +by opentracing. Refer to docs there for a more in-depth documentation on some of +the args and methods. Tracing ------- @@ -68,52 +71,62 @@ set a tag on the current active span. Tracing functions ----------------- -Functions can be easily traced using decorators. There is a decorator for -'normal' function and for functions which are actually deferreds. The name of +Functions can be easily traced using decorators. The name of the function becomes the operation name for the span. .. code-block:: python - from synapse.logging.opentracing import trace, trace_deferred + from synapse.logging.opentracing import trace - # Start a span using 'normal_function' as the operation name + # Start a span using 'interesting_function' as the operation name @trace - def normal_function(*args, **kwargs): + def interesting_function(*args, **kwargs): # Does all kinds of cool and expected things return something_usual_and_useful - # Start a span using 'deferred_function' as the operation name - @trace_deferred - @defer.inlineCallbacks - def deferred_function(*args, **kwargs): - # We start - yield we_wait - # we finish - return something_usual_and_useful Operation names can be explicitly set for functions by using -``trace_using_operation_name`` and -``trace_deferred_using_operation_name`` +``trace_using_operation_name`` .. code-block:: python - from synapse.logging.opentracing import ( - trace_using_operation_name, - trace_deferred_using_operation_name - ) + from synapse.logging.opentracing import trace_using_operation_name @trace_using_operation_name("A *much* better operation name") - def normal_function(*args, **kwargs): + def interesting_badly_named_function(*args, **kwargs): # Does all kinds of cool and expected things return something_usual_and_useful - @trace_deferred_using_operation_name("Another exciting operation name!") - @defer.inlineCallbacks - def deferred_function(*args, **kwargs): - # We start - yield we_wait - # we finish - return something_usual_and_useful +Setting Tags +------------ + +To set a tag on the active span do + +.. code-block:: python + + from synapse.logging.opentracing import set_tag + + set_tag(tag_name, tag_value) + +There's a convenient decorator to tag all the args of the method. It uses +inspection in order to use the formal parameter names prefixed with 'ARG_' as +tag names. It uses kwarg names as tag names without the prefix. + +.. code-block:: python + + from synapse.logging.opentracing import tag_args + + @tag_args + def set_fates(clotho, lachesis, atropos, father="Zues", mother="Themis"): + pass + + set_fates("the story", "the end", "the act") + # This will have the following tags + # - ARG_clotho: "the story" + # - ARG_lachesis: "the end" + # - ARG_atropos: "the act" + # - father: "Zues" + # - mother: "Themis" Contexts and carriers --------------------- From baa3f4a80d55615f35e073eecaebd5edd1c86113 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 20 Aug 2019 17:39:38 +0100 Subject: [PATCH 063/110] Avoid deep recursion in appservice recovery (#5885) Hopefully, this will fix a stack overflow when recovering an appservice. The recursion here leads to a huge chain of deferred callbacks, which then overflows the stack when the chain completes. `inlineCallbacks` makes a better job of this if we use iteration instead. Clean up the code a bit too, while we're there. --- changelog.d/5885.bugfix | 1 + synapse/appservice/scheduler.py | 41 +++++++++++++++++++-------------- 2 files changed, 25 insertions(+), 17 deletions(-) create mode 100644 changelog.d/5885.bugfix diff --git a/changelog.d/5885.bugfix b/changelog.d/5885.bugfix new file mode 100644 index 0000000000..411d925fd4 --- /dev/null +++ b/changelog.d/5885.bugfix @@ -0,0 +1 @@ +Fix stack overflow when recovering an appservice which had an outage. diff --git a/synapse/appservice/scheduler.py b/synapse/appservice/scheduler.py index 42a350bff8..0ae12cbac9 100644 --- a/synapse/appservice/scheduler.py +++ b/synapse/appservice/scheduler.py @@ -224,7 +224,9 @@ class _Recoverer(object): "as-recoverer-%s" % (self.service.id,), self.retry ) - self.clock.call_later((2 ** self.backoff_counter), _retry) + delay = 2 ** self.backoff_counter + logger.info("Scheduling retries on %s in %fs", self.service.id, delay) + self.clock.call_later(delay, _retry) def _backoff(self): # cap the backoff to be around 8.5min => (2^9) = 512 secs @@ -234,25 +236,30 @@ class _Recoverer(object): @defer.inlineCallbacks def retry(self): + logger.info("Starting retries on %s", self.service.id) try: - txn = yield self.store.get_oldest_unsent_txn(self.service) - if txn: + while True: + txn = yield self.store.get_oldest_unsent_txn(self.service) + if not txn: + # nothing left: we're done! + self.callback(self) + return + logger.info( "Retrying transaction %s for AS ID %s", txn.id, txn.service.id ) sent = yield txn.send(self.as_api) - if sent: - yield txn.complete(self.store) - # reset the backoff counter and retry immediately - self.backoff_counter = 1 - yield self.retry() - else: - self._backoff() - else: - self._set_service_recovered() - except Exception as e: - logger.exception(e) - self._backoff() + if not sent: + break - def _set_service_recovered(self): - self.callback(self) + yield txn.complete(self.store) + + # reset the backoff counter and then process the next transaction + self.backoff_counter = 1 + + except Exception: + logger.exception("Unexpected error running retries") + + # we didn't manage to send all of the transactions before we got an error of + # some flavour: reschedule the next retry. + self._backoff() From 5906be858900e134d99dd94f0ca9e8bd1db14c05 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Aug 2019 15:27:08 +0100 Subject: [PATCH 064/110] Add config option for keys to use to sign keys This allows servers to separate keys that are used to sign remote keys when acting as a notary server. --- docs/sample_config.yaml | 8 ++++++++ synapse/config/key.py | 35 +++++++++++++++++++++++++++++++---- synapse/crypto/keyring.py | 12 +++++++----- 3 files changed, 46 insertions(+), 9 deletions(-) diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 0c6be30e51..c96eb0cf2d 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -1027,6 +1027,14 @@ signing_key_path: "CONFDIR/SERVERNAME.signing.key" # #trusted_key_servers: # - server_name: "matrix.org" +# + +# The additional signing keys to use when acting as a trusted key server, on +# top of the normal signing keys. +# +# Can contain multiple keys, one per line. +# +#key_server_signing_keys_path: "key_server_signing_keys.key" # Enable SAML2 for registration and login. Uses pysaml2. diff --git a/synapse/config/key.py b/synapse/config/key.py index fe8386985c..f1a1efcb7f 100644 --- a/synapse/config/key.py +++ b/synapse/config/key.py @@ -76,7 +76,7 @@ class KeyConfig(Config): config_dir_path, config["server_name"] + ".signing.key" ) - self.signing_key = self.read_signing_key(signing_key_path) + self.signing_key = self.read_signing_keys(signing_key_path, "signing_key") self.old_signing_keys = self.read_old_signing_keys( config.get("old_signing_keys", {}) @@ -85,6 +85,15 @@ class KeyConfig(Config): config.get("key_refresh_interval", "1d") ) + self.key_server_signing_keys = list(self.signing_key) + key_server_signing_keys_path = config.get("key_server_signing_keys_path") + if key_server_signing_keys_path: + self.key_server_signing_keys.extend( + self.read_signing_keys( + key_server_signing_keys_path, "key_server_signing_keys_path" + ) + ) + # if neither trusted_key_servers nor perspectives are given, use the default. if "perspectives" not in config and "trusted_key_servers" not in config: key_servers = [{"server_name": "matrix.org"}] @@ -210,16 +219,34 @@ class KeyConfig(Config): # #trusted_key_servers: # - server_name: "matrix.org" + # + + # The additional signing keys to use when acting as a trusted key server, on + # top of the normal signing keys. + # + # Can contain multiple keys, one per line. + # + #key_server_signing_keys_path: "key_server_signing_keys.key" """ % locals() ) - def read_signing_key(self, signing_key_path): - signing_keys = self.read_file(signing_key_path, "signing_key") + def read_signing_keys(self, signing_key_path, name): + """Read the signing keys in the given path. + + Args: + signing_key_path (str) + name (str): Associated config key name + + Returns: + list[SigningKey] + """ + + signing_keys = self.read_file(signing_key_path, name) try: return read_signing_keys(signing_keys.splitlines(True)) except Exception as e: - raise ConfigError("Error reading signing_key: %s" % (str(e))) + raise ConfigError("Error reading %s: %s" % (name, str(e))) def read_old_signing_keys(self, old_signing_keys): keys = {} diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index 6c3e885e72..a3b55e349e 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -540,11 +540,13 @@ class BaseV2KeyFetcher(object): verify_key=verify_key, valid_until_ts=key_data["expired_ts"] ) - # re-sign the json with our own key, so that it is ready if we are asked to - # give it out as a notary server - signed_key_json = sign_json( - response_json, self.config.server_name, self.config.signing_key[0] - ) + # re-sign the json with our own keys, so that it is ready if we are + # asked to give it out as a notary server + signed_key_json = response_json + for signing_key in self.config.key_server_signing_keys: + signed_key_json = sign_json( + signed_key_json, self.config.server_name, signing_key + ) signed_key_json_bytes = encode_canonical_json(signed_key_json) From 97cbc96093dcd878bc823f34d71437a08786a3e4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Aug 2019 10:39:45 +0100 Subject: [PATCH 065/110] Only sign when we respond to remote key requests --- synapse/crypto/keyring.py | 11 +-------- synapse/rest/key/v2/remote_key_resource.py | 26 ++++++++++++---------- 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index a3b55e349e..abeb0ac26e 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -30,7 +30,6 @@ from signedjson.key import ( from signedjson.sign import ( SignatureVerifyException, encode_canonical_json, - sign_json, signature_ids, verify_signed_json, ) @@ -540,15 +539,7 @@ class BaseV2KeyFetcher(object): verify_key=verify_key, valid_until_ts=key_data["expired_ts"] ) - # re-sign the json with our own keys, so that it is ready if we are - # asked to give it out as a notary server - signed_key_json = response_json - for signing_key in self.config.key_server_signing_keys: - signed_key_json = sign_json( - signed_key_json, self.config.server_name, signing_key - ) - - signed_key_json_bytes = encode_canonical_json(signed_key_json) + signed_key_json_bytes = encode_canonical_json(response_json) yield make_deferred_yieldable( defer.gatherResults( diff --git a/synapse/rest/key/v2/remote_key_resource.py b/synapse/rest/key/v2/remote_key_resource.py index 031a316693..f3398c9523 100644 --- a/synapse/rest/key/v2/remote_key_resource.py +++ b/synapse/rest/key/v2/remote_key_resource.py @@ -13,7 +13,9 @@ # limitations under the License. import logging -from io import BytesIO + +from canonicaljson import json +from signedjson.sign import sign_json from twisted.internet import defer @@ -95,6 +97,7 @@ class RemoteKey(DirectServeResource): self.store = hs.get_datastore() self.clock = hs.get_clock() self.federation_domain_whitelist = hs.config.federation_domain_whitelist + self.config = hs.config @wrap_json_request_handler async def _async_render_GET(self, request): @@ -214,15 +217,14 @@ class RemoteKey(DirectServeResource): yield self.fetcher.get_keys(cache_misses) yield self.query_keys(request, query, query_remote_on_cache_miss=False) else: - result_io = BytesIO() - result_io.write(b'{"server_keys":') - sep = b"[" - for json_bytes in json_results: - result_io.write(sep) - result_io.write(json_bytes) - sep = b"," - if sep == b"[": - result_io.write(sep) - result_io.write(b"]}") + signed_keys = [] + for key_json in json_results: + key_json = json.loads(key_json) + for signing_key in self.config.key_server_signing_keys: + key_json = sign_json(key_json, self.config.server_name, signing_key) - respond_with_json_bytes(request, 200, result_io.getvalue()) + signed_keys.append(key_json) + + results = {"server_keys": signed_keys} + + respond_with_json_bytes(request, 200, json.dumps(results).encode("utf-8")) From 62fb643cdca80568a404c46a255384cd73b6e16b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Aug 2019 10:41:29 +0100 Subject: [PATCH 066/110] Newsfile --- changelog.d/5895.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5895.feature diff --git a/changelog.d/5895.feature b/changelog.d/5895.feature new file mode 100644 index 0000000000..c394a3772c --- /dev/null +++ b/changelog.d/5895.feature @@ -0,0 +1 @@ +Add config option to sign remote key query responses with a separate key. From 4dab867288167881e5d89c8743b633be109bf603 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 21 Aug 2019 13:16:28 +0100 Subject: [PATCH 067/110] Drop some unused tables. (#5893) These tables are never used, so we may as well drop them. --- changelog.d/5893.misc | 1 + synapse/storage/events.py | 14 ++------ synapse/storage/room.py | 35 ------------------- .../delta/56/drop_unused_event_tables.sql | 20 +++++++++++ 4 files changed, 23 insertions(+), 47 deletions(-) create mode 100644 changelog.d/5893.misc create mode 100644 synapse/storage/schema/delta/56/drop_unused_event_tables.sql diff --git a/changelog.d/5893.misc b/changelog.d/5893.misc new file mode 100644 index 0000000000..07ee4888dc --- /dev/null +++ b/changelog.d/5893.misc @@ -0,0 +1 @@ +Drop some unused tables. diff --git a/synapse/storage/events.py b/synapse/storage/events.py index ac876287fc..6fcfa4d789 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1302,15 +1302,11 @@ class EventsStore( "event_reference_hashes", "event_search", "event_to_state_groups", - "guest_access", - "history_visibility", "local_invites", - "room_names", "state_events", "rejections", "redactions", "room_memberships", - "topics", ): txn.executemany( "DELETE FROM %s WHERE event_id = ?" % (table,), @@ -1454,10 +1450,10 @@ class EventsStore( for event, _ in events_and_contexts: if event.type == EventTypes.Name: - # Insert into the room_names and event_search tables. + # Insert into the event_search table. self._store_room_name_txn(txn, event) elif event.type == EventTypes.Topic: - # Insert into the topics table and event_search table. + # Insert into the event_search table. self._store_room_topic_txn(txn, event) elif event.type == EventTypes.Message: # Insert into the event_search table. @@ -1465,12 +1461,6 @@ class EventsStore( elif event.type == EventTypes.Redaction: # Insert into the redactions table. self._store_redaction(txn, event) - elif event.type == EventTypes.RoomHistoryVisibility: - # Insert into the event_search table. - self._store_history_visibility_txn(txn, event) - elif event.type == EventTypes.GuestAccess: - # Insert into the event_search table. - self._store_guest_access_txn(txn, event) self._handle_event_relations(txn, event) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index bc606292b8..08e13f3a3b 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -386,32 +386,12 @@ class RoomStore(RoomWorkerStore, SearchStore): def _store_room_topic_txn(self, txn, event): if hasattr(event, "content") and "topic" in event.content: - self._simple_insert_txn( - txn, - "topics", - { - "event_id": event.event_id, - "room_id": event.room_id, - "topic": event.content["topic"], - }, - ) - self.store_event_search_txn( txn, event, "content.topic", event.content["topic"] ) def _store_room_name_txn(self, txn, event): if hasattr(event, "content") and "name" in event.content: - self._simple_insert_txn( - txn, - "room_names", - { - "event_id": event.event_id, - "room_id": event.room_id, - "name": event.content["name"], - }, - ) - self.store_event_search_txn( txn, event, "content.name", event.content["name"] ) @@ -422,21 +402,6 @@ class RoomStore(RoomWorkerStore, SearchStore): txn, event, "content.body", event.content["body"] ) - def _store_history_visibility_txn(self, txn, event): - self._store_content_index_txn(txn, event, "history_visibility") - - def _store_guest_access_txn(self, txn, event): - self._store_content_index_txn(txn, event, "guest_access") - - def _store_content_index_txn(self, txn, event, key): - if hasattr(event, "content") and key in event.content: - sql = ( - "INSERT INTO %(key)s" - " (event_id, room_id, %(key)s)" - " VALUES (?, ?, ?)" % {"key": key} - ) - txn.execute(sql, (event.event_id, event.room_id, event.content[key])) - def add_event_report( self, room_id, event_id, user_id, reason, content, received_ts ): diff --git a/synapse/storage/schema/delta/56/drop_unused_event_tables.sql b/synapse/storage/schema/delta/56/drop_unused_event_tables.sql new file mode 100644 index 0000000000..9f09922c67 --- /dev/null +++ b/synapse/storage/schema/delta/56/drop_unused_event_tables.sql @@ -0,0 +1,20 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- these tables are never used. +DROP TABLE IF EXISTS room_names; +DROP TABLE IF EXISTS topics; +DROP TABLE IF EXISTS history_visibility; +DROP TABLE IF EXISTS guest_access; From ef1c524bb381545761fdd1ad2a61db1693ddbd3d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 22 Aug 2019 10:42:06 +0100 Subject: [PATCH 068/110] Improve error msg when key-fetch fails (#5896) There's no point doing a raise_from here, because the exception is always logged at warn with no stacktrace in the caller. Instead, let's try to give better messages to reduce confusion. In particular, this means that we won't log 'Failed to connect to remote server' when we don't even attempt to connect to the remote server due to blacklisting. --- changelog.d/5896.misc | 1 + synapse/crypto/keyring.py | 12 +++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) create mode 100644 changelog.d/5896.misc diff --git a/changelog.d/5896.misc b/changelog.d/5896.misc new file mode 100644 index 0000000000..ed47c747bd --- /dev/null +++ b/changelog.d/5896.misc @@ -0,0 +1 @@ +Improve the logging when we have an error when fetching signing keys. diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index 6c3e885e72..654accc843 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -18,7 +18,6 @@ import logging from collections import defaultdict import six -from six import raise_from from six.moves import urllib import attr @@ -657,9 +656,10 @@ class PerspectivesKeyFetcher(BaseV2KeyFetcher): }, ) except (NotRetryingDestination, RequestSendFailed) as e: - raise_from(KeyLookupError("Failed to connect to remote server"), e) + # these both have str() representations which we can't really improve upon + raise KeyLookupError(str(e)) except HttpResponseException as e: - raise_from(KeyLookupError("Remote server returned an error"), e) + raise KeyLookupError("Remote server returned an error: %s" % (e,)) keys = {} added_keys = [] @@ -821,9 +821,11 @@ class ServerKeyFetcher(BaseV2KeyFetcher): timeout=10000, ) except (NotRetryingDestination, RequestSendFailed) as e: - raise_from(KeyLookupError("Failed to connect to remote server"), e) + # these both have str() representations which we can't really improve + # upon + raise KeyLookupError(str(e)) except HttpResponseException as e: - raise_from(KeyLookupError("Remote server returned an error"), e) + raise KeyLookupError("Remote server returned an error: %s" % (e,)) if response["server_name"] != server_name: raise KeyLookupError( From 119aa31b105705390e87f87186f787b32e04ba21 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 22 Aug 2019 10:42:59 +0100 Subject: [PATCH 069/110] Servlet to purge old rooms (#5845) --- changelog.d/5845.feature | 1 + docs/admin_api/purge_room.md | 18 +++ synapse/handlers/pagination.py | 17 +++ synapse/rest/admin/__init__.py | 2 + synapse/rest/admin/purge_room_servlet.py | 57 ++++++++++ synapse/storage/events.py | 137 +++++++++++++++++++++++ 6 files changed, 232 insertions(+) create mode 100644 changelog.d/5845.feature create mode 100644 docs/admin_api/purge_room.md create mode 100644 synapse/rest/admin/purge_room_servlet.py diff --git a/changelog.d/5845.feature b/changelog.d/5845.feature new file mode 100644 index 0000000000..7b0dc9a95e --- /dev/null +++ b/changelog.d/5845.feature @@ -0,0 +1 @@ +Add an admin API to purge old rooms from the database. diff --git a/docs/admin_api/purge_room.md b/docs/admin_api/purge_room.md new file mode 100644 index 0000000000..64ea7b6a64 --- /dev/null +++ b/docs/admin_api/purge_room.md @@ -0,0 +1,18 @@ +Purge room API +============== + +This API will remove all trace of a room from your database. + +All local users must have left the room before it can be removed. + +The API is: + +``` +POST /_synapse/admin/v1/purge_room + +{ + "room_id": "!room:id" +} +``` + +You must authenticate using the access token of an admin user. diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index d83aab3f74..5744f4579d 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -70,6 +70,7 @@ class PaginationHandler(object): self.auth = hs.get_auth() self.store = hs.get_datastore() self.clock = hs.get_clock() + self._server_name = hs.hostname self.pagination_lock = ReadWriteLock() self._purges_in_progress_by_room = set() @@ -153,6 +154,22 @@ class PaginationHandler(object): """ return self._purges_by_id.get(purge_id) + async def purge_room(self, room_id): + """Purge the given room from the database""" + with (await self.pagination_lock.write(room_id)): + # check we know about the room + await self.store.get_room_version(room_id) + + # first check that we have no users in this room + joined = await defer.maybeDeferred( + self.store.is_host_joined, room_id, self._server_name + ) + + if joined: + raise SynapseError(400, "Users are still joined to this room") + + await self.store.purge_room(room_id) + @defer.inlineCallbacks def get_messages( self, diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index 5720cab425..0dce256840 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -42,6 +42,7 @@ from synapse.rest.admin._base import ( historical_admin_path_patterns, ) from synapse.rest.admin.media import register_servlets_for_media_repo +from synapse.rest.admin.purge_room_servlet import PurgeRoomServlet from synapse.rest.admin.server_notice_servlet import SendServerNoticeServlet from synapse.types import UserID, create_requester from synapse.util.versionstring import get_version_string @@ -738,6 +739,7 @@ def register_servlets(hs, http_server): Register all the admin servlets. """ register_servlets_for_client_rest_resource(hs, http_server) + PurgeRoomServlet(hs).register(http_server) SendServerNoticeServlet(hs).register(http_server) VersionServlet(hs).register(http_server) diff --git a/synapse/rest/admin/purge_room_servlet.py b/synapse/rest/admin/purge_room_servlet.py new file mode 100644 index 0000000000..2922eb543e --- /dev/null +++ b/synapse/rest/admin/purge_room_servlet.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import re + +from synapse.http.servlet import ( + RestServlet, + assert_params_in_dict, + parse_json_object_from_request, +) +from synapse.rest.admin import assert_requester_is_admin + + +class PurgeRoomServlet(RestServlet): + """Servlet which will remove all trace of a room from the database + + POST /_synapse/admin/v1/purge_room + { + "room_id": "!room:id" + } + + returns: + + {} + """ + + PATTERNS = (re.compile("^/_synapse/admin/v1/purge_room$"),) + + def __init__(self, hs): + """ + Args: + hs (synapse.server.HomeServer): server + """ + self.hs = hs + self.auth = hs.get_auth() + self.pagination_handler = hs.get_pagination_handler() + + async def on_POST(self, request): + await assert_requester_is_admin(self.auth, request) + + body = parse_json_object_from_request(request) + assert_params_in_dict(body, ("room_id",)) + + await self.pagination_handler.purge_room(body["room_id"]) + + return (200, {}) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 6fcfa4d789..5a95c36a8b 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2181,6 +2181,143 @@ class EventsStore( return to_delete, to_dedelta + def purge_room(self, room_id): + """Deletes all record of a room + + Args: + room_id (str): + """ + + return self.runInteraction("purge_room", self._purge_room_txn, room_id) + + def _purge_room_txn(self, txn, room_id): + # first we have to delete the state groups states + logger.info("[purge] removing %s from state_groups_state", room_id) + + txn.execute( + """ + DELETE FROM state_groups_state WHERE state_group IN ( + SELECT state_group FROM events JOIN event_to_state_groups USING(event_id) + WHERE events.room_id=? + ) + """, + (room_id,), + ) + + # ... and the state group edges + logger.info("[purge] removing %s from state_group_edges", room_id) + + txn.execute( + """ + DELETE FROM state_group_edges WHERE state_group IN ( + SELECT state_group FROM events JOIN event_to_state_groups USING(event_id) + WHERE events.room_id=? + ) + """, + (room_id,), + ) + + # ... and the state groups + logger.info("[purge] removing %s from state_groups", room_id) + + txn.execute( + """ + DELETE FROM state_groups WHERE id IN ( + SELECT state_group FROM events JOIN event_to_state_groups USING(event_id) + WHERE events.room_id=? + ) + """, + (room_id,), + ) + + # and then tables which lack an index on room_id but have one on event_id + for table in ( + "event_auth", + "event_edges", + "event_push_actions_staging", + "event_reference_hashes", + "event_relations", + "event_to_state_groups", + "redactions", + "rejections", + "state_events", + ): + logger.info("[purge] removing %s from %s", room_id, table) + + txn.execute( + """ + DELETE FROM %s WHERE event_id IN ( + SELECT event_id FROM events WHERE room_id=? + ) + """ + % (table,), + (room_id,), + ) + + # and finally, the tables with an index on room_id (or no useful index) + for table in ( + "current_state_events", + "event_backward_extremities", + "event_forward_extremities", + "event_json", + "event_push_actions", + "event_search", + "events", + "group_rooms", + "public_room_list_stream", + "receipts_graph", + "receipts_linearized", + "room_aliases", + "room_depth", + "room_memberships", + "room_state", + "room_stats", + "room_stats_earliest_token", + "rooms", + "stream_ordering_to_exterm", + "topics", + "users_in_public_rooms", + "users_who_share_private_rooms", + # no useful index, but let's clear them anyway + "appservice_room_list", + "e2e_room_keys", + "event_push_summary", + "pusher_throttle", + "group_summary_rooms", + "local_invites", + "room_account_data", + "room_tags", + ): + logger.info("[purge] removing %s from %s", room_id, table) + txn.execute("DELETE FROM %s WHERE room_id=?" % (table,), (room_id,)) + + # Other tables we do NOT need to clear out: + # + # - blocked_rooms + # This is important, to make sure that we don't accidentally rejoin a blocked + # room after it was purged + # + # - user_directory + # This has a room_id column, but it is unused + # + + # Other tables that we might want to consider clearing out include: + # + # - event_reports + # Given that these are intended for abuse management my initial + # inclination is to leave them in place. + # + # - current_state_delta_stream + # - ex_outlier_stream + # - room_tags_revisions + # The problem with these is that they are largeish and there is no room_id + # index on them. In any case we should be clearing out 'stream' tables + # periodically anyway (#5888) + + # TODO: we could probably usefully do a bunch of cache invalidation here + + logger.info("[purge] done") + @defer.inlineCallbacks def is_event_after(self, event_id1, event_id2): """Returns True if event_id1 is after event_id2 in the stream From c9f11d09fc85470cf9a36909104734a3682c4b39 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 22 Aug 2019 10:43:13 +0100 Subject: [PATCH 070/110] Add missing index on users_in_public_rooms. (#5894) --- changelog.d/5894.misc | 1 + .../delta/56/users_in_public_rooms_idx.sql | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 changelog.d/5894.misc create mode 100644 synapse/storage/schema/delta/56/users_in_public_rooms_idx.sql diff --git a/changelog.d/5894.misc b/changelog.d/5894.misc new file mode 100644 index 0000000000..fca4485ff7 --- /dev/null +++ b/changelog.d/5894.misc @@ -0,0 +1 @@ +Add missing index on users_in_public_rooms to improve the performance of directory queries. diff --git a/synapse/storage/schema/delta/56/users_in_public_rooms_idx.sql b/synapse/storage/schema/delta/56/users_in_public_rooms_idx.sql new file mode 100644 index 0000000000..149f8be8b6 --- /dev/null +++ b/synapse/storage/schema/delta/56/users_in_public_rooms_idx.sql @@ -0,0 +1,17 @@ +/* Copyright 2019 Matrix.org Foundation CIC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- this was apparently forgotten when the table was created back in delta 53. +CREATE INDEX users_in_public_rooms_r_idx ON users_in_public_rooms(room_id); From 9a6f2be5724bb0ed53a4b04e7fbb7ccee39050bd Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Thu, 22 Aug 2019 11:28:12 +0100 Subject: [PATCH 071/110] Opentrace e2e keys (#5855) Add opentracing tags and logs for e2e keys --- changelog.d/5855.misc | 1 + synapse/federation/federation_server.py | 3 ++ synapse/handlers/e2e_keys.py | 52 ++++++++++++++++++++++++- synapse/handlers/e2e_room_keys.py | 28 ++++++++++++- synapse/rest/client/v2_alpha/keys.py | 13 ++++++- synapse/storage/e2e_room_keys.py | 14 +++++++ synapse/storage/end_to_end_keys.py | 38 ++++++++++++++++-- 7 files changed, 142 insertions(+), 7 deletions(-) create mode 100644 changelog.d/5855.misc diff --git a/changelog.d/5855.misc b/changelog.d/5855.misc new file mode 100644 index 0000000000..32db7fbe37 --- /dev/null +++ b/changelog.d/5855.misc @@ -0,0 +1 @@ +Opentracing for room and e2e keys. diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index d216c46dfe..9286ca3202 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -43,6 +43,7 @@ from synapse.federation.persistence import TransactionActions from synapse.federation.units import Edu, Transaction from synapse.http.endpoint import parse_server_name from synapse.logging.context import nested_logging_context +from synapse.logging.opentracing import log_kv, trace from synapse.logging.utils import log_function from synapse.replication.http.federation import ( ReplicationFederationSendEduRestServlet, @@ -507,6 +508,7 @@ class FederationServer(FederationBase): def on_query_user_devices(self, origin, user_id): return self.on_query_request("user_devices", user_id) + @trace @defer.inlineCallbacks @log_function def on_claim_client_keys(self, origin, content): @@ -515,6 +517,7 @@ class FederationServer(FederationBase): for device_id, algorithm in device_keys.items(): query.append((user_id, device_id, algorithm)) + log_kv({"message": "Claiming one time keys.", "user, device pairs": query}) results = yield self.store.claim_e2e_one_time_keys(query) json_result = {} diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 1f90b0d278..056fb97acb 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -24,6 +24,7 @@ from twisted.internet import defer from synapse.api.errors import CodeMessageException, SynapseError from synapse.logging.context import make_deferred_yieldable, run_in_background +from synapse.logging.opentracing import log_kv, set_tag, tag_args, trace from synapse.types import UserID, get_domain_from_id from synapse.util import unwrapFirstError from synapse.util.retryutils import NotRetryingDestination @@ -46,6 +47,7 @@ class E2eKeysHandler(object): "client_keys", self.on_federation_query_client_keys ) + @trace @defer.inlineCallbacks def query_devices(self, query_body, timeout): """ Handle a device key query from a client @@ -81,6 +83,9 @@ class E2eKeysHandler(object): else: remote_queries[user_id] = device_ids + set_tag("local_key_query", local_query) + set_tag("remote_key_query", remote_queries) + # First get local devices. failures = {} results = {} @@ -121,6 +126,7 @@ class E2eKeysHandler(object): r[user_id] = remote_queries[user_id] # Now fetch any devices that we don't have in our cache + @trace @defer.inlineCallbacks def do_remote_query(destination): """This is called when we are querying the device list of a user on @@ -185,6 +191,8 @@ class E2eKeysHandler(object): except Exception as e: failure = _exception_to_failure(e) failures[destination] = failure + set_tag("error", True) + set_tag("reason", failure) yield make_deferred_yieldable( defer.gatherResults( @@ -198,6 +206,7 @@ class E2eKeysHandler(object): return {"device_keys": results, "failures": failures} + @trace @defer.inlineCallbacks def query_local_devices(self, query): """Get E2E device keys for local users @@ -210,6 +219,7 @@ class E2eKeysHandler(object): defer.Deferred: (resolves to dict[string, dict[string, dict]]): map from user_id -> device_id -> device details """ + set_tag("local_query", query) local_query = [] result_dict = {} @@ -217,6 +227,14 @@ class E2eKeysHandler(object): # we use UserID.from_string to catch invalid user ids if not self.is_mine(UserID.from_string(user_id)): logger.warning("Request for keys for non-local user %s", user_id) + log_kv( + { + "message": "Requested a local key for a user which" + " was not local to the homeserver", + "user_id": user_id, + } + ) + set_tag("error", True) raise SynapseError(400, "Not a user here") if not device_ids: @@ -241,6 +259,7 @@ class E2eKeysHandler(object): r["unsigned"]["device_display_name"] = display_name result_dict[user_id][device_id] = r + log_kv(results) return result_dict @defer.inlineCallbacks @@ -251,6 +270,7 @@ class E2eKeysHandler(object): res = yield self.query_local_devices(device_keys_query) return {"device_keys": res} + @trace @defer.inlineCallbacks def claim_one_time_keys(self, query, timeout): local_query = [] @@ -265,6 +285,9 @@ class E2eKeysHandler(object): domain = get_domain_from_id(user_id) remote_queries.setdefault(domain, {})[user_id] = device_keys + set_tag("local_key_query", local_query) + set_tag("remote_key_query", remote_queries) + results = yield self.store.claim_e2e_one_time_keys(local_query) json_result = {} @@ -276,8 +299,10 @@ class E2eKeysHandler(object): key_id: json.loads(json_bytes) } + @trace @defer.inlineCallbacks def claim_client_keys(destination): + set_tag("destination", destination) device_keys = remote_queries[destination] try: remote_result = yield self.federation.claim_client_keys( @@ -290,6 +315,8 @@ class E2eKeysHandler(object): except Exception as e: failure = _exception_to_failure(e) failures[destination] = failure + set_tag("error", True) + set_tag("reason", failure) yield make_deferred_yieldable( defer.gatherResults( @@ -313,9 +340,11 @@ class E2eKeysHandler(object): ), ) + log_kv({"one_time_keys": json_result, "failures": failures}) return {"one_time_keys": json_result, "failures": failures} @defer.inlineCallbacks + @tag_args def upload_keys_for_user(self, user_id, device_id, keys): time_now = self.clock.time_msec() @@ -329,6 +358,13 @@ class E2eKeysHandler(object): user_id, time_now, ) + log_kv( + { + "message": "Updating device_keys for user.", + "user_id": user_id, + "device_id": device_id, + } + ) # TODO: Sign the JSON with the server key changed = yield self.store.set_e2e_device_keys( user_id, device_id, time_now, device_keys @@ -336,12 +372,24 @@ class E2eKeysHandler(object): if changed: # Only notify about device updates *if* the keys actually changed yield self.device_handler.notify_device_update(user_id, [device_id]) - + else: + log_kv({"message": "Not updating device_keys for user", "user_id": user_id}) one_time_keys = keys.get("one_time_keys", None) if one_time_keys: + log_kv( + { + "message": "Updating one_time_keys for device.", + "user_id": user_id, + "device_id": device_id, + } + ) yield self._upload_one_time_keys_for_user( user_id, device_id, time_now, one_time_keys ) + else: + log_kv( + {"message": "Did not update one_time_keys", "reason": "no keys given"} + ) # the device should have been registered already, but it may have been # deleted due to a race with a DELETE request. Or we may be using an @@ -352,6 +400,7 @@ class E2eKeysHandler(object): result = yield self.store.count_e2e_one_time_keys(user_id, device_id) + set_tag("one_time_key_counts", result) return {"one_time_key_counts": result} @defer.inlineCallbacks @@ -395,6 +444,7 @@ class E2eKeysHandler(object): (algorithm, key_id, encode_canonical_json(key).decode("ascii")) ) + log_kv({"message": "Inserting new one_time_keys.", "keys": new_keys}) yield self.store.add_e2e_one_time_keys(user_id, device_id, time_now, new_keys) diff --git a/synapse/handlers/e2e_room_keys.py b/synapse/handlers/e2e_room_keys.py index 41b871fc59..a9d80f708c 100644 --- a/synapse/handlers/e2e_room_keys.py +++ b/synapse/handlers/e2e_room_keys.py @@ -26,6 +26,7 @@ from synapse.api.errors import ( StoreError, SynapseError, ) +from synapse.logging.opentracing import log_kv, trace from synapse.util.async_helpers import Linearizer logger = logging.getLogger(__name__) @@ -49,6 +50,7 @@ class E2eRoomKeysHandler(object): # changed. self._upload_linearizer = Linearizer("upload_room_keys_lock") + @trace @defer.inlineCallbacks def get_room_keys(self, user_id, version, room_id=None, session_id=None): """Bulk get the E2E room keys for a given backup, optionally filtered to a given @@ -84,8 +86,10 @@ class E2eRoomKeysHandler(object): user_id, version, room_id, session_id ) + log_kv(results) return results + @trace @defer.inlineCallbacks def delete_room_keys(self, user_id, version, room_id=None, session_id=None): """Bulk delete the E2E room keys for a given backup, optionally filtered to a given @@ -107,6 +111,7 @@ class E2eRoomKeysHandler(object): with (yield self._upload_linearizer.queue(user_id)): yield self.store.delete_e2e_room_keys(user_id, version, room_id, session_id) + @trace @defer.inlineCallbacks def upload_room_keys(self, user_id, version, room_keys): """Bulk upload a list of room keys into a given backup version, asserting @@ -186,7 +191,14 @@ class E2eRoomKeysHandler(object): session_id(str): the session whose room_key we're setting room_key(dict): the room_key being set """ - + log_kv( + { + "message": "Trying to upload room key", + "room_id": room_id, + "session_id": session_id, + "user_id": user_id, + } + ) # get the room_key for this particular row current_room_key = None try: @@ -195,14 +207,23 @@ class E2eRoomKeysHandler(object): ) except StoreError as e: if e.code == 404: - pass + log_kv( + { + "message": "Room key not found.", + "room_id": room_id, + "user_id": user_id, + } + ) else: raise if self._should_replace_room_key(current_room_key, room_key): + log_kv({"message": "Replacing room key."}) yield self.store.set_e2e_room_key( user_id, version, room_id, session_id, room_key ) + else: + log_kv({"message": "Not replacing room_key."}) @staticmethod def _should_replace_room_key(current_room_key, room_key): @@ -236,6 +257,7 @@ class E2eRoomKeysHandler(object): return False return True + @trace @defer.inlineCallbacks def create_version(self, user_id, version_info): """Create a new backup version. This automatically becomes the new @@ -294,6 +316,7 @@ class E2eRoomKeysHandler(object): raise return res + @trace @defer.inlineCallbacks def delete_version(self, user_id, version=None): """Deletes a given version of the user's e2e_room_keys backup @@ -314,6 +337,7 @@ class E2eRoomKeysHandler(object): else: raise + @trace @defer.inlineCallbacks def update_version(self, user_id, version, version_info): """Update the info about a given version of the user's backup diff --git a/synapse/rest/client/v2_alpha/keys.py b/synapse/rest/client/v2_alpha/keys.py index 6008adec7c..b218a3f334 100644 --- a/synapse/rest/client/v2_alpha/keys.py +++ b/synapse/rest/client/v2_alpha/keys.py @@ -24,6 +24,7 @@ from synapse.http.servlet import ( parse_json_object_from_request, parse_string, ) +from synapse.logging.opentracing import log_kv, set_tag, trace_using_operation_name from synapse.types import StreamToken from ._base import client_patterns @@ -68,6 +69,7 @@ class KeyUploadServlet(RestServlet): self.auth = hs.get_auth() self.e2e_keys_handler = hs.get_e2e_keys_handler() + @trace_using_operation_name("upload_keys") @defer.inlineCallbacks def on_POST(self, request, device_id): requester = yield self.auth.get_user_by_req(request, allow_guest=True) @@ -78,6 +80,14 @@ class KeyUploadServlet(RestServlet): # passing the device_id here is deprecated; however, we allow it # for now for compatibility with older clients. if requester.device_id is not None and device_id != requester.device_id: + set_tag("error", True) + log_kv( + { + "message": "Client uploading keys for a different device", + "logged_in_id": requester.device_id, + "key_being_uploaded": device_id, + } + ) logger.warning( "Client uploading keys for a different device " "(logged in as %s, uploading for %s)", @@ -178,10 +188,11 @@ class KeyChangesServlet(RestServlet): requester = yield self.auth.get_user_by_req(request, allow_guest=True) from_token_string = parse_string(request, "from") + set_tag("from", from_token_string) # We want to enforce they do pass us one, but we ignore it and return # changes after the "to" as well as before. - parse_string(request, "to") + set_tag("to", parse_string(request, "to")) from_token = StreamToken.from_string(from_token_string) diff --git a/synapse/storage/e2e_room_keys.py b/synapse/storage/e2e_room_keys.py index b1901404af..be2fe2bab6 100644 --- a/synapse/storage/e2e_room_keys.py +++ b/synapse/storage/e2e_room_keys.py @@ -18,6 +18,7 @@ import json from twisted.internet import defer from synapse.api.errors import StoreError +from synapse.logging.opentracing import log_kv, trace from ._base import SQLBaseStore @@ -94,7 +95,16 @@ class EndToEndRoomKeyStore(SQLBaseStore): }, lock=False, ) + log_kv( + { + "message": "Set room key", + "room_id": room_id, + "session_id": session_id, + "room_key": room_key, + } + ) + @trace @defer.inlineCallbacks def get_e2e_room_keys(self, user_id, version, room_id=None, session_id=None): """Bulk get the E2E room keys for a given backup, optionally filtered to a given @@ -153,6 +163,7 @@ class EndToEndRoomKeyStore(SQLBaseStore): return sessions + @trace @defer.inlineCallbacks def delete_e2e_room_keys(self, user_id, version, room_id=None, session_id=None): """Bulk delete the E2E room keys for a given backup, optionally filtered to a given @@ -236,6 +247,7 @@ class EndToEndRoomKeyStore(SQLBaseStore): "get_e2e_room_keys_version_info", _get_e2e_room_keys_version_info_txn ) + @trace def create_e2e_room_keys_version(self, user_id, info): """Atomically creates a new version of this user's e2e_room_keys store with the given version info. @@ -276,6 +288,7 @@ class EndToEndRoomKeyStore(SQLBaseStore): "create_e2e_room_keys_version_txn", _create_e2e_room_keys_version_txn ) + @trace def update_e2e_room_keys_version(self, user_id, version, info): """Update a given backup version @@ -292,6 +305,7 @@ class EndToEndRoomKeyStore(SQLBaseStore): desc="update_e2e_room_keys_version", ) + @trace def delete_e2e_room_keys_version(self, user_id, version=None): """Delete a given backup version of the user's room keys. Doesn't delete their actual key data. diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index 1e07474e70..33e3a84933 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -18,12 +18,14 @@ from canonicaljson import encode_canonical_json from twisted.internet import defer +from synapse.logging.opentracing import log_kv, set_tag, trace from synapse.util.caches.descriptors import cached from ._base import SQLBaseStore, db_to_json class EndToEndKeyWorkerStore(SQLBaseStore): + @trace @defer.inlineCallbacks def get_e2e_device_keys( self, query_list, include_all_devices=False, include_deleted_devices=False @@ -40,6 +42,7 @@ class EndToEndKeyWorkerStore(SQLBaseStore): Dict mapping from user-id to dict mapping from device_id to dict containing "key_json", "device_display_name". """ + set_tag("query_list", query_list) if not query_list: return {} @@ -57,9 +60,13 @@ class EndToEndKeyWorkerStore(SQLBaseStore): return results + @trace def _get_e2e_device_keys_txn( self, txn, query_list, include_all_devices=False, include_deleted_devices=False ): + set_tag("include_all_devices", include_all_devices) + set_tag("include_deleted_devices", include_deleted_devices) + query_clauses = [] query_params = [] @@ -104,6 +111,7 @@ class EndToEndKeyWorkerStore(SQLBaseStore): for user_id, device_id in deleted_devices: result.setdefault(user_id, {})[device_id] = None + log_kv(result) return result @defer.inlineCallbacks @@ -129,8 +137,9 @@ class EndToEndKeyWorkerStore(SQLBaseStore): keyvalues={"user_id": user_id, "device_id": device_id}, desc="add_e2e_one_time_keys_check", ) - - return {(row["algorithm"], row["key_id"]): row["key_json"] for row in rows} + result = {(row["algorithm"], row["key_id"]): row["key_json"] for row in rows} + log_kv({"message": "Fetched one time keys for user", "one_time_keys": result}) + return result @defer.inlineCallbacks def add_e2e_one_time_keys(self, user_id, device_id, time_now, new_keys): @@ -146,6 +155,9 @@ class EndToEndKeyWorkerStore(SQLBaseStore): """ def _add_e2e_one_time_keys(txn): + set_tag("user_id", user_id) + set_tag("device_id", device_id) + set_tag("new_keys", new_keys) # We are protected from race between lookup and insertion due to # a unique constraint. If there is a race of two calls to # `add_e2e_one_time_keys` then they'll conflict and we will only @@ -202,6 +214,11 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): """ def _set_e2e_device_keys_txn(txn): + set_tag("user_id", user_id) + set_tag("device_id", device_id) + set_tag("time_now", time_now) + set_tag("device_keys", device_keys) + old_key_json = self._simple_select_one_onecol_txn( txn, table="e2e_device_keys_json", @@ -215,6 +232,7 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): new_key_json = encode_canonical_json(device_keys).decode("utf-8") if old_key_json == new_key_json: + log_kv({"Message": "Device key already stored."}) return False self._simple_upsert_txn( @@ -223,7 +241,7 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): keyvalues={"user_id": user_id, "device_id": device_id}, values={"ts_added_ms": time_now, "key_json": new_key_json}, ) - + log_kv({"message": "Device keys stored."}) return True return self.runInteraction("set_e2e_device_keys", _set_e2e_device_keys_txn) @@ -231,6 +249,7 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): def claim_e2e_one_time_keys(self, query_list): """Take a list of one time keys out of the database""" + @trace def _claim_e2e_one_time_keys(txn): sql = ( "SELECT key_id, key_json FROM e2e_one_time_keys_json" @@ -252,7 +271,13 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): " AND key_id = ?" ) for user_id, device_id, algorithm, key_id in delete: + log_kv( + { + "message": "Executing claim e2e_one_time_keys transaction on database." + } + ) txn.execute(sql, (user_id, device_id, algorithm, key_id)) + log_kv({"message": "finished executing and invalidating cache"}) self._invalidate_cache_and_stream( txn, self.count_e2e_one_time_keys, (user_id, device_id) ) @@ -262,6 +287,13 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): def delete_e2e_keys_by_device(self, user_id, device_id): def delete_e2e_keys_by_device_txn(txn): + log_kv( + { + "message": "Deleting keys for device", + "device_id": device_id, + "user_id": user_id, + } + ) self._simple_delete_txn( txn, table="e2e_device_keys_json", From 3320aaab3a9bba3f5872371aba7053b41af9d0a0 Mon Sep 17 00:00:00 2001 From: Half-Shot Date: Thu, 22 Aug 2019 14:17:57 +0100 Subject: [PATCH 072/110] Add "require_consent" parameter for registration --- synapse/handlers/register.py | 14 ++++++++++++-- synapse/replication/http/register.py | 2 ++ synapse/rest/client/v2_alpha/register.py | 5 ++++- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 4631fab94e..5c92960d25 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -150,6 +150,7 @@ class RegistrationHandler(BaseHandler): threepid=None, user_type=None, default_display_name=None, + require_consent=True, address=None, bind_emails=[], ): @@ -167,6 +168,7 @@ class RegistrationHandler(BaseHandler): will be set to this. Defaults to 'localpart'. address (str|None): the IP address used to perform the registration. bind_emails (List[str]): list of emails to bind to this account. + require_consent (bool): Should the user be required to give consent. Returns: Deferred[str]: user_id Raises: @@ -211,6 +213,7 @@ class RegistrationHandler(BaseHandler): admin=admin, user_type=user_type, address=address, + require_consent=require_consent, ) if self.hs.config.user_directory_search_all_users: @@ -244,7 +247,7 @@ class RegistrationHandler(BaseHandler): user_id = None attempts += 1 - if not self.hs.config.user_consent_at_registration: + if not self.hs.config.user_consent_at_registration and require_consent: yield self._auto_join_rooms(user_id) else: logger.info( @@ -525,6 +528,7 @@ class RegistrationHandler(BaseHandler): ratelimit=False, ) + @defer.inlineCallbacks def register_with_store( self, user_id, @@ -536,6 +540,7 @@ class RegistrationHandler(BaseHandler): admin=False, user_type=None, address=None, + require_consent=True, ): """Register user in the datastore. @@ -553,7 +558,7 @@ class RegistrationHandler(BaseHandler): user_type (str|None): type of user. One of the values from api.constants.UserTypes, or None for a normal user. address (str|None): the IP address used to perform the registration. - + require_consent (bool): Should the user be required to give consent. Returns: Deferred """ @@ -584,8 +589,12 @@ class RegistrationHandler(BaseHandler): admin=admin, user_type=user_type, address=address, + require_consent=require_consent, ) else: + if require_consent is False: + yield self.store.user_set_consent_version(user_id, "no-consent-required") + return self.store.register_user( user_id=user_id, password_hash=password_hash, @@ -597,6 +606,7 @@ class RegistrationHandler(BaseHandler): user_type=user_type, ) + @defer.inlineCallbacks def register_device(self, user_id, device_id, initial_display_name, is_guest=False): """Register a device for a user and generate an access token. diff --git a/synapse/replication/http/register.py b/synapse/replication/http/register.py index 3341320a87..65702de082 100644 --- a/synapse/replication/http/register.py +++ b/synapse/replication/http/register.py @@ -72,6 +72,7 @@ class ReplicationRegisterServlet(ReplicationEndpoint): "admin": admin, "user_type": user_type, "address": address, + "require_consent": require_consent, } @defer.inlineCallbacks @@ -88,6 +89,7 @@ class ReplicationRegisterServlet(ReplicationEndpoint): admin=content["admin"], user_type=content["user_type"], address=content["address"], + require_consent=content["require_consent"], ) return (200, {}) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 05ea1459e3..724231f364 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -525,6 +525,9 @@ class RegisterRestServlet(RestServlet): # downcased one in `username` for the mac calculation user = body["username"].encode("utf-8") + # do not require consent for this user (for example, bots) + require_consent = body.get("require_consent", True) + # str() because otherwise hmac complains that 'unicode' does not # have the buffer interface got_mac = str(body["mac"]) @@ -542,7 +545,7 @@ class RegisterRestServlet(RestServlet): raise SynapseError(403, "HMAC incorrect") user_id = yield self.registration_handler.register_user( - localpart=username, password=password + localpart=username, password=password, require_consent=require_consent, ) result = yield self._create_registration_details(user_id, body) From 27a686e53b8ba3f2e2f102fae73e598c00ec0086 Mon Sep 17 00:00:00 2001 From: Half-Shot Date: Thu, 22 Aug 2019 14:22:04 +0100 Subject: [PATCH 073/110] Do not send consent notices if "no-consent-required" is set --- synapse/server_notices/consent_server_notices.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/synapse/server_notices/consent_server_notices.py b/synapse/server_notices/consent_server_notices.py index 415e9c17d8..8e82ee32b2 100644 --- a/synapse/server_notices/consent_server_notices.py +++ b/synapse/server_notices/consent_server_notices.py @@ -80,6 +80,10 @@ class ConsentServerNotices(object): try: u = yield self._store.get_user_by_id(user_id) + if u["consent_version"] == "no-consent-required": + # user is exempt + return + if u["is_guest"] and not self._send_to_guests: # don't send to guests return From 1c5b8c622248d4ee3b38b01a997eaa8844859beb Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 22 Aug 2019 14:47:34 +0100 Subject: [PATCH 074/110] Revert "Add "require_consent" parameter for registration" This reverts commit 3320aaab3a9bba3f5872371aba7053b41af9d0a0. --- synapse/handlers/register.py | 14 ++------------ synapse/replication/http/register.py | 2 -- synapse/rest/client/v2_alpha/register.py | 5 +---- 3 files changed, 3 insertions(+), 18 deletions(-) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 5c92960d25..4631fab94e 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -150,7 +150,6 @@ class RegistrationHandler(BaseHandler): threepid=None, user_type=None, default_display_name=None, - require_consent=True, address=None, bind_emails=[], ): @@ -168,7 +167,6 @@ class RegistrationHandler(BaseHandler): will be set to this. Defaults to 'localpart'. address (str|None): the IP address used to perform the registration. bind_emails (List[str]): list of emails to bind to this account. - require_consent (bool): Should the user be required to give consent. Returns: Deferred[str]: user_id Raises: @@ -213,7 +211,6 @@ class RegistrationHandler(BaseHandler): admin=admin, user_type=user_type, address=address, - require_consent=require_consent, ) if self.hs.config.user_directory_search_all_users: @@ -247,7 +244,7 @@ class RegistrationHandler(BaseHandler): user_id = None attempts += 1 - if not self.hs.config.user_consent_at_registration and require_consent: + if not self.hs.config.user_consent_at_registration: yield self._auto_join_rooms(user_id) else: logger.info( @@ -528,7 +525,6 @@ class RegistrationHandler(BaseHandler): ratelimit=False, ) - @defer.inlineCallbacks def register_with_store( self, user_id, @@ -540,7 +536,6 @@ class RegistrationHandler(BaseHandler): admin=False, user_type=None, address=None, - require_consent=True, ): """Register user in the datastore. @@ -558,7 +553,7 @@ class RegistrationHandler(BaseHandler): user_type (str|None): type of user. One of the values from api.constants.UserTypes, or None for a normal user. address (str|None): the IP address used to perform the registration. - require_consent (bool): Should the user be required to give consent. + Returns: Deferred """ @@ -589,12 +584,8 @@ class RegistrationHandler(BaseHandler): admin=admin, user_type=user_type, address=address, - require_consent=require_consent, ) else: - if require_consent is False: - yield self.store.user_set_consent_version(user_id, "no-consent-required") - return self.store.register_user( user_id=user_id, password_hash=password_hash, @@ -606,7 +597,6 @@ class RegistrationHandler(BaseHandler): user_type=user_type, ) - @defer.inlineCallbacks def register_device(self, user_id, device_id, initial_display_name, is_guest=False): """Register a device for a user and generate an access token. diff --git a/synapse/replication/http/register.py b/synapse/replication/http/register.py index 65702de082..3341320a87 100644 --- a/synapse/replication/http/register.py +++ b/synapse/replication/http/register.py @@ -72,7 +72,6 @@ class ReplicationRegisterServlet(ReplicationEndpoint): "admin": admin, "user_type": user_type, "address": address, - "require_consent": require_consent, } @defer.inlineCallbacks @@ -89,7 +88,6 @@ class ReplicationRegisterServlet(ReplicationEndpoint): admin=content["admin"], user_type=content["user_type"], address=content["address"], - require_consent=content["require_consent"], ) return (200, {}) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 724231f364..05ea1459e3 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -525,9 +525,6 @@ class RegisterRestServlet(RestServlet): # downcased one in `username` for the mac calculation user = body["username"].encode("utf-8") - # do not require consent for this user (for example, bots) - require_consent = body.get("require_consent", True) - # str() because otherwise hmac complains that 'unicode' does not # have the buffer interface got_mac = str(body["mac"]) @@ -545,7 +542,7 @@ class RegisterRestServlet(RestServlet): raise SynapseError(403, "HMAC incorrect") user_id = yield self.registration_handler.register_user( - localpart=username, password=password, require_consent=require_consent, + localpart=username, password=password ) result = yield self._create_registration_details(user_id, body) From dbd46decad5f47208171b73949714d9dcb1a87b1 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 22 Aug 2019 14:47:43 +0100 Subject: [PATCH 075/110] Revert "Do not send consent notices if "no-consent-required" is set" This reverts commit 27a686e53b8ba3f2e2f102fae73e598c00ec0086. --- synapse/server_notices/consent_server_notices.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/synapse/server_notices/consent_server_notices.py b/synapse/server_notices/consent_server_notices.py index 8e82ee32b2..415e9c17d8 100644 --- a/synapse/server_notices/consent_server_notices.py +++ b/synapse/server_notices/consent_server_notices.py @@ -80,10 +80,6 @@ class ConsentServerNotices(object): try: u = yield self._store.get_user_by_id(user_id) - if u["consent_version"] == "no-consent-required": - # user is exempt - return - if u["is_guest"] and not self._send_to_guests: # don't send to guests return From 0bab582fd6f4b42b64ecf09f5d8dbab568172d55 Mon Sep 17 00:00:00 2001 From: Manuel Stahl Date: Tue, 23 Jul 2019 11:55:18 +0200 Subject: [PATCH 076/110] Remove shared secret registration from client/r0/register endpoint This type of registration was probably never used. It only includes the user name in the HMAC but not the password. Shared secret registration is still available via client/r0/admin/register. Signed-off-by: Manuel Stahl --- changelog.d/5877.removal | 1 + synapse/rest/client/v2_alpha/register.py | 57 ++---------------------- 2 files changed, 5 insertions(+), 53 deletions(-) create mode 100644 changelog.d/5877.removal diff --git a/changelog.d/5877.removal b/changelog.d/5877.removal new file mode 100644 index 0000000000..b6d84fb401 --- /dev/null +++ b/changelog.d/5877.removal @@ -0,0 +1 @@ +Remove shared secret registration from client/r0/register endpoint. Contributed by Awesome Technologies Innovationslabor GmbH. diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 05ea1459e3..9510a1e2b0 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -16,7 +16,6 @@ import hmac import logging -from hashlib import sha1 from six import string_types @@ -239,14 +238,12 @@ class RegisterRestServlet(RestServlet): # we do basic sanity checks here because the auth layer will store these # in sessions. Pull out the username/password provided to us. - desired_password = None if "password" in body: if ( not isinstance(body["password"], string_types) or len(body["password"]) > 512 ): raise SynapseError(400, "Invalid password") - desired_password = body["password"] desired_username = None if "username" in body: @@ -261,8 +258,8 @@ class RegisterRestServlet(RestServlet): if self.auth.has_access_token(request): appservice = yield self.auth.get_appservice_by_req(request) - # fork off as soon as possible for ASes and shared secret auth which - # have completely different registration flows to normal users + # fork off as soon as possible for ASes which have completely + # different registration flows to normal users # == Application Service Registration == if appservice: @@ -285,8 +282,8 @@ class RegisterRestServlet(RestServlet): return (200, result) # we throw for non 200 responses return - # for either shared secret or regular registration, downcase the - # provided username before attempting to register it. This should mean + # for regular registration, downcase the provided username before + # attempting to register it. This should mean # that people who try to register with upper-case in their usernames # don't get a nasty surprise. (Note that we treat username # case-insenstively in login, so they are free to carry on imagining @@ -294,16 +291,6 @@ class RegisterRestServlet(RestServlet): if desired_username is not None: desired_username = desired_username.lower() - # == Shared Secret Registration == (e.g. create new user scripts) - if "mac" in body: - # FIXME: Should we really be determining if this is shared secret - # auth based purely on the 'mac' key? - result = yield self._do_shared_secret_registration( - desired_username, desired_password, body - ) - return (200, result) # we throw for non 200 responses - return - # == Normal User Registration == (everyone else) if not self.hs.config.enable_registration: raise SynapseError(403, "Registration has been disabled") @@ -512,42 +499,6 @@ class RegisterRestServlet(RestServlet): ) return (yield self._create_registration_details(user_id, body)) - @defer.inlineCallbacks - def _do_shared_secret_registration(self, username, password, body): - if not self.hs.config.registration_shared_secret: - raise SynapseError(400, "Shared secret registration is not enabled") - if not username: - raise SynapseError( - 400, "username must be specified", errcode=Codes.BAD_JSON - ) - - # use the username from the original request rather than the - # downcased one in `username` for the mac calculation - user = body["username"].encode("utf-8") - - # str() because otherwise hmac complains that 'unicode' does not - # have the buffer interface - got_mac = str(body["mac"]) - - # FIXME this is different to the /v1/register endpoint, which - # includes the password and admin flag in the hashed text. Why are - # these different? - want_mac = hmac.new( - key=self.hs.config.registration_shared_secret.encode(), - msg=user, - digestmod=sha1, - ).hexdigest() - - if not compare_digest(want_mac, got_mac): - raise SynapseError(403, "HMAC incorrect") - - user_id = yield self.registration_handler.register_user( - localpart=username, password=password - ) - - result = yield self._create_registration_details(user_id, body) - return result - @defer.inlineCallbacks def _create_registration_details(self, user_id, params): """Complete registration of newly-registered user From 812ed6b0d5b2c682d8032fc83e3041a9da93f670 Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Thu, 22 Aug 2019 18:08:07 +0100 Subject: [PATCH 077/110] Opentracing across workers (#5771) Propagate opentracing contexts across workers Also includes some Convenience modifications to opentracing for servlets, notably: - Add boolean to skip the whitelisting check on inject extract methods. - useful when injecting into carriers locally. Otherwise we'd always have to include our own servername and whitelist our servername - start_active_span_from_request instead of header - Add boolean to decide whether to extract context from a request to a servlet --- changelog.d/5771.feature | 1 + synapse/federation/transport/server.py | 43 +++++--- synapse/http/servlet.py | 2 +- synapse/logging/opentracing.py | 144 ++++++++++++++----------- synapse/replication/http/_base.py | 16 ++- 5 files changed, 123 insertions(+), 83 deletions(-) create mode 100644 changelog.d/5771.feature diff --git a/changelog.d/5771.feature b/changelog.d/5771.feature new file mode 100644 index 0000000000..f2f4de1fdd --- /dev/null +++ b/changelog.d/5771.feature @@ -0,0 +1 @@ +Make Opentracing work in worker mode. diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index a17148fc3c..dc53b4b170 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -38,7 +38,12 @@ from synapse.http.servlet import ( parse_string_from_args, ) from synapse.logging.context import run_in_background -from synapse.logging.opentracing import start_active_span_from_context, tags +from synapse.logging.opentracing import ( + start_active_span, + start_active_span_from_request, + tags, + whitelisted_homeserver, +) from synapse.types import ThirdPartyInstanceID, get_domain_from_id from synapse.util.ratelimitutils import FederationRateLimiter from synapse.util.versionstring import get_version_string @@ -288,20 +293,28 @@ class BaseFederationServlet(object): logger.warn("authenticate_request failed: %s", e) raise - # Start an opentracing span - with start_active_span_from_context( - request.requestHeaders, - "incoming-federation-request", - tags={ - "request_id": request.get_request_id(), - tags.SPAN_KIND: tags.SPAN_KIND_RPC_SERVER, - tags.HTTP_METHOD: request.get_method(), - tags.HTTP_URL: request.get_redacted_uri(), - tags.PEER_HOST_IPV6: request.getClientIP(), - "authenticated_entity": origin, - "servlet_name": request.request_metrics.name, - }, - ): + request_tags = { + "request_id": request.get_request_id(), + tags.SPAN_KIND: tags.SPAN_KIND_RPC_SERVER, + tags.HTTP_METHOD: request.get_method(), + tags.HTTP_URL: request.get_redacted_uri(), + tags.PEER_HOST_IPV6: request.getClientIP(), + "authenticated_entity": origin, + "servlet_name": request.request_metrics.name, + } + + # Only accept the span context if the origin is authenticated + # and whitelisted + if origin and whitelisted_homeserver(origin): + scope = start_active_span_from_request( + request, "incoming-federation-request", tags=request_tags + ) + else: + scope = start_active_span( + "incoming-federation-request", tags=request_tags + ) + + with scope: if origin: with ratelimiter.ratelimit(origin) as d: await d diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index fd07bf7b8e..c186b31f59 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -300,7 +300,7 @@ class RestServlet(object): http_server.register_paths( method, patterns, - trace_servlet(servlet_classname, method_handler), + trace_servlet(servlet_classname)(method_handler), servlet_classname, ) diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 6b706e1892..4abea4474b 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -174,10 +174,48 @@ from twisted.internet import defer from synapse.config import ConfigError +# Helper class + + +class _DummyTagNames(object): + """wrapper of opentracings tags. We need to have them if we + want to reference them without opentracing around. Clearly they + should never actually show up in a trace. `set_tags` overwrites + these with the correct ones.""" + + INVALID_TAG = "invalid-tag" + COMPONENT = INVALID_TAG + DATABASE_INSTANCE = INVALID_TAG + DATABASE_STATEMENT = INVALID_TAG + DATABASE_TYPE = INVALID_TAG + DATABASE_USER = INVALID_TAG + ERROR = INVALID_TAG + HTTP_METHOD = INVALID_TAG + HTTP_STATUS_CODE = INVALID_TAG + HTTP_URL = INVALID_TAG + MESSAGE_BUS_DESTINATION = INVALID_TAG + PEER_ADDRESS = INVALID_TAG + PEER_HOSTNAME = INVALID_TAG + PEER_HOST_IPV4 = INVALID_TAG + PEER_HOST_IPV6 = INVALID_TAG + PEER_PORT = INVALID_TAG + PEER_SERVICE = INVALID_TAG + SAMPLING_PRIORITY = INVALID_TAG + SERVICE = INVALID_TAG + SPAN_KIND = INVALID_TAG + SPAN_KIND_CONSUMER = INVALID_TAG + SPAN_KIND_PRODUCER = INVALID_TAG + SPAN_KIND_RPC_CLIENT = INVALID_TAG + SPAN_KIND_RPC_SERVER = INVALID_TAG + + try: import opentracing + + tags = opentracing.tags except ImportError: opentracing = None + tags = _DummyTagNames try: from jaeger_client import Config as JaegerConfig from synapse.logging.scopecontextmanager import LogContextScopeManager @@ -252,10 +290,6 @@ def init_tracer(config): scope_manager=LogContextScopeManager(config), ).initialize_tracer() - # Set up tags to be opentracing's tags - global tags - tags = opentracing.tags - # Whitelisting @@ -334,8 +368,8 @@ def start_active_span_follows_from(operation_name, contexts): return scope -def start_active_span_from_context( - headers, +def start_active_span_from_request( + request, operation_name, references=None, tags=None, @@ -344,9 +378,9 @@ def start_active_span_from_context( finish_on_close=True, ): """ - Extracts a span context from Twisted Headers. + Extracts a span context from a Twisted Request. args: - headers (twisted.web.http_headers.Headers) + headers (twisted.web.http.Request) For the other args see opentracing.tracer @@ -360,7 +394,9 @@ def start_active_span_from_context( if opentracing is None: return _noop_context_manager() - header_dict = {k.decode(): v[0].decode() for k, v in headers.getAllRawHeaders()} + header_dict = { + k.decode(): v[0].decode() for k, v in request.requestHeaders.getAllRawHeaders() + } context = opentracing.tracer.extract(opentracing.Format.HTTP_HEADERS, header_dict) return opentracing.tracer.start_active_span( @@ -448,7 +484,7 @@ def set_operation_name(operation_name): @only_if_tracing -def inject_active_span_twisted_headers(headers, destination): +def inject_active_span_twisted_headers(headers, destination, check_destination=True): """ Injects a span context into twisted headers in-place @@ -467,7 +503,7 @@ def inject_active_span_twisted_headers(headers, destination): https://github.com/jaegertracing/jaeger-client-python/blob/master/jaeger_client/constants.py """ - if not whitelisted_homeserver(destination): + if check_destination and not whitelisted_homeserver(destination): return span = opentracing.tracer.active_span @@ -479,7 +515,7 @@ def inject_active_span_twisted_headers(headers, destination): @only_if_tracing -def inject_active_span_byte_dict(headers, destination): +def inject_active_span_byte_dict(headers, destination, check_destination=True): """ Injects a span context into a dict where the headers are encoded as byte strings @@ -511,7 +547,7 @@ def inject_active_span_byte_dict(headers, destination): @only_if_tracing -def inject_active_span_text_map(carrier, destination=None): +def inject_active_span_text_map(carrier, destination, check_destination=True): """ Injects a span context into a dict @@ -532,7 +568,7 @@ def inject_active_span_text_map(carrier, destination=None): https://github.com/jaegertracing/jaeger-client-python/blob/master/jaeger_client/constants.py """ - if destination and not whitelisted_homeserver(destination): + if check_destination and not whitelisted_homeserver(destination): return opentracing.tracer.inject( @@ -689,65 +725,43 @@ def tag_args(func): return _tag_args_inner -def trace_servlet(servlet_name, func): +def trace_servlet(servlet_name, extract_context=False): """Decorator which traces a serlet. It starts a span with some servlet specific - tags such as the servlet_name and request information""" - if not opentracing: - return func + tags such as the servlet_name and request information - @wraps(func) - @defer.inlineCallbacks - def _trace_servlet_inner(request, *args, **kwargs): - with start_active_span( - "incoming-client-request", - tags={ + Args: + servlet_name (str): The name to be used for the span's operation_name + extract_context (bool): Whether to attempt to extract the opentracing + context from the request the servlet is handling. + + """ + + def _trace_servlet_inner_1(func): + if not opentracing: + return func + + @wraps(func) + @defer.inlineCallbacks + def _trace_servlet_inner(request, *args, **kwargs): + request_tags = { "request_id": request.get_request_id(), tags.SPAN_KIND: tags.SPAN_KIND_RPC_SERVER, tags.HTTP_METHOD: request.get_method(), tags.HTTP_URL: request.get_redacted_uri(), tags.PEER_HOST_IPV6: request.getClientIP(), - "servlet_name": servlet_name, - }, - ): - result = yield defer.maybeDeferred(func, request, *args, **kwargs) - return result + } - return _trace_servlet_inner + if extract_context: + scope = start_active_span_from_request( + request, servlet_name, tags=request_tags + ) + else: + scope = start_active_span(servlet_name, tags=request_tags) + with scope: + result = yield defer.maybeDeferred(func, request, *args, **kwargs) + return result -# Helper class + return _trace_servlet_inner - -class _DummyTagNames(object): - """wrapper of opentracings tags. We need to have them if we - want to reference them without opentracing around. Clearly they - should never actually show up in a trace. `set_tags` overwrites - these with the correct ones.""" - - INVALID_TAG = "invalid-tag" - COMPONENT = INVALID_TAG - DATABASE_INSTANCE = INVALID_TAG - DATABASE_STATEMENT = INVALID_TAG - DATABASE_TYPE = INVALID_TAG - DATABASE_USER = INVALID_TAG - ERROR = INVALID_TAG - HTTP_METHOD = INVALID_TAG - HTTP_STATUS_CODE = INVALID_TAG - HTTP_URL = INVALID_TAG - MESSAGE_BUS_DESTINATION = INVALID_TAG - PEER_ADDRESS = INVALID_TAG - PEER_HOSTNAME = INVALID_TAG - PEER_HOST_IPV4 = INVALID_TAG - PEER_HOST_IPV6 = INVALID_TAG - PEER_PORT = INVALID_TAG - PEER_SERVICE = INVALID_TAG - SAMPLING_PRIORITY = INVALID_TAG - SERVICE = INVALID_TAG - SPAN_KIND = INVALID_TAG - SPAN_KIND_CONSUMER = INVALID_TAG - SPAN_KIND_PRODUCER = INVALID_TAG - SPAN_KIND_RPC_CLIENT = INVALID_TAG - SPAN_KIND_RPC_SERVER = INVALID_TAG - - -tags = _DummyTagNames + return _trace_servlet_inner_1 diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py index 2e0594e581..c4be9273f6 100644 --- a/synapse/replication/http/_base.py +++ b/synapse/replication/http/_base.py @@ -22,6 +22,7 @@ from six.moves import urllib from twisted.internet import defer +import synapse.logging.opentracing as opentracing from synapse.api.errors import ( CodeMessageException, HttpResponseException, @@ -165,8 +166,12 @@ class ReplicationEndpoint(object): # have a good idea that the request has either succeeded or failed on # the master, and so whether we should clean up or not. while True: + headers = {} + opentracing.inject_active_span_byte_dict( + headers, None, check_destination=False + ) try: - result = yield request_func(uri, data) + result = yield request_func(uri, data, headers=headers) break except CodeMessageException as e: if e.code != 504 or not cls.RETRY_ON_TIMEOUT: @@ -205,7 +210,14 @@ class ReplicationEndpoint(object): args = "/".join("(?P<%s>[^/]+)" % (arg,) for arg in url_args) pattern = re.compile("^/_synapse/replication/%s/%s$" % (self.NAME, args)) - http_server.register_paths(method, [pattern], handler, self.__class__.__name__) + http_server.register_paths( + method, + [pattern], + opentracing.trace_servlet(self.__class__.__name__, extract_context=True)( + handler + ), + self.__class__.__name__, + ) def _cached_handler(self, request, txn_id, **kwargs): """Called on new incoming requests when caching is enabled. Checks From 8767b63a821eb8612e2ab830534fd6f40eb1aaaa Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Thu, 22 Aug 2019 18:21:10 +0100 Subject: [PATCH 078/110] Propagate opentracing contexts through EDUs (#5852) Propagate opentracing contexts through EDUs Co-Authored-By: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- changelog.d/5852.feature | 1 + docs/opentracing.rst | 27 ++- synapse/federation/federation_server.py | 15 +- .../federation/sender/transaction_manager.py | 170 ++++++++++-------- synapse/federation/units.py | 3 + synapse/handlers/devicemessage.py | 27 ++- synapse/logging/opentracing.py | 26 +++ synapse/storage/devices.py | 39 +++- .../delta/56/add_spans_to_device_lists.sql | 20 +++ 9 files changed, 234 insertions(+), 94 deletions(-) create mode 100644 changelog.d/5852.feature create mode 100644 synapse/storage/schema/delta/56/add_spans_to_device_lists.sql diff --git a/changelog.d/5852.feature b/changelog.d/5852.feature new file mode 100644 index 0000000000..4a0fc6c542 --- /dev/null +++ b/changelog.d/5852.feature @@ -0,0 +1 @@ +Pass opentracing contexts between servers when transmitting EDUs. diff --git a/docs/opentracing.rst b/docs/opentracing.rst index b91a2208a8..6e98ab56ba 100644 --- a/docs/opentracing.rst +++ b/docs/opentracing.rst @@ -32,7 +32,7 @@ It is up to the remote server to decide what it does with the spans it creates. This is called the sampling policy and it can be configured through Jaeger's settings. -For OpenTracing concepts see +For OpenTracing concepts see https://opentracing.io/docs/overview/what-is-tracing/. For more information about Jaeger's implementation see @@ -79,7 +79,7 @@ Homeserver whitelisting The homeserver whitelist is configured using regular expressions. A list of regular expressions can be given and their union will be compared when propagating any -spans contexts to another homeserver. +spans contexts to another homeserver. Though it's mostly safe to send and receive span contexts to and from untrusted users since span contexts are usually opaque ids it can lead to @@ -92,6 +92,29 @@ two problems, namely: but that doesn't prevent another server sending you baggage which will be logged to OpenTracing's logs. +========== +EDU FORMAT +========== + +EDUs can contain tracing data in their content. This is not specced but +it could be of interest for other homeservers. + +EDU format (if you're using jaeger): + +.. code-block:: json + + { + "edu_type": "type", + "content": { + "org.matrix.opentracing_context": { + "uber-trace-id": "fe57cf3e65083289" + } + } + } + +Though you don't have to use jaeger you must inject the span context into +`org.matrix.opentracing_context` using the opentracing `Format.TEXT_MAP` inject method. + ================== Configuring Jaeger ================== diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 9286ca3202..05fd49f3c1 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -43,7 +43,7 @@ from synapse.federation.persistence import TransactionActions from synapse.federation.units import Edu, Transaction from synapse.http.endpoint import parse_server_name from synapse.logging.context import nested_logging_context -from synapse.logging.opentracing import log_kv, trace +from synapse.logging.opentracing import log_kv, start_active_span_from_edu, trace from synapse.logging.utils import log_function from synapse.replication.http.federation import ( ReplicationFederationSendEduRestServlet, @@ -811,12 +811,13 @@ class FederationHandlerRegistry(object): if not handler: logger.warn("No handler registered for EDU type %s", edu_type) - try: - yield handler(origin, content) - except SynapseError as e: - logger.info("Failed to handle edu %r: %r", edu_type, e) - except Exception: - logger.exception("Failed to handle edu %r", edu_type) + with start_active_span_from_edu(content, "handle_edu"): + try: + yield handler(origin, content) + except SynapseError as e: + logger.info("Failed to handle edu %r: %r", edu_type, e) + except Exception: + logger.exception("Failed to handle edu %r", edu_type) def on_query(self, query_type, args): handler = self.query_handlers.get(query_type) diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py index 52706302f2..62ca6a3e87 100644 --- a/synapse/federation/sender/transaction_manager.py +++ b/synapse/federation/sender/transaction_manager.py @@ -14,11 +14,19 @@ # limitations under the License. import logging +from canonicaljson import json + from twisted.internet import defer from synapse.api.errors import HttpResponseException from synapse.federation.persistence import TransactionActions from synapse.federation.units import Transaction +from synapse.logging.opentracing import ( + extract_text_map, + set_tag, + start_active_span_follows_from, + tags, +) from synapse.util.metrics import measure_func logger = logging.getLogger(__name__) @@ -44,93 +52,109 @@ class TransactionManager(object): @defer.inlineCallbacks def send_new_transaction(self, destination, pending_pdus, pending_edus): - # Sort based on the order field - pending_pdus.sort(key=lambda t: t[1]) - pdus = [x[0] for x in pending_pdus] - edus = pending_edus + # Make a transaction-sending opentracing span. This span follows on from + # all the edus in that transaction. This needs to be done since there is + # no active span here, so if the edus were not received by the remote the + # span would have no causality and it would be forgotten. + # The span_contexts is a generator so that it won't be evaluated if + # opentracing is disabled. (Yay speed!) - success = True - - logger.debug("TX [%s] _attempt_new_transaction", destination) - - txn_id = str(self._next_txn_id) - - logger.debug( - "TX [%s] {%s} Attempting new transaction" " (pdus: %d, edus: %d)", - destination, - txn_id, - len(pdus), - len(edus), + span_contexts = ( + extract_text_map(json.loads(edu.get_context())) for edu in pending_edus ) - transaction = Transaction.create_new( - origin_server_ts=int(self.clock.time_msec()), - transaction_id=txn_id, - origin=self._server_name, - destination=destination, - pdus=pdus, - edus=edus, - ) + with start_active_span_follows_from("send_transaction", span_contexts): - self._next_txn_id += 1 + # Sort based on the order field + pending_pdus.sort(key=lambda t: t[1]) + pdus = [x[0] for x in pending_pdus] + edus = pending_edus - logger.info( - "TX [%s] {%s} Sending transaction [%s]," " (PDUs: %d, EDUs: %d)", - destination, - txn_id, - transaction.transaction_id, - len(pdus), - len(edus), - ) + success = True - # Actually send the transaction + logger.debug("TX [%s] _attempt_new_transaction", destination) - # FIXME (erikj): This is a bit of a hack to make the Pdu age - # keys work - def json_data_cb(): - data = transaction.get_dict() - now = int(self.clock.time_msec()) - if "pdus" in data: - for p in data["pdus"]: - if "age_ts" in p: - unsigned = p.setdefault("unsigned", {}) - unsigned["age"] = now - int(p["age_ts"]) - del p["age_ts"] - return data + txn_id = str(self._next_txn_id) - try: - response = yield self._transport_layer.send_transaction( - transaction, json_data_cb + logger.debug( + "TX [%s] {%s} Attempting new transaction" " (pdus: %d, edus: %d)", + destination, + txn_id, + len(pdus), + len(edus), ) - code = 200 - except HttpResponseException as e: - code = e.code - response = e.response - if e.code in (401, 404, 429) or 500 <= e.code: - logger.info("TX [%s] {%s} got %d response", destination, txn_id, code) - raise e + transaction = Transaction.create_new( + origin_server_ts=int(self.clock.time_msec()), + transaction_id=txn_id, + origin=self._server_name, + destination=destination, + pdus=pdus, + edus=edus, + ) - logger.info("TX [%s] {%s} got %d response", destination, txn_id, code) + self._next_txn_id += 1 - if code == 200: - for e_id, r in response.get("pdus", {}).items(): - if "error" in r: + logger.info( + "TX [%s] {%s} Sending transaction [%s]," " (PDUs: %d, EDUs: %d)", + destination, + txn_id, + transaction.transaction_id, + len(pdus), + len(edus), + ) + + # Actually send the transaction + + # FIXME (erikj): This is a bit of a hack to make the Pdu age + # keys work + def json_data_cb(): + data = transaction.get_dict() + now = int(self.clock.time_msec()) + if "pdus" in data: + for p in data["pdus"]: + if "age_ts" in p: + unsigned = p.setdefault("unsigned", {}) + unsigned["age"] = now - int(p["age_ts"]) + del p["age_ts"] + return data + + try: + response = yield self._transport_layer.send_transaction( + transaction, json_data_cb + ) + code = 200 + except HttpResponseException as e: + code = e.code + response = e.response + + if e.code in (401, 404, 429) or 500 <= e.code: + logger.info( + "TX [%s] {%s} got %d response", destination, txn_id, code + ) + raise e + + logger.info("TX [%s] {%s} got %d response", destination, txn_id, code) + + if code == 200: + for e_id, r in response.get("pdus", {}).items(): + if "error" in r: + logger.warn( + "TX [%s] {%s} Remote returned error for %s: %s", + destination, + txn_id, + e_id, + r, + ) + else: + for p in pdus: logger.warn( - "TX [%s] {%s} Remote returned error for %s: %s", + "TX [%s] {%s} Failed to send event %s", destination, txn_id, - e_id, - r, + p.event_id, ) - else: - for p in pdus: - logger.warn( - "TX [%s] {%s} Failed to send event %s", - destination, - txn_id, - p.event_id, - ) - success = False + success = False - return success + set_tag(tags.ERROR, not success) + return success diff --git a/synapse/federation/units.py b/synapse/federation/units.py index 14aad8f09d..aa84621206 100644 --- a/synapse/federation/units.py +++ b/synapse/federation/units.py @@ -38,6 +38,9 @@ class Edu(JsonEncodedObject): internal_keys = ["origin", "destination"] + def get_context(self): + return getattr(self, "content", {}).get("org.matrix.opentracing_context", "{}") + class Transaction(JsonEncodedObject): """ A transaction is a list of Pdus and Edus to be sent to a remote home diff --git a/synapse/handlers/devicemessage.py b/synapse/handlers/devicemessage.py index e1ebb6346c..c7d56779b8 100644 --- a/synapse/handlers/devicemessage.py +++ b/synapse/handlers/devicemessage.py @@ -15,9 +15,17 @@ import logging +from canonicaljson import json + from twisted.internet import defer from synapse.api.errors import SynapseError +from synapse.logging.opentracing import ( + get_active_span_text_map, + set_tag, + start_active_span, + whitelisted_homeserver, +) from synapse.types import UserID, get_domain_from_id from synapse.util.stringutils import random_string @@ -100,14 +108,21 @@ class DeviceMessageHandler(object): message_id = random_string(16) + context = get_active_span_text_map() + remote_edu_contents = {} for destination, messages in remote_messages.items(): - remote_edu_contents[destination] = { - "messages": messages, - "sender": sender_user_id, - "type": message_type, - "message_id": message_id, - } + with start_active_span("to_device_for_user"): + set_tag("destination", destination) + remote_edu_contents[destination] = { + "messages": messages, + "sender": sender_user_id, + "type": message_type, + "message_id": message_id, + "org.matrix.opentracing_context": json.dumps(context) + if whitelisted_homeserver(destination) + else None, + } stream_id = yield self.store.add_messages_to_device_inbox( local_messages, remote_edu_contents diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 4abea4474b..dd296027a1 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -149,6 +149,9 @@ unchartered waters will require the enforcement of the whitelist. ``logging/opentracing.py`` has a ``whitelisted_homeserver`` method which takes in a destination and compares it to the whitelist. +Most injection methods take a 'destination' arg. The context will only be injected +if the destination matches the whitelist or the destination is None. + ======= Gotchas ======= @@ -576,6 +579,29 @@ def inject_active_span_text_map(carrier, destination, check_destination=True): ) +def get_active_span_text_map(destination=None): + """ + Gets a span context as a dict. This can be used instead of manually + injecting a span into an empty carrier. + + Args: + destination (str): the name of the remote server. + + Returns: + dict: the active span's context if opentracing is enabled, otherwise empty. + """ + + if not opentracing or (destination and not whitelisted_homeserver(destination)): + return {} + + carrier = {} + opentracing.tracer.inject( + opentracing.tracer.active_span, opentracing.Format.TEXT_MAP, carrier + ) + + return carrier + + def active_span_context_as_string(): """ Returns: diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index 8f72d92895..e11881161d 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -21,6 +21,11 @@ from canonicaljson import json from twisted.internet import defer from synapse.api.errors import StoreError +from synapse.logging.opentracing import ( + get_active_span_text_map, + trace, + whitelisted_homeserver, +) from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage._base import Cache, SQLBaseStore, db_to_json from synapse.storage.background_updates import BackgroundUpdateStore @@ -73,6 +78,7 @@ class DeviceWorkerStore(SQLBaseStore): return {d["device_id"]: d for d in devices} + @trace @defer.inlineCallbacks def get_devices_by_remote(self, destination, from_stream_id, limit): """Get stream of updates to send to remote servers @@ -127,8 +133,15 @@ class DeviceWorkerStore(SQLBaseStore): # (user_id, device_id) entries into a map, with the value being # the max stream_id across each set of duplicate entries # - # maps (user_id, device_id) -> stream_id + # maps (user_id, device_id) -> (stream_id, opentracing_context) # as long as their stream_id does not match that of the last row + # + # opentracing_context contains the opentracing metadata for the request + # that created the poke + # + # The most recent request's opentracing_context is used as the + # context which created the Edu. + query_map = {} for update in updates: if stream_id_cutoff is not None and update[2] >= stream_id_cutoff: @@ -136,7 +149,14 @@ class DeviceWorkerStore(SQLBaseStore): break key = (update[0], update[1]) - query_map[key] = max(query_map.get(key, 0), update[2]) + + update_context = update[3] + update_stream_id = update[2] + + previous_update_stream_id, _ = query_map.get(key, (0, None)) + + if update_stream_id > previous_update_stream_id: + query_map[key] = (update_stream_id, update_context) # If we didn't find any updates with a stream_id lower than the cutoff, it # means that there are more than limit updates all of which have the same @@ -171,7 +191,7 @@ class DeviceWorkerStore(SQLBaseStore): List: List of device updates """ sql = """ - SELECT user_id, device_id, stream_id FROM device_lists_outbound_pokes + SELECT user_id, device_id, stream_id, opentracing_context FROM device_lists_outbound_pokes WHERE destination = ? AND ? < stream_id AND stream_id <= ? AND sent = ? ORDER BY stream_id LIMIT ? @@ -187,8 +207,9 @@ class DeviceWorkerStore(SQLBaseStore): Args: destination (str): The host the device updates are intended for from_stream_id (int): The minimum stream_id to filter updates by, exclusive - query_map (Dict[(str, str): int]): Dictionary mapping - user_id/device_id to update stream_id + query_map (Dict[(str, str): (int, str|None)]): Dictionary mapping + user_id/device_id to update stream_id and the relevent json-encoded + opentracing context Returns: List[Dict]: List of objects representing an device update EDU @@ -210,12 +231,13 @@ class DeviceWorkerStore(SQLBaseStore): destination, user_id, from_stream_id ) for device_id, device in iteritems(user_devices): - stream_id = query_map[(user_id, device_id)] + stream_id, opentracing_context = query_map[(user_id, device_id)] result = { "user_id": user_id, "device_id": device_id, "prev_id": [prev_id] if prev_id else [], "stream_id": stream_id, + "org.matrix.opentracing_context": opentracing_context, } prev_id = stream_id @@ -814,6 +836,8 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): ], ) + context = get_active_span_text_map() + self._simple_insert_many_txn( txn, table="device_lists_outbound_pokes", @@ -825,6 +849,9 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): "device_id": device_id, "sent": False, "ts": now, + "opentracing_context": json.dumps(context) + if whitelisted_homeserver(destination) + else None, } for destination in hosts for device_id in device_ids diff --git a/synapse/storage/schema/delta/56/add_spans_to_device_lists.sql b/synapse/storage/schema/delta/56/add_spans_to_device_lists.sql new file mode 100644 index 0000000000..41807eb1e7 --- /dev/null +++ b/synapse/storage/schema/delta/56/add_spans_to_device_lists.sql @@ -0,0 +1,20 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Opentracing context data for inclusion in the device_list_update EDUs, as a + * json-encoded dictionary. NULL if opentracing is disabled (or not enabled for this destination). + */ +ALTER TABLE device_lists_outbound_pokes ADD opentracing_context TEXT; From 886eceba3e134325e6943aa63eda0a70a7aca911 Mon Sep 17 00:00:00 2001 From: Half-Shot Date: Fri, 23 Aug 2019 09:14:52 +0100 Subject: [PATCH 079/110] Return user_type in get_user_by_id --- synapse/handlers/message.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index a5e23c4caf..97a89fe882 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -24,7 +24,7 @@ from twisted.internet import defer from twisted.internet.defer import succeed from synapse import event_auth -from synapse.api.constants import EventTypes, Membership, RelationTypes +from synapse.api.constants import EventTypes, Membership, RelationTypes, UserTypes from synapse.api.errors import ( AuthError, Codes, @@ -469,6 +469,9 @@ class EventCreationHandler(object): u = yield self.store.get_user_by_id(user_id) assert u is not None + if u["user_type"] == UserTypes.SUPPORT: + # support users are not required to consent + return if u["appservice_id"] is not None: # users registered by an appservice are exempt return From ae38e0569fdb592875b45d4b37f200af5f6a86fc Mon Sep 17 00:00:00 2001 From: Half-Shot Date: Fri, 23 Aug 2019 09:15:10 +0100 Subject: [PATCH 080/110] Ignore consent for support users --- synapse/storage/registration.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 55e4e84d71..938fd00717 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -56,6 +56,7 @@ class RegistrationWorkerStore(SQLBaseStore): "consent_server_notice_sent", "appservice_id", "creation_ts", + "user_type", ], allow_none=True, desc="get_user_by_id", From 80793e813c7c4024ce28ab9d56d00fd4b4812800 Mon Sep 17 00:00:00 2001 From: Half-Shot Date: Fri, 23 Aug 2019 09:20:31 +0100 Subject: [PATCH 081/110] newsfile 5902 --- changelog.d/5902.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5902.feature diff --git a/changelog.d/5902.feature b/changelog.d/5902.feature new file mode 100644 index 0000000000..c1c5c97b18 --- /dev/null +++ b/changelog.d/5902.feature @@ -0,0 +1 @@ +Support users are no longer required to consent. \ No newline at end of file From 0fb5189072f61416e7f616aa9d90d8c981e6b8b3 Mon Sep 17 00:00:00 2001 From: Half-Shot Date: Fri, 23 Aug 2019 09:25:35 +0100 Subject: [PATCH 082/110] Fix registration test --- tests/storage/test_registration.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/storage/test_registration.py b/tests/storage/test_registration.py index 0253c4ac05..4578cc3b60 100644 --- a/tests/storage/test_registration.py +++ b/tests/storage/test_registration.py @@ -49,6 +49,7 @@ class RegistrationStoreTestCase(unittest.TestCase): "consent_server_notice_sent": None, "appservice_id": None, "creation_ts": 1000, + "user_type": None, }, (yield self.store.get_user_by_id(self.user_id)), ) From d9b8cf81be1a1132d29e89e3b7d2451886783163 Mon Sep 17 00:00:00 2001 From: Half-Shot Date: Fri, 23 Aug 2019 09:52:09 +0100 Subject: [PATCH 083/110] Add bot type --- synapse/api/constants.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 3ffde0d7fc..f29bce560c 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -122,7 +122,8 @@ class UserTypes(object): """ SUPPORT = "support" - ALL_USER_TYPES = (SUPPORT,) + BOT = "bot" + ALL_USER_TYPES = (SUPPORT, BOT) class RelationTypes(object): From 971c980c6ee4a396a94987c85e5b8db01ceed0d0 Mon Sep 17 00:00:00 2001 From: Half-Shot Date: Fri, 23 Aug 2019 09:53:48 +0100 Subject: [PATCH 084/110] Add changelog --- changelog.d/5903.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5903.feature diff --git a/changelog.d/5903.feature b/changelog.d/5903.feature new file mode 100644 index 0000000000..fc60d02107 --- /dev/null +++ b/changelog.d/5903.feature @@ -0,0 +1 @@ +Add bot user type. From 9ba32f6573a47368c1ac63ec769c20f73bcc2cc5 Mon Sep 17 00:00:00 2001 From: Half-Shot Date: Fri, 23 Aug 2019 09:56:31 +0100 Subject: [PATCH 085/110] Exempt bot users --- synapse/handlers/message.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 97a89fe882..c656cffc07 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -469,7 +469,7 @@ class EventCreationHandler(object): u = yield self.store.get_user_by_id(user_id) assert u is not None - if u["user_type"] == UserTypes.SUPPORT: + if u["user_type"] == UserTypes.SUPPORT or u["user_type"] == UserTypes.BOT: # support users are not required to consent return if u["appservice_id"] is not None: From 4a2d2c2b6f42cef95c7e70ecbc27e52b9976fd15 Mon Sep 17 00:00:00 2001 From: Half-Shot Date: Fri, 23 Aug 2019 09:57:07 +0100 Subject: [PATCH 086/110] Update changelog --- changelog.d/5902.feature | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/5902.feature b/changelog.d/5902.feature index c1c5c97b18..0660f65cfa 100644 --- a/changelog.d/5902.feature +++ b/changelog.d/5902.feature @@ -1 +1 @@ -Support users are no longer required to consent. \ No newline at end of file +Users with the type of "support" or "bot" are no longer required to consent. \ No newline at end of file From c998f250065bd3c0f51a6e6ac3b333ede98ed6a7 Mon Sep 17 00:00:00 2001 From: Will Hunt Date: Fri, 23 Aug 2019 10:28:54 +0100 Subject: [PATCH 087/110] Apply suggestions from code review Co-Authored-By: Erik Johnston --- synapse/handlers/message.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index c656cffc07..111f7c7e2f 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -469,8 +469,8 @@ class EventCreationHandler(object): u = yield self.store.get_user_by_id(user_id) assert u is not None - if u["user_type"] == UserTypes.SUPPORT or u["user_type"] == UserTypes.BOT: - # support users are not required to consent + if u["user_type"] in (UserTypes.SUPPORT, UserTypes.BOT): + # support and bot users are not required to consent return if u["appservice_id"] is not None: # users registered by an appservice are exempt From 7af5a63063aa69888ab59ee997cc3d1459d25af4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Aug 2019 14:52:11 +0100 Subject: [PATCH 088/110] Fixup review comments --- docs/sample_config.yaml | 4 ++-- synapse/crypto/keyring.py | 4 ++-- synapse/rest/key/v2/remote_key_resource.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index c96eb0cf2d..ae1cafc5f3 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -1029,8 +1029,8 @@ signing_key_path: "CONFDIR/SERVERNAME.signing.key" # - server_name: "matrix.org" # -# The additional signing keys to use when acting as a trusted key server, on -# top of the normal signing keys. +# The signing keys to use when acting as a trusted key server. If not specified +# defaults to the server signing key. # # Can contain multiple keys, one per line. # diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index abeb0ac26e..2d7434fb2f 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -539,7 +539,7 @@ class BaseV2KeyFetcher(object): verify_key=verify_key, valid_until_ts=key_data["expired_ts"] ) - signed_key_json_bytes = encode_canonical_json(response_json) + key_json_bytes = encode_canonical_json(response_json) yield make_deferred_yieldable( defer.gatherResults( @@ -551,7 +551,7 @@ class BaseV2KeyFetcher(object): from_server=from_server, ts_now_ms=time_added_ms, ts_expires_ms=ts_valid_until_ms, - key_json_bytes=signed_key_json_bytes, + key_json_bytes=key_json_bytes, ) for key_id in verify_keys ], diff --git a/synapse/rest/key/v2/remote_key_resource.py b/synapse/rest/key/v2/remote_key_resource.py index f3398c9523..55580bc59e 100644 --- a/synapse/rest/key/v2/remote_key_resource.py +++ b/synapse/rest/key/v2/remote_key_resource.py @@ -14,7 +14,7 @@ import logging -from canonicaljson import json +from canonicaljson import encode_canonical_json, json from signedjson.sign import sign_json from twisted.internet import defer @@ -227,4 +227,4 @@ class RemoteKey(DirectServeResource): results = {"server_keys": signed_keys} - respond_with_json_bytes(request, 200, json.dumps(results).encode("utf-8")) + respond_with_json_bytes(request, 200, encode_canonical_json(results)) From fe0ac98e6653903cce43b1c5a3be77ef4f626867 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Aug 2019 14:54:20 +0100 Subject: [PATCH 089/110] Don't implicitly include server signing key --- synapse/config/key.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/synapse/config/key.py b/synapse/config/key.py index f1a1efcb7f..ba2199bceb 100644 --- a/synapse/config/key.py +++ b/synapse/config/key.py @@ -85,14 +85,13 @@ class KeyConfig(Config): config.get("key_refresh_interval", "1d") ) - self.key_server_signing_keys = list(self.signing_key) key_server_signing_keys_path = config.get("key_server_signing_keys_path") if key_server_signing_keys_path: - self.key_server_signing_keys.extend( - self.read_signing_keys( - key_server_signing_keys_path, "key_server_signing_keys_path" - ) + self.key_server_signing_keys = self.read_signing_keys( + key_server_signing_keys_path, "key_server_signing_keys_path" ) + else: + self.key_server_signing_keys = list(self.signing_key) # if neither trusted_key_servers nor perspectives are given, use the default. if "perspectives" not in config and "trusted_key_servers" not in config: @@ -221,8 +220,8 @@ class KeyConfig(Config): # - server_name: "matrix.org" # - # The additional signing keys to use when acting as a trusted key server, on - # top of the normal signing keys. + # The signing keys to use when acting as a trusted key server. If not specified + # defaults to the server signing key. # # Can contain multiple keys, one per line. # From e70f0081da5bbea772316dffda5f173e9568d1d3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Aug 2019 15:05:56 +0100 Subject: [PATCH 090/110] Fix logcontexts --- synapse/http/federation/matrix_federation_agent.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index a8815f078a..62883c06a4 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -28,7 +28,7 @@ from twisted.web.iweb import IAgent, IAgentEndpointFactory from synapse.http.federation.srv_resolver import Server, SrvResolver from synapse.http.federation.well_known_resolver import WellKnownResolver -from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable +from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.util import Clock logger = logging.getLogger(__name__) @@ -158,8 +158,9 @@ class MatrixFederationAgent(object): if not headers.hasHeader(b"host"): headers.addRawHeader(b"host", parsed_uri.netloc) - with PreserveLoggingContext(): - res = yield self._agent.request(method, uri, headers, bodyProducer) + res = yield make_deferred_yieldable( + self._agent.request(method, uri, headers, bodyProducer) + ) return res @@ -214,11 +215,14 @@ class MatrixHostnameEndpoint(object): self._srv_resolver = srv_resolver - @defer.inlineCallbacks def connect(self, protocol_factory): """Implements IStreamClientEndpoint interface """ + return run_in_background(self._do_connect, protocol_factory) + + @defer.inlineCallbacks + def _do_connect(self, protocol_factory): first_exception = None server_list = yield self._resolve_server() From fbb758a7cef9282fee605eb6bc9f1b2d430d8d62 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Aug 2019 15:09:08 +0100 Subject: [PATCH 091/110] Fixup comments --- synapse/http/federation/matrix_federation_agent.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index 62883c06a4..feae7de5be 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -192,12 +192,19 @@ class MatrixHostnameEndpointFactory(object): class MatrixHostnameEndpoint(object): """An endpoint that resolves matrix:// URLs using Matrix server name resolution (i.e. via SRV). Does not check for well-known delegation. + + Args: + reactor (IReactor) + tls_client_options_factory (ClientTLSOptionsFactory|None): + factory to use for fetching client tls options, or none to disable TLS. + srv_resolver (SrvResolver): The SRV resolver to use + parsed_uri (twisted.web.client.URI): The parsed URI that we're wanting + to connect to. """ def __init__(self, reactor, tls_client_options_factory, srv_resolver, parsed_uri): self._reactor = reactor - # We reparse the URI so that defaultPort is -1 rather than 80 self._parsed_uri = parsed_uri # set up the TLS connection params @@ -272,6 +279,7 @@ class MatrixHostnameEndpoint(object): # before now, due to needing to rewrite the Host header of the HTTP # request. + # We reparse the URI so that defaultPort is -1 rather than 80 parsed_uri = urllib.parse.urlparse(self._parsed_uri.toBytes()) host = parsed_uri.hostname From 27d3fc421ab03361b03e4b9b4dd0d912b09412ba Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Sat, 24 Aug 2019 22:33:43 +0100 Subject: [PATCH 092/110] Increase max display name limit --- changelog.d/5906.feature | 1 + synapse/handlers/profile.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/5906.feature diff --git a/changelog.d/5906.feature b/changelog.d/5906.feature new file mode 100644 index 0000000000..7c789510a6 --- /dev/null +++ b/changelog.d/5906.feature @@ -0,0 +1 @@ +Increase max display name size to 256. diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index 2cc237e6a5..8690f69d45 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -34,7 +34,7 @@ from ._base import BaseHandler logger = logging.getLogger(__name__) -MAX_DISPLAYNAME_LEN = 100 +MAX_DISPLAYNAME_LEN = 256 MAX_AVATAR_URL_LEN = 1000 From e8e3e033eea2947c3746005f876afca55c601f1d Mon Sep 17 00:00:00 2001 From: Aaron Raimist Date: Mon, 26 Aug 2019 21:01:47 -0500 Subject: [PATCH 093/110] public_base_url is actually public_baseurl Signed-off-by: Aaron Raimist --- synapse/config/emailconfig.py | 2 +- synapse/rest/well_known.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/config/emailconfig.py b/synapse/config/emailconfig.py index 36d01a10af..f83c05df44 100644 --- a/synapse/config/emailconfig.py +++ b/synapse/config/emailconfig.py @@ -115,7 +115,7 @@ class EmailConfig(Config): missing.append("email." + k) if config.get("public_baseurl") is None: - missing.append("public_base_url") + missing.append("public_baseurl") if len(missing) > 0: raise RuntimeError( diff --git a/synapse/rest/well_known.py b/synapse/rest/well_known.py index 5e8fda4b65..20177b44e7 100644 --- a/synapse/rest/well_known.py +++ b/synapse/rest/well_known.py @@ -34,7 +34,7 @@ class WellKnownBuilder(object): self._config = hs.config def get_well_known(self): - # if we don't have a public_base_url, we can't help much here. + # if we don't have a public_baseurl, we can't help much here. if self._config.public_baseurl is None: return None From c25137a99f9dd79ca3f712243997eb6da7614a2f Mon Sep 17 00:00:00 2001 From: Aaron Raimist Date: Mon, 26 Aug 2019 21:06:08 -0500 Subject: [PATCH 094/110] Add changelog Signed-off-by: Aaron Raimist --- changelog.d/5909.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5909.misc diff --git a/changelog.d/5909.misc b/changelog.d/5909.misc new file mode 100644 index 0000000000..73e35cc48d --- /dev/null +++ b/changelog.d/5909.misc @@ -0,0 +1 @@ +Fix error message which referred to public_base_url instead of public_baseurl. From aefa76f5cd70f20808947605e76e9570aaff58ed Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" Date: Tue, 27 Aug 2019 08:52:20 +0100 Subject: [PATCH 095/110] Allow schema deltas to be engine-specific Signed-off-by: Olivier Wilkinson (reivilibre) --- synapse/storage/prepare_database.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index d20eacda59..0270cd6f6c 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -238,6 +238,15 @@ def _upgrade_existing_database( logger.debug("applied_delta_files: %s", applied_delta_files) + if isinstance(database_engine, PostgresEngine): + specific_engine_extension = ".postgres" + else: + specific_engine_extension = ".sqlite" + + specific_engine_extensions = ( + ".sqlite", ".postgres" + ) + for v in range(start_ver, SCHEMA_VERSION + 1): logger.info("Upgrading schema to v%d", v) @@ -274,15 +283,22 @@ def _upgrade_existing_database( # Sometimes .pyc files turn up anyway even though we've # disabled their generation; e.g. from distribution package # installers. Silently skip it - pass + continue elif ext == ".sql": # A plain old .sql file, just read and execute it logger.info("Applying schema %s", relative_path) executescript(cur, absolute_path) + elif ext == specific_engine_extension and root_name.endswith(".sql"): + # A .sql file specific to our engine; just read and execute it + logger.info("Applying engine-specific schema %s", relative_path) + executescript(cur, absolute_path) + elif ext in specific_engine_extensions and root_name.endswith(".sql"): + # A .sql file for a different engine; skip it. + continue else: # Not a valid delta file. - logger.warn( - "Found directory entry that did not end in .py or" " .sql: %s", + logger.warning( + "Found directory entry that did not end in .py or .sql: %s", relative_path, ) continue @@ -290,7 +306,7 @@ def _upgrade_existing_database( # Mark as done. cur.execute( database_engine.convert_param_style( - "INSERT INTO applied_schema_deltas (version, file)" " VALUES (?,?)" + "INSERT INTO applied_schema_deltas (version, file) VALUES (?,?)" ), (v, relative_path), ) @@ -298,7 +314,7 @@ def _upgrade_existing_database( cur.execute("DELETE FROM schema_version") cur.execute( database_engine.convert_param_style( - "INSERT INTO schema_version (version, upgraded)" " VALUES (?,?)" + "INSERT INTO schema_version (version, upgraded) VALUES (?,?)" ), (v, True), ) From 62a1639287be270c8471a4de33804542b444bb8e Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" Date: Tue, 27 Aug 2019 09:36:12 +0100 Subject: [PATCH 096/110] Newsfile Signed-off-by: Olivier Wilkinson (reivilibre) --- changelog.d/5911.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5911.misc diff --git a/changelog.d/5911.misc b/changelog.d/5911.misc new file mode 100644 index 0000000000..fe5a8fd59c --- /dev/null +++ b/changelog.d/5911.misc @@ -0,0 +1 @@ +Add support for database engine-specific schema deltas, based on file extension. \ No newline at end of file From d1e0b91083b9dd0dcbb9fa5819c8072c9e8625ef Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" Date: Tue, 27 Aug 2019 09:39:11 +0100 Subject: [PATCH 097/110] Code style (Black) Signed-off-by: Olivier Wilkinson (reivilibre) --- synapse/storage/prepare_database.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 0270cd6f6c..e96eed8a6d 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -243,9 +243,7 @@ def _upgrade_existing_database( else: specific_engine_extension = ".sqlite" - specific_engine_extensions = ( - ".sqlite", ".postgres" - ) + specific_engine_extensions = (".sqlite", ".postgres") for v in range(start_ver, SCHEMA_VERSION + 1): logger.info("Upgrading schema to v%d", v) From 1a7e6eb63387704ef379bf962318f710ce5ae5f3 Mon Sep 17 00:00:00 2001 From: reivilibre <38398653+reivilibre@users.noreply.github.com> Date: Tue, 27 Aug 2019 10:14:00 +0100 Subject: [PATCH 098/110] Add Admin API capability to set adminship of a user (#5878) Admin API: Set adminship of a user --- changelog.d/5878.feature | 1 + docs/admin_api/user_admin_api.rst | 20 ++++++++ synapse/handlers/admin.py | 10 ++++ synapse/rest/admin/__init__.py | 2 + synapse/rest/admin/users.py | 76 +++++++++++++++++++++++++++++++ synapse/storage/registration.py | 23 ++++++++++ 6 files changed, 132 insertions(+) create mode 100644 changelog.d/5878.feature create mode 100644 synapse/rest/admin/users.py diff --git a/changelog.d/5878.feature b/changelog.d/5878.feature new file mode 100644 index 0000000000..d9d6df880e --- /dev/null +++ b/changelog.d/5878.feature @@ -0,0 +1 @@ +Add admin API endpoint for setting whether or not a user is a server administrator. diff --git a/docs/admin_api/user_admin_api.rst b/docs/admin_api/user_admin_api.rst index 213359d0c0..6ee5080eed 100644 --- a/docs/admin_api/user_admin_api.rst +++ b/docs/admin_api/user_admin_api.rst @@ -84,3 +84,23 @@ with a body of: } including an ``access_token`` of a server admin. + + +Change whether a user is a server administrator or not +====================================================== + +Note that you cannot demote yourself. + +The api is:: + + PUT /_synapse/admin/v1/users//admin + +with a body of: + +.. code:: json + + { + "admin": true + } + +including an ``access_token`` of a server admin. diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py index 2f22f56ca4..d30a68b650 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py @@ -94,6 +94,16 @@ class AdminHandler(BaseHandler): return ret + def set_user_server_admin(self, user, admin): + """ + Set the admin bit on a user. + + Args: + user_id (UserID): the (necessarily local) user to manipulate + admin (bool): whether or not the user should be an admin of this server + """ + return self.store.set_server_admin(user, admin) + @defer.inlineCallbacks def export_user_data(self, user_id, writer): """Write all data we have on the user to the given writer. diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index 0dce256840..9ab1c2c9e0 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -44,6 +44,7 @@ from synapse.rest.admin._base import ( from synapse.rest.admin.media import register_servlets_for_media_repo from synapse.rest.admin.purge_room_servlet import PurgeRoomServlet from synapse.rest.admin.server_notice_servlet import SendServerNoticeServlet +from synapse.rest.admin.users import UserAdminServlet from synapse.types import UserID, create_requester from synapse.util.versionstring import get_version_string @@ -742,6 +743,7 @@ def register_servlets(hs, http_server): PurgeRoomServlet(hs).register(http_server) SendServerNoticeServlet(hs).register(http_server) VersionServlet(hs).register(http_server) + UserAdminServlet(hs).register(http_server) def register_servlets_for_client_rest_resource(hs, http_server): diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py new file mode 100644 index 0000000000..b0fddb6898 --- /dev/null +++ b/synapse/rest/admin/users.py @@ -0,0 +1,76 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import re + +from twisted.internet import defer + +from synapse.api.errors import SynapseError +from synapse.http.servlet import ( + RestServlet, + assert_params_in_dict, + parse_json_object_from_request, +) +from synapse.rest.admin import assert_requester_is_admin +from synapse.types import UserID + + +class UserAdminServlet(RestServlet): + """ + Set whether or not a user is a server administrator. + + Note that only local users can be server administrators, and that an + administrator may not demote themselves. + + Only server administrators can use this API. + + Example: + PUT /_synapse/admin/v1/users/@reivilibre:librepush.net/admin + { + "admin": true + } + """ + + PATTERNS = (re.compile("^/_synapse/admin/v1/users/(?P@[^/]*)/admin$"),) + + def __init__(self, hs): + self.hs = hs + self.auth = hs.get_auth() + self.handlers = hs.get_handlers() + + @defer.inlineCallbacks + def on_PUT(self, request, user_id): + yield assert_requester_is_admin(self.auth, request) + requester = yield self.auth.get_user_by_req(request) + auth_user = requester.user + + target_user = UserID.from_string(user_id) + + body = parse_json_object_from_request(request) + + assert_params_in_dict(body, ["admin"]) + + if not self.hs.is_mine(target_user): + raise SynapseError(400, "Only local users can be admins of this homeserver") + + set_admin_to = bool(body["admin"]) + + if target_user == auth_user and not set_admin_to: + raise SynapseError(400, "You may not demote yourself.") + + yield self.handlers.admin_handler.set_user_server_admin( + target_user, set_admin_to + ) + + return (200, {}) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 55e4e84d71..9027b917c1 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -272,6 +272,14 @@ class RegistrationWorkerStore(SQLBaseStore): @defer.inlineCallbacks def is_server_admin(self, user): + """Determines if a user is an admin of this homeserver. + + Args: + user (UserID): user ID of the user to test + + Returns (bool): + true iff the user is a server admin, false otherwise. + """ res = yield self._simple_select_one_onecol( table="users", keyvalues={"name": user.to_string()}, @@ -282,6 +290,21 @@ class RegistrationWorkerStore(SQLBaseStore): return res if res else False + def set_server_admin(self, user, admin): + """Sets whether a user is an admin of this homeserver. + + Args: + user (UserID): user ID of the user to test + admin (bool): true iff the user is to be a server admin, + false otherwise. + """ + return self._simple_update_one( + table="users", + keyvalues={"name": user.to_string()}, + updatevalues={"admin": 1 if admin else 0}, + desc="set_server_admin", + ) + def _query_for_auth(self, txn, token): sql = ( "SELECT users.name, users.is_guest, access_tokens.id as token_id," From e7577427c90a364601889ac983c760d825d9a530 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 27 Aug 2019 11:50:52 +0100 Subject: [PATCH 099/110] Update 5909.misc --- changelog.d/5909.misc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/5909.misc b/changelog.d/5909.misc index 73e35cc48d..03d0c4367b 100644 --- a/changelog.d/5909.misc +++ b/changelog.d/5909.misc @@ -1 +1 @@ -Fix error message which referred to public_base_url instead of public_baseurl. +Fix error message which referred to public_base_url instead of public_baseurl. Thanks to @aaronraimist for the fix! From c88a119259c8625b015db4cf8ea08e30ca16cc81 Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" Date: Tue, 27 Aug 2019 13:12:27 +0100 Subject: [PATCH 100/110] Add GET method to admin API /users/@user:dom/admin Signed-off-by: Olivier Wilkinson (reivilibre) --- changelog.d/5914.feature | 1 + synapse/handlers/admin.py | 9 ++++++++ synapse/rest/admin/__init__.py | 2 +- synapse/rest/admin/users.py | 40 +++++++++++++++++++++++++++------- 4 files changed, 43 insertions(+), 9 deletions(-) create mode 100644 changelog.d/5914.feature diff --git a/changelog.d/5914.feature b/changelog.d/5914.feature new file mode 100644 index 0000000000..85c7bf5963 --- /dev/null +++ b/changelog.d/5914.feature @@ -0,0 +1 @@ +Add admin API endpoint for getting whether or not a user is a server administrator. diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py index d30a68b650..1a87b58838 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py @@ -94,6 +94,15 @@ class AdminHandler(BaseHandler): return ret + def get_user_server_admin(self, user): + """ + Get the admin bit on a user. + + Args: + user_id (UserID): the (necessarily local) user to manipulate + """ + return self.store.is_server_admin(user) + def set_user_server_admin(self, user, admin): """ Set the admin bit on a user. diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index 9ab1c2c9e0..fa91cc8dee 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -52,7 +52,7 @@ logger = logging.getLogger(__name__) class UsersRestServlet(RestServlet): - PATTERNS = historical_admin_path_patterns("/users/(?P[^/]*)") + PATTERNS = historical_admin_path_patterns("/users/(?P[^/]*)$") def __init__(self, hs): self.hs = hs diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index b0fddb6898..5364117420 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -22,24 +22,34 @@ from synapse.http.servlet import ( assert_params_in_dict, parse_json_object_from_request, ) -from synapse.rest.admin import assert_requester_is_admin +from synapse.rest.admin import assert_requester_is_admin, assert_user_is_admin from synapse.types import UserID class UserAdminServlet(RestServlet): """ - Set whether or not a user is a server administrator. + Get or set whether or not a user is a server administrator. Note that only local users can be server administrators, and that an administrator may not demote themselves. Only server administrators can use this API. - Example: - PUT /_synapse/admin/v1/users/@reivilibre:librepush.net/admin - { - "admin": true - } + Examples: + * Get + GET /_synapse/admin/v1/users/@nonadmin:example.com/admin + response on success: + { + "admin": false + } + * Set + PUT /_synapse/admin/v1/users/@reivilibre:librepush.net/admin + request body: + { + "admin": true + } + response on success: + {} """ PATTERNS = (re.compile("^/_synapse/admin/v1/users/(?P@[^/]*)/admin$"),) @@ -50,9 +60,23 @@ class UserAdminServlet(RestServlet): self.handlers = hs.get_handlers() @defer.inlineCallbacks - def on_PUT(self, request, user_id): + def on_GET(self, request, user_id): yield assert_requester_is_admin(self.auth, request) + + target_user = UserID.from_string(user_id) + + if not self.hs.is_mine(target_user): + raise SynapseError(400, "Only local users can be admins of this homeserver") + + is_admin = yield self.handlers.admin_handler.get_user_server_admin(target_user) + is_admin = bool(is_admin) + + return (200, {"admin": is_admin}) + + @defer.inlineCallbacks + def on_PUT(self, request, user_id): requester = yield self.auth.get_user_by_req(request) + yield assert_user_is_admin(self.auth, requester.user) auth_user = requester.user target_user = UserID.from_string(user_id) From 1b959b6977249c1db198beb6e04e02fa667ebfab Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" Date: Tue, 27 Aug 2019 13:19:19 +0100 Subject: [PATCH 101/110] Document GET method for retrieving admin bit of user in admin API Signed-off-by: Olivier Wilkinson (reivilibre) --- docs/admin_api/user_admin_api.rst | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/docs/admin_api/user_admin_api.rst b/docs/admin_api/user_admin_api.rst index 6ee5080eed..d0871f9438 100644 --- a/docs/admin_api/user_admin_api.rst +++ b/docs/admin_api/user_admin_api.rst @@ -86,6 +86,25 @@ with a body of: including an ``access_token`` of a server admin. +Get whether a user is a server administrator or not +=================================================== + + +The api is:: + + GET /_synapse/admin/v1/users//admin + +including an ``access_token`` of a server admin. + +A response body like the following is returned: + +.. code:: json + + { + "admin": true + } + + Change whether a user is a server administrator or not ====================================================== From 91caa5b4303bfa0b4604ecf95d56ae72a7074b0b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 27 Aug 2019 13:56:42 +0100 Subject: [PATCH 102/110] Fix off by one error in SRV result shuffling --- synapse/http/federation/srv_resolver.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/synapse/http/federation/srv_resolver.py b/synapse/http/federation/srv_resolver.py index c8ca3fd0e9..3fe4ffb9e5 100644 --- a/synapse/http/federation/srv_resolver.py +++ b/synapse/http/federation/srv_resolver.py @@ -66,17 +66,18 @@ def _sort_server_list(server_list): for priority in sorted(priority_map): servers = priority_map[priority] - # This algorithms follows the algorithm described in RFC2782. + # This algorithms roughly follows the algorithm described in RFC2782, + # changed to remove an off-by-one error. # - # N.B. Weights can be zero, which means that you should pick that server - # last *or* that its the only server in this priority. - - # We sort to ensure zero weighted items are first. - servers.sort(key=lambda s: s.weight) + # N.B. Weights can be zero, which means that they should be picked + # rarely. total_weight = sum(s.weight for s in servers) - while servers: - target_weight = random.randint(0, total_weight) + + # Total weight can become zero if there are only zero weight servers + # left, which we handle by just shuffling and appending to the results. + while servers and total_weight: + target_weight = random.randint(1, total_weight) for s in servers: target_weight -= s.weight @@ -88,6 +89,10 @@ def _sort_server_list(server_list): servers.remove(s) total_weight -= s.weight + if servers: + random.shuffle(servers) + results.extend(servers) + return results From 1196ee32b35587af82e7bf60dc60f2ba56c5f93c Mon Sep 17 00:00:00 2001 From: Victor Goff Date: Wed, 28 Aug 2019 04:34:49 -0400 Subject: [PATCH 103/110] Typographical corrections in docker/README (#5921) --- docker/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docker/README.md b/docker/README.md index 46bb9d2d99..d5879c2f2c 100644 --- a/docker/README.md +++ b/docker/README.md @@ -17,7 +17,7 @@ By default, the image expects a single volume, located at ``/data``, that will h * the appservices configuration. You are free to use separate volumes depending on storage endpoints at your -disposal. For instance, ``/data/media`` coud be stored on a large but low +disposal. For instance, ``/data/media`` could be stored on a large but low performance hdd storage while other files could be stored on high performance endpoints. @@ -27,8 +27,8 @@ configuration file there. Multiple application services are supported. ## Generating a configuration file -The first step is to genearte a valid config file. To do this, you can run the -image with the `generate` commandline option. +The first step is to generate a valid config file. To do this, you can run the +image with the `generate` command line option. You will need to specify values for the `SYNAPSE_SERVER_NAME` and `SYNAPSE_REPORT_STATS` environment variable, and mount a docker volume to store @@ -59,7 +59,7 @@ The following environment variables are supported in `generate` mode: * `SYNAPSE_CONFIG_PATH`: path to the file to be generated. Defaults to `/homeserver.yaml`. * `SYNAPSE_DATA_DIR`: where the generated config will put persistent data - such as the datatase and media store. Defaults to `/data`. + such as the database and media store. Defaults to `/data`. * `UID`, `GID`: the user id and group id to use for creating the data directories. Defaults to `991`, `991`. @@ -115,7 +115,7 @@ not given). To migrate from a dynamic configuration file to a static one, run the docker container once with the environment variables set, and `migrate_config` -commandline option. For example: +command line option. For example: ``` docker run -it --rm \ From 49ef8ec3995db4d14e1b1367c9cd96ea231072f4 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 28 Aug 2019 10:18:16 +0100 Subject: [PATCH 104/110] Fix a cache-invalidation bug for worker-based deployments (#5920) Some of the caches on worker processes were not being correctly invalidated when a room's state was changed in a way that did not affect the membership list of the room. We need to make sure we send out cache invalidations even when no memberships are changing. --- changelog.d/5920.bugfix | 1 + synapse/storage/_base.py | 24 ++++++++++++++++-------- 2 files changed, 17 insertions(+), 8 deletions(-) create mode 100644 changelog.d/5920.bugfix diff --git a/changelog.d/5920.bugfix b/changelog.d/5920.bugfix new file mode 100644 index 0000000000..e45eb0ffee --- /dev/null +++ b/changelog.d/5920.bugfix @@ -0,0 +1 @@ +Fix a cache-invalidation bug for worker-based deployments. diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 489ce82fae..abe16334ec 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -1395,14 +1395,22 @@ class SQLBaseStore(object): """ txn.call_after(self._invalidate_state_caches, room_id, members_changed) - # We need to be careful that the size of the `members_changed` list - # isn't so large that it causes problems sending over replication, so we - # send them in chunks. - # Max line length is 16K, and max user ID length is 255, so 50 should - # be safe. - for chunk in batch_iter(members_changed, 50): - keys = itertools.chain([room_id], chunk) - self._send_invalidation_to_replication(txn, _CURRENT_STATE_CACHE_NAME, keys) + if members_changed: + # We need to be careful that the size of the `members_changed` list + # isn't so large that it causes problems sending over replication, so we + # send them in chunks. + # Max line length is 16K, and max user ID length is 255, so 50 should + # be safe. + for chunk in batch_iter(members_changed, 50): + keys = itertools.chain([room_id], chunk) + self._send_invalidation_to_replication( + txn, _CURRENT_STATE_CACHE_NAME, keys + ) + else: + # if no members changed, we still need to invalidate the other caches. + self._send_invalidation_to_replication( + txn, _CURRENT_STATE_CACHE_NAME, [room_id] + ) def _invalidate_state_caches(self, room_id, members_changed): """Invalidates caches that are based on the current state, but does From 7dc398586c2156a456d9526ac0e42c1fec9f8143 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Wed, 28 Aug 2019 21:18:53 +1000 Subject: [PATCH 105/110] Implement a structured logging output system. (#5680) --- .buildkite/docker-compose.py35.pg95.yaml | 1 + .buildkite/docker-compose.py37.pg11.yaml | 1 + .buildkite/docker-compose.py37.pg95.yaml | 1 + .buildkite/pipeline.yml | 10 +- .gitignore | 5 +- changelog.d/5680.misc | 1 + docs/structured_logging.md | 83 +++++ synapse/app/_base.py | 12 +- synapse/app/admin_cmd.py | 4 +- synapse/app/appservice.py | 4 +- synapse/app/client_reader.py | 4 +- synapse/app/event_creator.py | 4 +- synapse/app/federation_reader.py | 4 +- synapse/app/federation_sender.py | 4 +- synapse/app/frontend_proxy.py | 4 +- synapse/app/homeserver.py | 4 +- synapse/app/media_repository.py | 4 +- synapse/app/pusher.py | 4 +- synapse/app/synchrotron.py | 4 +- synapse/app/user_dir.py | 4 +- synapse/config/logger.py | 103 ++++--- synapse/handlers/federation.py | 5 +- synapse/logging/_structured.py | 374 +++++++++++++++++++++++ synapse/logging/_terse_json.py | 278 +++++++++++++++++ synapse/logging/context.py | 14 +- synapse/python_dependencies.py | 6 +- tests/logging/__init__.py | 0 tests/logging/test_structured.py | 197 ++++++++++++ tests/logging/test_terse_json.py | 234 ++++++++++++++ tests/server.py | 27 +- tox.ini | 10 + 31 files changed, 1328 insertions(+), 82 deletions(-) create mode 100644 changelog.d/5680.misc create mode 100644 docs/structured_logging.md create mode 100644 synapse/logging/_structured.py create mode 100644 synapse/logging/_terse_json.py create mode 100644 tests/logging/__init__.py create mode 100644 tests/logging/test_structured.py create mode 100644 tests/logging/test_terse_json.py diff --git a/.buildkite/docker-compose.py35.pg95.yaml b/.buildkite/docker-compose.py35.pg95.yaml index 2f14387fbc..aaea33006b 100644 --- a/.buildkite/docker-compose.py35.pg95.yaml +++ b/.buildkite/docker-compose.py35.pg95.yaml @@ -6,6 +6,7 @@ services: image: postgres:9.5 environment: POSTGRES_PASSWORD: postgres + command: -c fsync=off testenv: image: python:3.5 diff --git a/.buildkite/docker-compose.py37.pg11.yaml b/.buildkite/docker-compose.py37.pg11.yaml index f3eec05ceb..1b32675e78 100644 --- a/.buildkite/docker-compose.py37.pg11.yaml +++ b/.buildkite/docker-compose.py37.pg11.yaml @@ -6,6 +6,7 @@ services: image: postgres:11 environment: POSTGRES_PASSWORD: postgres + command: -c fsync=off testenv: image: python:3.7 diff --git a/.buildkite/docker-compose.py37.pg95.yaml b/.buildkite/docker-compose.py37.pg95.yaml index 2a41db8eba..7679f6508d 100644 --- a/.buildkite/docker-compose.py37.pg95.yaml +++ b/.buildkite/docker-compose.py37.pg95.yaml @@ -6,6 +6,7 @@ services: image: postgres:9.5 environment: POSTGRES_PASSWORD: postgres + command: -c fsync=off testenv: image: python:3.7 diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index b75269a155..d9327227ed 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -45,8 +45,15 @@ steps: - docker#v3.0.1: image: "python:3.6" - - wait + - command: + - "python -m pip install tox" + - "tox -e mypy" + label: ":mypy: mypy" + plugins: + - docker#v3.0.1: + image: "python:3.5" + - wait - command: - "apt-get update && apt-get install -y python3.5 python3.5-dev python3-pip libxml2-dev libxslt-dev zlib1g-dev" @@ -55,6 +62,7 @@ steps: label: ":python: 3.5 / SQLite / Old Deps" env: TRIAL_FLAGS: "-j 2" + LANG: "C.UTF-8" plugins: - docker#v3.0.1: image: "ubuntu:xenial" # We use xenail to get an old sqlite and python diff --git a/.gitignore b/.gitignore index f6168a8819..e53d4908d5 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,7 @@ _trial_temp*/ /*.signing.key /env/ /homeserver*.yaml +/logs /media_store/ /uploads @@ -29,8 +30,9 @@ _trial_temp*/ /.vscode/ # build products -/.coverage* !/.coveragerc +/.coverage* +/.mypy_cache/ /.tox /build/ /coverage.* @@ -38,4 +40,3 @@ _trial_temp*/ /docs/build/ /htmlcov /pip-wheel-metadata/ - diff --git a/changelog.d/5680.misc b/changelog.d/5680.misc new file mode 100644 index 0000000000..46a403a188 --- /dev/null +++ b/changelog.d/5680.misc @@ -0,0 +1 @@ +Lay the groundwork for structured logging output. diff --git a/docs/structured_logging.md b/docs/structured_logging.md new file mode 100644 index 0000000000..decec9b8fa --- /dev/null +++ b/docs/structured_logging.md @@ -0,0 +1,83 @@ +# Structured Logging + +A structured logging system can be useful when your logs are destined for a machine to parse and process. By maintaining its machine-readable characteristics, it enables more efficient searching and aggregations when consumed by software such as the "ELK stack". + +Synapse's structured logging system is configured via the file that Synapse's `log_config` config option points to. The file must be YAML and contain `structured: true`. It must contain a list of "drains" (places where logs go to). + +A structured logging configuration looks similar to the following: + +```yaml +structured: true + +loggers: + synapse: + level: INFO + synapse.storage.SQL: + level: WARNING + +drains: + console: + type: console + location: stdout + file: + type: file_json + location: homeserver.log +``` + +The above logging config will set Synapse as 'INFO' logging level by default, with the SQL layer at 'WARNING', and will have two logging drains (to the console and to a file, stored as JSON). + +## Drain Types + +Drain types can be specified by the `type` key. + +### `console` + +Outputs human-readable logs to the console. + +Arguments: + +- `location`: Either `stdout` or `stderr`. + +### `console_json` + +Outputs machine-readable JSON logs to the console. + +Arguments: + +- `location`: Either `stdout` or `stderr`. + +### `console_json_terse` + +Outputs machine-readable JSON logs to the console, separated by newlines. This +format is not designed to be read and re-formatted into human-readable text, but +is optimal for a logging aggregation system. + +Arguments: + +- `location`: Either `stdout` or `stderr`. + +### `file` + +Outputs human-readable logs to a file. + +Arguments: + +- `location`: An absolute path to the file to log to. + +### `file_json` + +Outputs machine-readable logs to a file. + +Arguments: + +- `location`: An absolute path to the file to log to. + +### `network_json_terse` + +Delivers machine-readable JSON logs to a log aggregator over TCP. This is +compatible with LogStash's TCP input with the codec set to `json_lines`. + +Arguments: + +- `host`: Hostname or IP address of the log aggregator. +- `port`: Numerical port to contact on the host. \ No newline at end of file diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 69dcf3523f..c30fdeee9a 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -36,18 +36,20 @@ from synapse.util.versionstring import get_version_string logger = logging.getLogger(__name__) +# list of tuples of function, args list, kwargs dict _sighup_callbacks = [] -def register_sighup(func): +def register_sighup(func, *args, **kwargs): """ Register a function to be called when a SIGHUP occurs. Args: func (function): Function to be called when sent a SIGHUP signal. - Will be called with a single argument, the homeserver. + Will be called with a single default argument, the homeserver. + *args, **kwargs: args and kwargs to be passed to the target function. """ - _sighup_callbacks.append(func) + _sighup_callbacks.append((func, args, kwargs)) def start_worker_reactor(appname, config, run_command=reactor.run): @@ -248,8 +250,8 @@ def start(hs, listeners=None): # we're not using systemd. sdnotify(b"RELOADING=1") - for i in _sighup_callbacks: - i(hs) + for i, args, kwargs in _sighup_callbacks: + i(hs, *args, **kwargs) sdnotify(b"READY=1") diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index 1fd52a5526..04751a6a5e 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -227,8 +227,6 @@ def start(config_options): config.start_pushers = False config.send_federation = False - setup_logging(config, use_worker_options=True) - synapse.events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -241,6 +239,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ss, config, use_worker_options=True) + ss.setup() # We use task.react as the basic run command as it correctly handles tearing diff --git a/synapse/app/appservice.py b/synapse/app/appservice.py index 54bb114dec..767b87d2db 100644 --- a/synapse/app/appservice.py +++ b/synapse/app/appservice.py @@ -141,8 +141,6 @@ def start(config_options): assert config.worker_app == "synapse.app.appservice" - setup_logging(config, use_worker_options=True) - events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -167,6 +165,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ps, config, use_worker_options=True) + ps.setup() reactor.addSystemEventTrigger( "before", "startup", _base.start, ps, config.worker_listeners diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py index 721bb5b119..86193d35a8 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -179,8 +179,6 @@ def start(config_options): assert config.worker_app == "synapse.app.client_reader" - setup_logging(config, use_worker_options=True) - events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -193,6 +191,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ss, config, use_worker_options=True) + ss.setup() reactor.addSystemEventTrigger( "before", "startup", _base.start, ss, config.worker_listeners diff --git a/synapse/app/event_creator.py b/synapse/app/event_creator.py index 473c8895d0..c67fe69a50 100644 --- a/synapse/app/event_creator.py +++ b/synapse/app/event_creator.py @@ -175,8 +175,6 @@ def start(config_options): assert config.worker_replication_http_port is not None - setup_logging(config, use_worker_options=True) - # This should only be done on the user directory worker or the master config.update_user_directory = False @@ -192,6 +190,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ss, config, use_worker_options=True) + ss.setup() reactor.addSystemEventTrigger( "before", "startup", _base.start, ss, config.worker_listeners diff --git a/synapse/app/federation_reader.py b/synapse/app/federation_reader.py index 5255d9e8cc..1ef027a88c 100644 --- a/synapse/app/federation_reader.py +++ b/synapse/app/federation_reader.py @@ -160,8 +160,6 @@ def start(config_options): assert config.worker_app == "synapse.app.federation_reader" - setup_logging(config, use_worker_options=True) - events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -174,6 +172,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ss, config, use_worker_options=True) + ss.setup() reactor.addSystemEventTrigger( "before", "startup", _base.start, ss, config.worker_listeners diff --git a/synapse/app/federation_sender.py b/synapse/app/federation_sender.py index c5a2880e69..04fbb407af 100644 --- a/synapse/app/federation_sender.py +++ b/synapse/app/federation_sender.py @@ -171,8 +171,6 @@ def start(config_options): assert config.worker_app == "synapse.app.federation_sender" - setup_logging(config, use_worker_options=True) - events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -197,6 +195,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ss, config, use_worker_options=True) + ss.setup() reactor.addSystemEventTrigger( "before", "startup", _base.start, ss, config.worker_listeners diff --git a/synapse/app/frontend_proxy.py b/synapse/app/frontend_proxy.py index e2822ca848..611d285421 100644 --- a/synapse/app/frontend_proxy.py +++ b/synapse/app/frontend_proxy.py @@ -232,8 +232,6 @@ def start(config_options): assert config.worker_main_http_uri is not None - setup_logging(config, use_worker_options=True) - events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -246,6 +244,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ss, config, use_worker_options=True) + ss.setup() reactor.addSystemEventTrigger( "before", "startup", _base.start, ss, config.worker_listeners diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 8233905844..04f1ed14f3 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -341,8 +341,6 @@ def setup(config_options): # generating config files and shouldn't try to continue. sys.exit(0) - synapse.config.logger.setup_logging(config, use_worker_options=False) - events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -356,6 +354,8 @@ def setup(config_options): database_engine=database_engine, ) + synapse.config.logger.setup_logging(hs, config, use_worker_options=False) + logger.info("Preparing database: %s...", config.database_config["name"]) try: diff --git a/synapse/app/media_repository.py b/synapse/app/media_repository.py index 3a168577c7..2ac783ffa3 100644 --- a/synapse/app/media_repository.py +++ b/synapse/app/media_repository.py @@ -155,8 +155,6 @@ def start(config_options): "Please add ``enable_media_repo: false`` to the main config\n" ) - setup_logging(config, use_worker_options=True) - events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -169,6 +167,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ss, config, use_worker_options=True) + ss.setup() reactor.addSystemEventTrigger( "before", "startup", _base.start, ss, config.worker_listeners diff --git a/synapse/app/pusher.py b/synapse/app/pusher.py index 692ffa2f04..d84732ee3c 100644 --- a/synapse/app/pusher.py +++ b/synapse/app/pusher.py @@ -184,8 +184,6 @@ def start(config_options): assert config.worker_app == "synapse.app.pusher" - setup_logging(config, use_worker_options=True) - events.USE_FROZEN_DICTS = config.use_frozen_dicts if config.start_pushers: @@ -210,6 +208,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ps, config, use_worker_options=True) + ps.setup() def start(): diff --git a/synapse/app/synchrotron.py b/synapse/app/synchrotron.py index a1c3b162f7..473026fce5 100644 --- a/synapse/app/synchrotron.py +++ b/synapse/app/synchrotron.py @@ -435,8 +435,6 @@ def start(config_options): assert config.worker_app == "synapse.app.synchrotron" - setup_logging(config, use_worker_options=True) - synapse.events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -450,6 +448,8 @@ def start(config_options): application_service_handler=SynchrotronApplicationService(), ) + setup_logging(ss, config, use_worker_options=True) + ss.setup() reactor.addSystemEventTrigger( "before", "startup", _base.start, ss, config.worker_listeners diff --git a/synapse/app/user_dir.py b/synapse/app/user_dir.py index cb29a1afab..e01afb39f2 100644 --- a/synapse/app/user_dir.py +++ b/synapse/app/user_dir.py @@ -197,8 +197,6 @@ def start(config_options): assert config.worker_app == "synapse.app.user_dir" - setup_logging(config, use_worker_options=True) - events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -223,6 +221,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ss, config, use_worker_options=True) + ss.setup() reactor.addSystemEventTrigger( "before", "startup", _base.start, ss, config.worker_listeners diff --git a/synapse/config/logger.py b/synapse/config/logger.py index d321d00b80..981df5a10c 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -25,6 +25,10 @@ from twisted.logger import STDLibLogObserver, globalLogBeginner import synapse from synapse.app import _base as appbase +from synapse.logging._structured import ( + reload_structured_logging, + setup_structured_logging, +) from synapse.logging.context import LoggingContextFilter from synapse.util.versionstring import get_version_string @@ -119,21 +123,10 @@ class LoggingConfig(Config): log_config_file.write(DEFAULT_LOG_CONFIG.substitute(log_file=log_file)) -def setup_logging(config, use_worker_options=False): - """ Set up python logging - - Args: - config (LoggingConfig | synapse.config.workers.WorkerConfig): - configuration data - - use_worker_options (bool): True to use the 'worker_log_config' option - instead of 'log_config'. - - register_sighup (func | None): Function to call to register a - sighup handler. +def _setup_stdlib_logging(config, log_config): + """ + Set up Python stdlib logging. """ - log_config = config.worker_log_config if use_worker_options else config.log_config - if log_config is None: log_format = ( "%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s" @@ -151,35 +144,10 @@ def setup_logging(config, use_worker_options=False): handler.addFilter(LoggingContextFilter(request="")) logger.addHandler(handler) else: + logging.config.dictConfig(log_config) - def load_log_config(): - with open(log_config, "r") as f: - logging.config.dictConfig(yaml.safe_load(f)) - - def sighup(*args): - # it might be better to use a file watcher or something for this. - load_log_config() - logging.info("Reloaded log config from %s due to SIGHUP", log_config) - - load_log_config() - appbase.register_sighup(sighup) - - # make sure that the first thing we log is a thing we can grep backwards - # for - logging.warn("***** STARTING SERVER *****") - logging.warn("Server %s version %s", sys.argv[0], get_version_string(synapse)) - logging.info("Server hostname: %s", config.server_name) - - # It's critical to point twisted's internal logging somewhere, otherwise it - # stacks up and leaks kup to 64K object; - # see: https://twistedmatrix.com/trac/ticket/8164 - # - # Routing to the python logging framework could be a performance problem if - # the handlers blocked for a long time as python.logging is a blocking API - # see https://twistedmatrix.com/documents/current/core/howto/logger.html - # filed as https://github.com/matrix-org/synapse/issues/1727 - # - # However this may not be too much of a problem if we are just writing to a file. + # Route Twisted's native logging through to the standard library logging + # system. observer = STDLibLogObserver() def _log(event): @@ -201,3 +169,54 @@ def setup_logging(config, use_worker_options=False): ) if not config.no_redirect_stdio: print("Redirected stdout/stderr to logs") + + +def _reload_stdlib_logging(*args, log_config=None): + logger = logging.getLogger("") + + if not log_config: + logger.warn("Reloaded a blank config?") + + logging.config.dictConfig(log_config) + + +def setup_logging(hs, config, use_worker_options=False): + """ + Set up the logging subsystem. + + Args: + config (LoggingConfig | synapse.config.workers.WorkerConfig): + configuration data + + use_worker_options (bool): True to use the 'worker_log_config' option + instead of 'log_config'. + """ + log_config = config.worker_log_config if use_worker_options else config.log_config + + def read_config(*args, callback=None): + if log_config is None: + return None + + with open(log_config, "rb") as f: + log_config_body = yaml.safe_load(f.read()) + + if callback: + callback(log_config=log_config_body) + logging.info("Reloaded log config from %s due to SIGHUP", log_config) + + return log_config_body + + log_config_body = read_config() + + if log_config_body and log_config_body.get("structured") is True: + setup_structured_logging(hs, config, log_config_body) + appbase.register_sighup(read_config, callback=reload_structured_logging) + else: + _setup_stdlib_logging(config, log_config_body) + appbase.register_sighup(read_config, callback=_reload_stdlib_logging) + + # make sure that the first thing we log is a thing we can grep backwards + # for + logging.warn("***** STARTING SERVER *****") + logging.warn("Server %s version %s", sys.argv[0], get_version_string(synapse)) + logging.info("Server hostname: %s", config.server_name) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index c86903b98b..94306c94a9 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -326,8 +326,9 @@ class FederationHandler(BaseHandler): ours = yield self.store.get_state_groups_ids(room_id, seen) # state_maps is a list of mappings from (type, state_key) to event_id - # type: list[dict[tuple[str, str], str]] - state_maps = list(ours.values()) + state_maps = list( + ours.values() + ) # type: list[dict[tuple[str, str], str]] # we don't need this any more, let's delete it. del ours diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py new file mode 100644 index 0000000000..0367d6dfc4 --- /dev/null +++ b/synapse/logging/_structured.py @@ -0,0 +1,374 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os.path +import sys +import typing +import warnings + +import attr +from constantly import NamedConstant, Names, ValueConstant, Values +from zope.interface import implementer + +from twisted.logger import ( + FileLogObserver, + FilteringLogObserver, + ILogObserver, + LogBeginner, + Logger, + LogLevel, + LogLevelFilterPredicate, + LogPublisher, + eventAsText, + globalLogBeginner, + jsonFileLogObserver, +) + +from synapse.config._base import ConfigError +from synapse.logging._terse_json import ( + TerseJSONToConsoleLogObserver, + TerseJSONToTCPLogObserver, +) +from synapse.logging.context import LoggingContext + + +def stdlib_log_level_to_twisted(level: str) -> LogLevel: + """ + Convert a stdlib log level to Twisted's log level. + """ + lvl = level.lower().replace("warning", "warn") + return LogLevel.levelWithName(lvl) + + +@attr.s +@implementer(ILogObserver) +class LogContextObserver(object): + """ + An ILogObserver which adds Synapse-specific log context information. + + Attributes: + observer (ILogObserver): The target parent observer. + """ + + observer = attr.ib() + + def __call__(self, event: dict) -> None: + """ + Consume a log event and emit it to the parent observer after filtering + and adding log context information. + + Args: + event (dict) + """ + # Filter out some useless events that Twisted outputs + if "log_text" in event: + if event["log_text"].startswith("DNSDatagramProtocol starting on "): + return + + if event["log_text"].startswith("(UDP Port "): + return + + if event["log_text"].startswith("Timing out client") or event[ + "log_format" + ].startswith("Timing out client"): + return + + context = LoggingContext.current_context() + + # Copy the context information to the log event. + if context is not None: + context.copy_to_twisted_log_entry(event) + else: + # If there's no logging context, not even the root one, we might be + # starting up or it might be from non-Synapse code. Log it as if it + # came from the root logger. + event["request"] = None + event["scope"] = None + + self.observer(event) + + +class PythonStdlibToTwistedLogger(logging.Handler): + """ + Transform a Python stdlib log message into a Twisted one. + """ + + def __init__(self, observer, *args, **kwargs): + """ + Args: + observer (ILogObserver): A Twisted logging observer. + *args, **kwargs: Args/kwargs to be passed to logging.Handler. + """ + self.observer = observer + super().__init__(*args, **kwargs) + + def emit(self, record: logging.LogRecord) -> None: + """ + Emit a record to Twisted's observer. + + Args: + record (logging.LogRecord) + """ + + self.observer( + { + "log_time": record.created, + "log_text": record.getMessage(), + "log_format": "{log_text}", + "log_namespace": record.name, + "log_level": stdlib_log_level_to_twisted(record.levelname), + } + ) + + +def SynapseFileLogObserver(outFile: typing.io.TextIO) -> FileLogObserver: + """ + A log observer that formats events like the traditional log formatter and + sends them to `outFile`. + + Args: + outFile (file object): The file object to write to. + """ + + def formatEvent(_event: dict) -> str: + event = dict(_event) + event["log_level"] = event["log_level"].name.upper() + event["log_format"] = "- {log_namespace} - {log_level} - {request} - " + ( + event.get("log_format", "{log_text}") or "{log_text}" + ) + return eventAsText(event, includeSystem=False) + "\n" + + return FileLogObserver(outFile, formatEvent) + + +class DrainType(Names): + CONSOLE = NamedConstant() + CONSOLE_JSON = NamedConstant() + CONSOLE_JSON_TERSE = NamedConstant() + FILE = NamedConstant() + FILE_JSON = NamedConstant() + NETWORK_JSON_TERSE = NamedConstant() + + +class OutputPipeType(Values): + stdout = ValueConstant(sys.__stdout__) + stderr = ValueConstant(sys.__stderr__) + + +@attr.s +class DrainConfiguration(object): + name = attr.ib() + type = attr.ib() + location = attr.ib() + options = attr.ib(default=None) + + +@attr.s +class NetworkJSONTerseOptions(object): + maximum_buffer = attr.ib(type=int) + + +DEFAULT_LOGGERS = {"synapse": {"level": "INFO"}} + + +def parse_drain_configs( + drains: dict +) -> typing.Generator[DrainConfiguration, None, None]: + """ + Parse the drain configurations. + + Args: + drains (dict): A list of drain configurations. + + Yields: + DrainConfiguration instances. + + Raises: + ConfigError: If any of the drain configuration items are invalid. + """ + for name, config in drains.items(): + if "type" not in config: + raise ConfigError("Logging drains require a 'type' key.") + + try: + logging_type = DrainType.lookupByName(config["type"].upper()) + except ValueError: + raise ConfigError( + "%s is not a known logging drain type." % (config["type"],) + ) + + if logging_type in [ + DrainType.CONSOLE, + DrainType.CONSOLE_JSON, + DrainType.CONSOLE_JSON_TERSE, + ]: + location = config.get("location") + if location is None or location not in ["stdout", "stderr"]: + raise ConfigError( + ( + "The %s drain needs the 'location' key set to " + "either 'stdout' or 'stderr'." + ) + % (logging_type,) + ) + + pipe = OutputPipeType.lookupByName(location).value + + yield DrainConfiguration(name=name, type=logging_type, location=pipe) + + elif logging_type in [DrainType.FILE, DrainType.FILE_JSON]: + if "location" not in config: + raise ConfigError( + "The %s drain needs the 'location' key set." % (logging_type,) + ) + + location = config.get("location") + if os.path.abspath(location) != location: + raise ConfigError( + "File paths need to be absolute, '%s' is a relative path" + % (location,) + ) + yield DrainConfiguration(name=name, type=logging_type, location=location) + + elif logging_type in [DrainType.NETWORK_JSON_TERSE]: + host = config.get("host") + port = config.get("port") + maximum_buffer = config.get("maximum_buffer", 1000) + yield DrainConfiguration( + name=name, + type=logging_type, + location=(host, port), + options=NetworkJSONTerseOptions(maximum_buffer=maximum_buffer), + ) + + else: + raise ConfigError( + "The %s drain type is currently not implemented." + % (config["type"].upper(),) + ) + + +def setup_structured_logging( + hs, + config, + log_config: dict, + logBeginner: LogBeginner = globalLogBeginner, + redirect_stdlib_logging: bool = True, +) -> LogPublisher: + """ + Set up Twisted's structured logging system. + + Args: + hs: The homeserver to use. + config (HomeserverConfig): The configuration of the Synapse homeserver. + log_config (dict): The log configuration to use. + """ + if config.no_redirect_stdio: + raise ConfigError( + "no_redirect_stdio cannot be defined using structured logging." + ) + + logger = Logger() + + if "drains" not in log_config: + raise ConfigError("The logging configuration requires a list of drains.") + + observers = [] + + for observer in parse_drain_configs(log_config["drains"]): + # Pipe drains + if observer.type == DrainType.CONSOLE: + logger.debug( + "Starting up the {name} console logger drain", name=observer.name + ) + observers.append(SynapseFileLogObserver(observer.location)) + elif observer.type == DrainType.CONSOLE_JSON: + logger.debug( + "Starting up the {name} JSON console logger drain", name=observer.name + ) + observers.append(jsonFileLogObserver(observer.location)) + elif observer.type == DrainType.CONSOLE_JSON_TERSE: + logger.debug( + "Starting up the {name} terse JSON console logger drain", + name=observer.name, + ) + observers.append( + TerseJSONToConsoleLogObserver(observer.location, metadata={}) + ) + + # File drains + elif observer.type == DrainType.FILE: + logger.debug("Starting up the {name} file logger drain", name=observer.name) + log_file = open(observer.location, "at", buffering=1, encoding="utf8") + observers.append(SynapseFileLogObserver(log_file)) + elif observer.type == DrainType.FILE_JSON: + logger.debug( + "Starting up the {name} JSON file logger drain", name=observer.name + ) + log_file = open(observer.location, "at", buffering=1, encoding="utf8") + observers.append(jsonFileLogObserver(log_file)) + + elif observer.type == DrainType.NETWORK_JSON_TERSE: + metadata = {"server_name": hs.config.server_name} + log_observer = TerseJSONToTCPLogObserver( + hs=hs, + host=observer.location[0], + port=observer.location[1], + metadata=metadata, + maximum_buffer=observer.options.maximum_buffer, + ) + log_observer.start() + observers.append(log_observer) + else: + # We should never get here, but, just in case, throw an error. + raise ConfigError("%s drain type cannot be configured" % (observer.type,)) + + publisher = LogPublisher(*observers) + log_filter = LogLevelFilterPredicate() + + for namespace, namespace_config in log_config.get( + "loggers", DEFAULT_LOGGERS + ).items(): + # Set the log level for twisted.logger.Logger namespaces + log_filter.setLogLevelForNamespace( + namespace, + stdlib_log_level_to_twisted(namespace_config.get("level", "INFO")), + ) + + # Also set the log levels for the stdlib logger namespaces, to prevent + # them getting to PythonStdlibToTwistedLogger and having to be formatted + if "level" in namespace_config: + logging.getLogger(namespace).setLevel(namespace_config.get("level")) + + f = FilteringLogObserver(publisher, [log_filter]) + lco = LogContextObserver(f) + + if redirect_stdlib_logging: + stuff_into_twisted = PythonStdlibToTwistedLogger(lco) + stdliblogger = logging.getLogger() + stdliblogger.addHandler(stuff_into_twisted) + + # Always redirect standard I/O, otherwise other logging outputs might miss + # it. + logBeginner.beginLoggingTo([lco], redirectStandardIO=True) + + return publisher + + +def reload_structured_logging(*args, log_config=None) -> None: + warnings.warn( + "Currently the structured logging system can not be reloaded, doing nothing" + ) diff --git a/synapse/logging/_terse_json.py b/synapse/logging/_terse_json.py new file mode 100644 index 0000000000..7f1e8f23fe --- /dev/null +++ b/synapse/logging/_terse_json.py @@ -0,0 +1,278 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Log formatters that output terse JSON. +""" + +import sys +from collections import deque +from ipaddress import IPv4Address, IPv6Address, ip_address +from math import floor +from typing.io import TextIO + +import attr +from simplejson import dumps + +from twisted.application.internet import ClientService +from twisted.internet.endpoints import ( + HostnameEndpoint, + TCP4ClientEndpoint, + TCP6ClientEndpoint, +) +from twisted.internet.protocol import Factory, Protocol +from twisted.logger import FileLogObserver, Logger +from twisted.python.failure import Failure + + +def flatten_event(event: dict, metadata: dict, include_time: bool = False): + """ + Flatten a Twisted logging event to an dictionary capable of being sent + as a log event to a logging aggregation system. + + The format is vastly simplified and is not designed to be a "human readable + string" in the sense that traditional logs are. Instead, the structure is + optimised for searchability and filtering, with human-understandable log + keys. + + Args: + event (dict): The Twisted logging event we are flattening. + metadata (dict): Additional data to include with each log message. This + can be information like the server name. Since the target log + consumer does not know who we are other than by host IP, this + allows us to forward through static information. + include_time (bool): Should we include the `time` key? If False, the + event time is stripped from the event. + """ + new_event = {} + + # If it's a failure, make the new event's log_failure be the traceback text. + if "log_failure" in event: + new_event["log_failure"] = event["log_failure"].getTraceback() + + # If it's a warning, copy over a string representation of the warning. + if "warning" in event: + new_event["warning"] = str(event["warning"]) + + # Stdlib logging events have "log_text" as their human-readable portion, + # Twisted ones have "log_format". For now, include the log_format, so that + # context only given in the log format (e.g. what is being logged) is + # available. + if "log_text" in event: + new_event["log"] = event["log_text"] + else: + new_event["log"] = event["log_format"] + + # We want to include the timestamp when forwarding over the network, but + # exclude it when we are writing to stdout. This is because the log ingester + # (e.g. logstash, fluentd) can add its own timestamp. + if include_time: + new_event["time"] = round(event["log_time"], 2) + + # Convert the log level to a textual representation. + new_event["level"] = event["log_level"].name.upper() + + # Ignore these keys, and do not transfer them over to the new log object. + # They are either useless (isError), transferred manually above (log_time, + # log_level, etc), or contain Python objects which are not useful for output + # (log_logger, log_source). + keys_to_delete = [ + "isError", + "log_failure", + "log_format", + "log_level", + "log_logger", + "log_source", + "log_system", + "log_time", + "log_text", + "observer", + "warning", + ] + + # If it's from the Twisted legacy logger (twisted.python.log), it adds some + # more keys we want to purge. + if event.get("log_namespace") == "log_legacy": + keys_to_delete.extend(["message", "system", "time"]) + + # Rather than modify the dictionary in place, construct a new one with only + # the content we want. The original event should be considered 'frozen'. + for key in event.keys(): + + if key in keys_to_delete: + continue + + if isinstance(event[key], (str, int, bool, float)) or event[key] is None: + # If it's a plain type, include it as is. + new_event[key] = event[key] + else: + # If it's not one of those basic types, write out a string + # representation. This should probably be a warning in development, + # so that we are sure we are only outputting useful data. + new_event[key] = str(event[key]) + + # Add the metadata information to the event (e.g. the server_name). + new_event.update(metadata) + + return new_event + + +def TerseJSONToConsoleLogObserver(outFile: TextIO, metadata: dict) -> FileLogObserver: + """ + A log observer that formats events to a flattened JSON representation. + + Args: + outFile: The file object to write to. + metadata: Metadata to be added to each log object. + """ + + def formatEvent(_event: dict) -> str: + flattened = flatten_event(_event, metadata) + return dumps(flattened, ensure_ascii=False, separators=(",", ":")) + "\n" + + return FileLogObserver(outFile, formatEvent) + + +@attr.s +class TerseJSONToTCPLogObserver(object): + """ + An IObserver that writes JSON logs to a TCP target. + + Args: + hs (HomeServer): The Homeserver that is being logged for. + host: The host of the logging target. + port: The logging target's port. + metadata: Metadata to be added to each log entry. + """ + + hs = attr.ib() + host = attr.ib(type=str) + port = attr.ib(type=int) + metadata = attr.ib(type=dict) + maximum_buffer = attr.ib(type=int) + _buffer = attr.ib(default=attr.Factory(deque), type=deque) + _writer = attr.ib(default=None) + _logger = attr.ib(default=attr.Factory(Logger)) + + def start(self) -> None: + + # Connect without DNS lookups if it's a direct IP. + try: + ip = ip_address(self.host) + if isinstance(ip, IPv4Address): + endpoint = TCP4ClientEndpoint( + self.hs.get_reactor(), self.host, self.port + ) + elif isinstance(ip, IPv6Address): + endpoint = TCP6ClientEndpoint( + self.hs.get_reactor(), self.host, self.port + ) + except ValueError: + endpoint = HostnameEndpoint(self.hs.get_reactor(), self.host, self.port) + + factory = Factory.forProtocol(Protocol) + self._service = ClientService(endpoint, factory, clock=self.hs.get_reactor()) + self._service.startService() + + def _write_loop(self) -> None: + """ + Implement the write loop. + """ + if self._writer: + return + + self._writer = self._service.whenConnected() + + @self._writer.addBoth + def writer(r): + if isinstance(r, Failure): + r.printTraceback(file=sys.__stderr__) + self._writer = None + self.hs.get_reactor().callLater(1, self._write_loop) + return + + try: + for event in self._buffer: + r.transport.write( + dumps(event, ensure_ascii=False, separators=(",", ":")).encode( + "utf8" + ) + ) + r.transport.write(b"\n") + self._buffer.clear() + except Exception as e: + sys.__stderr__.write("Failed writing out logs with %s\n" % (str(e),)) + + self._writer = False + self.hs.get_reactor().callLater(1, self._write_loop) + + def _handle_pressure(self) -> None: + """ + Handle backpressure by shedding events. + + The buffer will, in this order, until the buffer is below the maximum: + - Shed DEBUG events + - Shed INFO events + - Shed the middle 50% of the events. + """ + if len(self._buffer) <= self.maximum_buffer: + return + + # Strip out DEBUGs + self._buffer = deque( + filter(lambda event: event["level"] != "DEBUG", self._buffer) + ) + + if len(self._buffer) <= self.maximum_buffer: + return + + # Strip out INFOs + self._buffer = deque( + filter(lambda event: event["level"] != "INFO", self._buffer) + ) + + if len(self._buffer) <= self.maximum_buffer: + return + + # Cut the middle entries out + buffer_split = floor(self.maximum_buffer / 2) + + old_buffer = self._buffer + self._buffer = deque() + + for i in range(buffer_split): + self._buffer.append(old_buffer.popleft()) + + end_buffer = [] + for i in range(buffer_split): + end_buffer.append(old_buffer.pop()) + + self._buffer.extend(reversed(end_buffer)) + + def __call__(self, event: dict) -> None: + flattened = flatten_event(event, self.metadata, include_time=True) + self._buffer.append(flattened) + + # Handle backpressure, if it exists. + try: + self._handle_pressure() + except Exception: + # If handling backpressure fails,clear the buffer and log the + # exception. + self._buffer.clear() + self._logger.failure("Failed clearing backpressure") + + # Try and write immediately. + self._write_loop() diff --git a/synapse/logging/context.py b/synapse/logging/context.py index b456c31f70..63379bfb93 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py @@ -25,6 +25,7 @@ See doc/log_contexts.rst for details on how this works. import logging import threading import types +from typing import Any, List from twisted.internet import defer, threads @@ -194,7 +195,7 @@ class LoggingContext(object): class Sentinel(object): """Sentinel to represent the root context""" - __slots__ = [] + __slots__ = [] # type: List[Any] def __str__(self): return "sentinel" @@ -202,6 +203,10 @@ class LoggingContext(object): def copy_to(self, record): pass + def copy_to_twisted_log_entry(self, record): + record["request"] = None + record["scope"] = None + def start(self): pass @@ -330,6 +335,13 @@ class LoggingContext(object): # we also track the current scope: record.scope = self.scope + def copy_to_twisted_log_entry(self, record): + """ + Copy logging fields from this context to a Twisted log record. + """ + record["request"] = self.request + record["scope"] = self.scope + def start(self): if get_thread_id() != self.main_thread: logger.warning("Started logcontext %s on different thread", self) diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index c6465c0386..ec0ac547c1 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -47,9 +47,9 @@ REQUIREMENTS = [ "idna>=2.5", # validating SSL certs for IP addresses requires service_identity 18.1. "service_identity>=18.1.0", - # our logcontext handling relies on the ability to cancel inlineCallbacks - # (https://twistedmatrix.com/trac/ticket/4632) which landed in Twisted 18.7. - "Twisted>=18.7.0", + # Twisted 18.9 introduces some logger improvements that the structured + # logger utilises + "Twisted>=18.9.0", "treq>=15.1", # Twisted has required pyopenssl 16.0 since about Twisted 16.6. "pyopenssl>=16.0.0", diff --git a/tests/logging/__init__.py b/tests/logging/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/logging/test_structured.py b/tests/logging/test_structured.py new file mode 100644 index 0000000000..a786de0233 --- /dev/null +++ b/tests/logging/test_structured.py @@ -0,0 +1,197 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import os.path +import shutil +import sys +import textwrap + +from twisted.logger import Logger, eventAsText, eventsFromJSONLogFile + +from synapse.config.logger import setup_logging +from synapse.logging._structured import setup_structured_logging +from synapse.logging.context import LoggingContext + +from tests.unittest import DEBUG, HomeserverTestCase + + +class FakeBeginner(object): + def beginLoggingTo(self, observers, **kwargs): + self.observers = observers + + +class StructuredLoggingTestCase(HomeserverTestCase): + """ + Tests for Synapse's structured logging support. + """ + + def test_output_to_json_round_trip(self): + """ + Synapse logs can be outputted to JSON and then read back again. + """ + temp_dir = self.mktemp() + os.mkdir(temp_dir) + self.addCleanup(shutil.rmtree, temp_dir) + + json_log_file = os.path.abspath(os.path.join(temp_dir, "out.json")) + + log_config = { + "drains": {"jsonfile": {"type": "file_json", "location": json_log_file}} + } + + # Begin the logger with our config + beginner = FakeBeginner() + setup_structured_logging( + self.hs, self.hs.config, log_config, logBeginner=beginner + ) + + # Make a logger and send an event + logger = Logger( + namespace="tests.logging.test_structured", observer=beginner.observers[0] + ) + logger.info("Hello there, {name}!", name="wally") + + # Read the log file and check it has the event we sent + with open(json_log_file, "r") as f: + logged_events = list(eventsFromJSONLogFile(f)) + self.assertEqual(len(logged_events), 1) + + # The event pulled from the file should render fine + self.assertEqual( + eventAsText(logged_events[0], includeTimestamp=False), + "[tests.logging.test_structured#info] Hello there, wally!", + ) + + def test_output_to_text(self): + """ + Synapse logs can be outputted to text. + """ + temp_dir = self.mktemp() + os.mkdir(temp_dir) + self.addCleanup(shutil.rmtree, temp_dir) + + log_file = os.path.abspath(os.path.join(temp_dir, "out.log")) + + log_config = {"drains": {"file": {"type": "file", "location": log_file}}} + + # Begin the logger with our config + beginner = FakeBeginner() + setup_structured_logging( + self.hs, self.hs.config, log_config, logBeginner=beginner + ) + + # Make a logger and send an event + logger = Logger( + namespace="tests.logging.test_structured", observer=beginner.observers[0] + ) + logger.info("Hello there, {name}!", name="wally") + + # Read the log file and check it has the event we sent + with open(log_file, "r") as f: + logged_events = f.read().strip().split("\n") + self.assertEqual(len(logged_events), 1) + + # The event pulled from the file should render fine + self.assertTrue( + logged_events[0].endswith( + " - tests.logging.test_structured - INFO - None - Hello there, wally!" + ) + ) + + def test_collects_logcontext(self): + """ + Test that log outputs have the attached logging context. + """ + log_config = {"drains": {}} + + # Begin the logger with our config + beginner = FakeBeginner() + publisher = setup_structured_logging( + self.hs, self.hs.config, log_config, logBeginner=beginner + ) + + logs = [] + + publisher.addObserver(logs.append) + + # Make a logger and send an event + logger = Logger( + namespace="tests.logging.test_structured", observer=beginner.observers[0] + ) + + with LoggingContext("testcontext", request="somereq"): + logger.info("Hello there, {name}!", name="steve") + + self.assertEqual(len(logs), 1) + self.assertEqual(logs[0]["request"], "somereq") + + +class StructuredLoggingConfigurationFileTestCase(HomeserverTestCase): + def make_homeserver(self, reactor, clock): + + tempdir = self.mktemp() + os.mkdir(tempdir) + log_config_file = os.path.abspath(os.path.join(tempdir, "log.config.yaml")) + self.homeserver_log = os.path.abspath(os.path.join(tempdir, "homeserver.log")) + + config = self.default_config() + config["log_config"] = log_config_file + + with open(log_config_file, "w") as f: + f.write( + textwrap.dedent( + """\ + structured: true + + drains: + file: + type: file_json + location: %s + """ + % (self.homeserver_log,) + ) + ) + + self.addCleanup(self._sys_cleanup) + + return self.setup_test_homeserver(config=config) + + def _sys_cleanup(self): + sys.stdout = sys.__stdout__ + sys.stderr = sys.__stderr__ + + # Do not remove! We need the logging system to be set other than WARNING. + @DEBUG + def test_log_output(self): + """ + When a structured logging config is given, Synapse will use it. + """ + setup_logging(self.hs, self.hs.config) + + # Make a logger and send an event + logger = Logger(namespace="tests.logging.test_structured") + + with LoggingContext("testcontext", request="somereq"): + logger.info("Hello there, {name}!", name="steve") + + with open(self.homeserver_log, "r") as f: + logged_events = [ + eventAsText(x, includeTimestamp=False) for x in eventsFromJSONLogFile(f) + ] + + logs = "\n".join(logged_events) + self.assertTrue("***** STARTING SERVER *****" in logs) + self.assertTrue("Hello there, steve!" in logs) diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py new file mode 100644 index 0000000000..514282591d --- /dev/null +++ b/tests/logging/test_terse_json.py @@ -0,0 +1,234 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +from collections import Counter + +from twisted.logger import Logger + +from synapse.logging._structured import setup_structured_logging + +from tests.server import connect_client +from tests.unittest import HomeserverTestCase + +from .test_structured import FakeBeginner + + +class TerseJSONTCPTestCase(HomeserverTestCase): + def test_log_output(self): + """ + The Terse JSON outputter delivers simplified structured logs over TCP. + """ + log_config = { + "drains": { + "tersejson": { + "type": "network_json_terse", + "host": "127.0.0.1", + "port": 8000, + } + } + } + + # Begin the logger with our config + beginner = FakeBeginner() + setup_structured_logging( + self.hs, self.hs.config, log_config, logBeginner=beginner + ) + + logger = Logger( + namespace="tests.logging.test_terse_json", observer=beginner.observers[0] + ) + logger.info("Hello there, {name}!", name="wally") + + # Trigger the connection + self.pump() + + _, server = connect_client(self.reactor, 0) + + # Trigger data being sent + self.pump() + + # One log message, with a single trailing newline + logs = server.data.decode("utf8").splitlines() + self.assertEqual(len(logs), 1) + self.assertEqual(server.data.count(b"\n"), 1) + + log = json.loads(logs[0]) + + # The terse logger should give us these keys. + expected_log_keys = [ + "log", + "time", + "level", + "log_namespace", + "request", + "scope", + "server_name", + "name", + ] + self.assertEqual(set(log.keys()), set(expected_log_keys)) + + # It contains the data we expect. + self.assertEqual(log["name"], "wally") + + def test_log_backpressure_debug(self): + """ + When backpressure is hit, DEBUG logs will be shed. + """ + log_config = { + "loggers": {"synapse": {"level": "DEBUG"}}, + "drains": { + "tersejson": { + "type": "network_json_terse", + "host": "127.0.0.1", + "port": 8000, + "maximum_buffer": 10, + } + }, + } + + # Begin the logger with our config + beginner = FakeBeginner() + setup_structured_logging( + self.hs, + self.hs.config, + log_config, + logBeginner=beginner, + redirect_stdlib_logging=False, + ) + + logger = Logger( + namespace="synapse.logging.test_terse_json", observer=beginner.observers[0] + ) + + # Send some debug messages + for i in range(0, 3): + logger.debug("debug %s" % (i,)) + + # Send a bunch of useful messages + for i in range(0, 7): + logger.info("test message %s" % (i,)) + + # The last debug message pushes it past the maximum buffer + logger.debug("too much debug") + + # Allow the reconnection + _, server = connect_client(self.reactor, 0) + self.pump() + + # Only the 7 infos made it through, the debugs were elided + logs = server.data.splitlines() + self.assertEqual(len(logs), 7) + + def test_log_backpressure_info(self): + """ + When backpressure is hit, DEBUG and INFO logs will be shed. + """ + log_config = { + "loggers": {"synapse": {"level": "DEBUG"}}, + "drains": { + "tersejson": { + "type": "network_json_terse", + "host": "127.0.0.1", + "port": 8000, + "maximum_buffer": 10, + } + }, + } + + # Begin the logger with our config + beginner = FakeBeginner() + setup_structured_logging( + self.hs, + self.hs.config, + log_config, + logBeginner=beginner, + redirect_stdlib_logging=False, + ) + + logger = Logger( + namespace="synapse.logging.test_terse_json", observer=beginner.observers[0] + ) + + # Send some debug messages + for i in range(0, 3): + logger.debug("debug %s" % (i,)) + + # Send a bunch of useful messages + for i in range(0, 10): + logger.warn("test warn %s" % (i,)) + + # Send a bunch of info messages + for i in range(0, 3): + logger.info("test message %s" % (i,)) + + # The last debug message pushes it past the maximum buffer + logger.debug("too much debug") + + # Allow the reconnection + client, server = connect_client(self.reactor, 0) + self.pump() + + # The 10 warnings made it through, the debugs and infos were elided + logs = list(map(json.loads, server.data.decode("utf8").splitlines())) + self.assertEqual(len(logs), 10) + + self.assertEqual(Counter([x["level"] for x in logs]), {"WARN": 10}) + + def test_log_backpressure_cut_middle(self): + """ + When backpressure is hit, and no more DEBUG and INFOs cannot be culled, + it will cut the middle messages out. + """ + log_config = { + "loggers": {"synapse": {"level": "DEBUG"}}, + "drains": { + "tersejson": { + "type": "network_json_terse", + "host": "127.0.0.1", + "port": 8000, + "maximum_buffer": 10, + } + }, + } + + # Begin the logger with our config + beginner = FakeBeginner() + setup_structured_logging( + self.hs, + self.hs.config, + log_config, + logBeginner=beginner, + redirect_stdlib_logging=False, + ) + + logger = Logger( + namespace="synapse.logging.test_terse_json", observer=beginner.observers[0] + ) + + # Send a bunch of useful messages + for i in range(0, 20): + logger.warn("test warn", num=i) + + # Allow the reconnection + client, server = connect_client(self.reactor, 0) + self.pump() + + # The first five and last five warnings made it through, the debugs and + # infos were elided + logs = list(map(json.loads, server.data.decode("utf8").splitlines())) + self.assertEqual(len(logs), 10) + self.assertEqual(Counter([x["level"] for x in logs]), {"WARN": 10}) + self.assertEqual([0, 1, 2, 3, 4, 15, 16, 17, 18, 19], [x["num"] for x in logs]) diff --git a/tests/server.py b/tests/server.py index e573c4e4c5..c8269619b1 100644 --- a/tests/server.py +++ b/tests/server.py @@ -11,9 +11,13 @@ from twisted.internet import address, threads, udp from twisted.internet._resolver import SimpleResolverComplexifier from twisted.internet.defer import Deferred, fail, succeed from twisted.internet.error import DNSLookupError -from twisted.internet.interfaces import IReactorPluggableNameResolver, IResolverSimple +from twisted.internet.interfaces import ( + IReactorPluggableNameResolver, + IReactorTCP, + IResolverSimple, +) from twisted.python.failure import Failure -from twisted.test.proto_helpers import MemoryReactorClock +from twisted.test.proto_helpers import AccumulatingProtocol, MemoryReactorClock from twisted.web.http import unquote from twisted.web.http_headers import Headers @@ -465,3 +469,22 @@ class FakeTransport(object): self.buffer = self.buffer[len(to_write) :] if self.buffer and self.autoflush: self._reactor.callLater(0.0, self.flush) + + +def connect_client(reactor: IReactorTCP, client_id: int) -> AccumulatingProtocol: + """ + Connect a client to a fake TCP transport. + + Args: + reactor + factory: The connecting factory to build. + """ + factory = reactor.tcpClients[client_id][2] + client = factory.buildProtocol(None) + server = AccumulatingProtocol() + server.makeConnection(FakeTransport(client, reactor)) + client.makeConnection(FakeTransport(server, reactor)) + + reactor.tcpClients.pop(client_id) + + return client, server diff --git a/tox.ini b/tox.ini index 09b4b8fc3c..f9a3b7e49a 100644 --- a/tox.ini +++ b/tox.ini @@ -146,3 +146,13 @@ commands = coverage combine coverage xml codecov -X gcov + +[testenv:mypy] +basepython = python3.5 +deps = + {[base]deps} + mypy +extras = all +commands = mypy --ignore-missing-imports \ + synapse/logging/_structured.py \ + synapse/logging/_terse_json.py \ No newline at end of file From 6d97843793d59bc5d307475a6a6185ff107e116b Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Wed, 28 Aug 2019 13:12:22 +0100 Subject: [PATCH 106/110] Config templating (#5900) Template config files * Imagine a system composed entirely of x, y, z etc and the basic operations.. Wait George, why XOR? Why not just neq? George: Eh, I didn't think of that.. Co-Authored-By: Erik Johnston --- changelog.d/5900.feature | 1 + docs/sample_config.yaml | 16 +++--- synapse/config/_base.py | 37 +++++++++++++ synapse/config/database.py | 27 ++++++--- synapse/config/server.py | 84 ++++++++++++++++++++++------ synapse/config/tls.py | 50 +++++++++++++---- tests/config/test_database.py | 52 +++++++++++++++++ tests/config/test_server.py | 101 +++++++++++++++++++++++++++++++++- tests/config/test_tls.py | 44 +++++++++++++++ 9 files changed, 366 insertions(+), 46 deletions(-) create mode 100644 changelog.d/5900.feature create mode 100644 tests/config/test_database.py diff --git a/changelog.d/5900.feature b/changelog.d/5900.feature new file mode 100644 index 0000000000..b62d88a76b --- /dev/null +++ b/changelog.d/5900.feature @@ -0,0 +1 @@ +Add support for config templating. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index ae1cafc5f3..6da1167632 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -205,9 +205,9 @@ listeners: # - port: 8008 tls: false - bind_addresses: ['::1', '127.0.0.1'] type: http x_forwarded: true + bind_addresses: ['::1', '127.0.0.1'] resources: - names: [client, federation] @@ -392,10 +392,10 @@ listeners: # permission to listen on port 80. # acme: - # ACME support is disabled by default. Uncomment the following line - # (and tls_certificate_path and tls_private_key_path above) to enable it. + # ACME support is disabled by default. Set this to `true` and uncomment + # tls_certificate_path and tls_private_key_path above to enable it. # - #enabled: true + enabled: False # Endpoint to use to request certificates. If you only want to test, # use Let's Encrypt's staging url: @@ -406,17 +406,17 @@ acme: # Port number to listen on for the HTTP-01 challenge. Change this if # you are forwarding connections through Apache/Nginx/etc. # - #port: 80 + port: 80 # Local addresses to listen on for incoming connections. # Again, you may want to change this if you are forwarding connections # through Apache/Nginx/etc. # - #bind_addresses: ['::', '0.0.0.0'] + bind_addresses: ['::', '0.0.0.0'] # How many days remaining on a certificate before it is renewed. # - #reprovision_threshold: 30 + reprovision_threshold: 30 # The domain that the certificate should be for. Normally this # should be the same as your Matrix domain (i.e., 'server_name'), but, @@ -430,7 +430,7 @@ acme: # # If not set, defaults to your 'server_name'. # - #domain: matrix.example.com + domain: matrix.example.com # file to use for the account key. This will be generated if it doesn't # exist. diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 6ce5cd07fb..31f6530978 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -181,6 +181,11 @@ class Config(object): generate_secrets=False, report_stats=None, open_private_ports=False, + listeners=None, + database_conf=None, + tls_certificate_path=None, + tls_private_key_path=None, + acme_domain=None, ): """Build a default configuration file @@ -207,6 +212,33 @@ class Config(object): open_private_ports (bool): True to leave private ports (such as the non-TLS HTTP listener) open to the internet. + listeners (list(dict)|None): A list of descriptions of the listeners + synapse should start with each of which specifies a port (str), a list of + resources (list(str)), tls (bool) and type (str). For example: + [{ + "port": 8448, + "resources": [{"names": ["federation"]}], + "tls": True, + "type": "http", + }, + { + "port": 443, + "resources": [{"names": ["client"]}], + "tls": False, + "type": "http", + }], + + + database (str|None): The database type to configure, either `psycog2` + or `sqlite3`. + + tls_certificate_path (str|None): The path to the tls certificate. + + tls_private_key_path (str|None): The path to the tls private key. + + acme_domain (str|None): The domain acme will try to validate. If + specified acme will be enabled. + Returns: str: the yaml config file """ @@ -220,6 +252,11 @@ class Config(object): generate_secrets=generate_secrets, report_stats=report_stats, open_private_ports=open_private_ports, + listeners=listeners, + database_conf=database_conf, + tls_certificate_path=tls_certificate_path, + tls_private_key_path=tls_private_key_path, + acme_domain=acme_domain, ) ) diff --git a/synapse/config/database.py b/synapse/config/database.py index 746a6cd1f4..118aafbd4a 100644 --- a/synapse/config/database.py +++ b/synapse/config/database.py @@ -13,6 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. import os +from textwrap import indent + +import yaml from ._base import Config @@ -38,20 +41,28 @@ class DatabaseConfig(Config): self.set_databasepath(config.get("database_path")) - def generate_config_section(self, data_dir_path, **kwargs): - database_path = os.path.join(data_dir_path, "homeserver.db") - return ( - """\ - ## Database ## - - database: - # The database engine name + def generate_config_section(self, data_dir_path, database_conf, **kwargs): + if not database_conf: + database_path = os.path.join(data_dir_path, "homeserver.db") + database_conf = ( + """# The database engine name name: "sqlite3" # Arguments to pass to the engine args: # Path to the database database: "%(database_path)s" + """ + % locals() + ) + else: + database_conf = indent(yaml.dump(database_conf), " " * 10).lstrip() + return ( + """\ + ## Database ## + + database: + %(database_conf)s # Number of events to cache in memory. # #event_cache_size: 10K diff --git a/synapse/config/server.py b/synapse/config/server.py index 15449695d1..2abdef0971 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -17,8 +17,11 @@ import logging import os.path +import re +from textwrap import indent import attr +import yaml from netaddr import IPSet from synapse.api.room_versions import KNOWN_ROOM_VERSIONS @@ -352,7 +355,7 @@ class ServerConfig(Config): return any(l["tls"] for l in self.listeners) def generate_config_section( - self, server_name, data_dir_path, open_private_ports, **kwargs + self, server_name, data_dir_path, open_private_ports, listeners, **kwargs ): _, bind_port = parse_and_validate_server_name(server_name) if bind_port is not None: @@ -366,11 +369,68 @@ class ServerConfig(Config): # Bring DEFAULT_ROOM_VERSION into the local-scope for use in the # default config string default_room_version = DEFAULT_ROOM_VERSION + secure_listeners = [] + unsecure_listeners = [] + private_addresses = ["::1", "127.0.0.1"] + if listeners: + for listener in listeners: + if listener["tls"]: + secure_listeners.append(listener) + else: + # If we don't want open ports we need to bind the listeners + # to some address other than 0.0.0.0. Here we chose to use + # localhost. + # If the addresses are already bound we won't overwrite them + # however. + if not open_private_ports: + listener.setdefault("bind_addresses", private_addresses) - unsecure_http_binding = "port: %i\n tls: false" % (unsecure_port,) - if not open_private_ports: - unsecure_http_binding += ( - "\n bind_addresses: ['::1', '127.0.0.1']" + unsecure_listeners.append(listener) + + secure_http_bindings = indent( + yaml.dump(secure_listeners), " " * 10 + ).lstrip() + + unsecure_http_bindings = indent( + yaml.dump(unsecure_listeners), " " * 10 + ).lstrip() + + if not unsecure_listeners: + unsecure_http_bindings = ( + """- port: %(unsecure_port)s + tls: false + type: http + x_forwarded: true""" + % locals() + ) + + if not open_private_ports: + unsecure_http_bindings += ( + "\n bind_addresses: ['::1', '127.0.0.1']" + ) + + unsecure_http_bindings += """ + + resources: + - names: [client, federation] + compress: false""" + + if listeners: + # comment out this block + unsecure_http_bindings = "#" + re.sub( + "\n {10}", + lambda match: match.group(0) + "#", + unsecure_http_bindings, + ) + + if not secure_listeners: + secure_http_bindings = ( + """#- port: %(bind_port)s + # type: http + # tls: true + # resources: + # - names: [client, federation]""" + % locals() ) return ( @@ -556,11 +616,7 @@ class ServerConfig(Config): # will also need to give Synapse a TLS key and certificate: see the TLS section # below.) # - #- port: %(bind_port)s - # type: http - # tls: true - # resources: - # - names: [client, federation] + %(secure_http_bindings)s # Unsecure HTTP listener: for when matrix traffic passes through a reverse proxy # that unwraps TLS. @@ -568,13 +624,7 @@ class ServerConfig(Config): # If you plan to use a reverse proxy, please see # https://github.com/matrix-org/synapse/blob/master/docs/reverse_proxy.rst. # - - %(unsecure_http_binding)s - type: http - x_forwarded: true - - resources: - - names: [client, federation] - compress: false + %(unsecure_http_bindings)s # example additional_resources: # diff --git a/synapse/config/tls.py b/synapse/config/tls.py index ca508a224f..c0148aa95c 100644 --- a/synapse/config/tls.py +++ b/synapse/config/tls.py @@ -239,12 +239,38 @@ class TlsConfig(Config): self.tls_fingerprints.append({"sha256": sha256_fingerprint}) def generate_config_section( - self, config_dir_path, server_name, data_dir_path, **kwargs + self, + config_dir_path, + server_name, + data_dir_path, + tls_certificate_path, + tls_private_key_path, + acme_domain, + **kwargs ): + """If the acme_domain is specified acme will be enabled. + If the TLS paths are not specified the default will be certs in the + config directory""" + base_key_name = os.path.join(config_dir_path, server_name) - tls_certificate_path = base_key_name + ".tls.crt" - tls_private_key_path = base_key_name + ".tls.key" + if bool(tls_certificate_path) != bool(tls_private_key_path): + raise ConfigError( + "Please specify both a cert path and a key path or neither." + ) + + tls_enabled = ( + "" if tls_certificate_path and tls_private_key_path or acme_domain else "#" + ) + + if not tls_certificate_path: + tls_certificate_path = base_key_name + ".tls.crt" + if not tls_private_key_path: + tls_private_key_path = base_key_name + ".tls.key" + + acme_enabled = bool(acme_domain) + acme_domain = "matrix.example.com" + default_acme_account_file = os.path.join(data_dir_path, "acme_account.key") # this is to avoid the max line length. Sorrynotsorry @@ -269,11 +295,11 @@ class TlsConfig(Config): # instance, if using certbot, use `fullchain.pem` as your certificate, # not `cert.pem`). # - #tls_certificate_path: "%(tls_certificate_path)s" + %(tls_enabled)stls_certificate_path: "%(tls_certificate_path)s" # PEM-encoded private key for TLS # - #tls_private_key_path: "%(tls_private_key_path)s" + %(tls_enabled)stls_private_key_path: "%(tls_private_key_path)s" # Whether to verify TLS server certificates for outbound federation requests. # @@ -340,10 +366,10 @@ class TlsConfig(Config): # permission to listen on port 80. # acme: - # ACME support is disabled by default. Uncomment the following line - # (and tls_certificate_path and tls_private_key_path above) to enable it. + # ACME support is disabled by default. Set this to `true` and uncomment + # tls_certificate_path and tls_private_key_path above to enable it. # - #enabled: true + enabled: %(acme_enabled)s # Endpoint to use to request certificates. If you only want to test, # use Let's Encrypt's staging url: @@ -354,17 +380,17 @@ class TlsConfig(Config): # Port number to listen on for the HTTP-01 challenge. Change this if # you are forwarding connections through Apache/Nginx/etc. # - #port: 80 + port: 80 # Local addresses to listen on for incoming connections. # Again, you may want to change this if you are forwarding connections # through Apache/Nginx/etc. # - #bind_addresses: ['::', '0.0.0.0'] + bind_addresses: ['::', '0.0.0.0'] # How many days remaining on a certificate before it is renewed. # - #reprovision_threshold: 30 + reprovision_threshold: 30 # The domain that the certificate should be for. Normally this # should be the same as your Matrix domain (i.e., 'server_name'), but, @@ -378,7 +404,7 @@ class TlsConfig(Config): # # If not set, defaults to your 'server_name'. # - #domain: matrix.example.com + domain: %(acme_domain)s # file to use for the account key. This will be generated if it doesn't # exist. diff --git a/tests/config/test_database.py b/tests/config/test_database.py new file mode 100644 index 0000000000..151d3006ac --- /dev/null +++ b/tests/config/test_database.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import yaml + +from synapse.config.database import DatabaseConfig + +from tests import unittest + + +class DatabaseConfigTestCase(unittest.TestCase): + def test_database_configured_correctly_no_database_conf_param(self): + conf = yaml.safe_load( + DatabaseConfig().generate_config_section("/data_dir_path", None) + ) + + expected_database_conf = { + "name": "sqlite3", + "args": {"database": "/data_dir_path/homeserver.db"}, + } + + self.assertEqual(conf["database"], expected_database_conf) + + def test_database_configured_correctly_database_conf_param(self): + + database_conf = { + "name": "my super fast datastore", + "args": { + "user": "matrix", + "password": "synapse_database_password", + "host": "synapse_database_host", + "database": "matrix", + }, + } + + conf = yaml.safe_load( + DatabaseConfig().generate_config_section("/data_dir_path", database_conf) + ) + + self.assertEqual(conf["database"], database_conf) diff --git a/tests/config/test_server.py b/tests/config/test_server.py index 1ca5ea54ca..a10d017120 100644 --- a/tests/config/test_server.py +++ b/tests/config/test_server.py @@ -13,7 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from synapse.config.server import is_threepid_reserved +import yaml + +from synapse.config.server import ServerConfig, is_threepid_reserved from tests import unittest @@ -29,3 +31,100 @@ class ServerConfigTestCase(unittest.TestCase): self.assertTrue(is_threepid_reserved(config, user1)) self.assertFalse(is_threepid_reserved(config, user3)) self.assertFalse(is_threepid_reserved(config, user1_msisdn)) + + def test_unsecure_listener_no_listeners_open_private_ports_false(self): + conf = yaml.safe_load( + ServerConfig().generate_config_section( + "che.org", "/data_dir_path", False, None + ) + ) + + expected_listeners = [ + { + "port": 8008, + "tls": False, + "type": "http", + "x_forwarded": True, + "bind_addresses": ["::1", "127.0.0.1"], + "resources": [{"names": ["client", "federation"], "compress": False}], + } + ] + + self.assertEqual(conf["listeners"], expected_listeners) + + def test_unsecure_listener_no_listeners_open_private_ports_true(self): + conf = yaml.safe_load( + ServerConfig().generate_config_section( + "che.org", "/data_dir_path", True, None + ) + ) + + expected_listeners = [ + { + "port": 8008, + "tls": False, + "type": "http", + "x_forwarded": True, + "resources": [{"names": ["client", "federation"], "compress": False}], + } + ] + + self.assertEqual(conf["listeners"], expected_listeners) + + def test_listeners_set_correctly_open_private_ports_false(self): + listeners = [ + { + "port": 8448, + "resources": [{"names": ["federation"]}], + "tls": True, + "type": "http", + }, + { + "port": 443, + "resources": [{"names": ["client"]}], + "tls": False, + "type": "http", + }, + ] + + conf = yaml.safe_load( + ServerConfig().generate_config_section( + "this.one.listens", "/data_dir_path", True, listeners + ) + ) + + self.assertEqual(conf["listeners"], listeners) + + def test_listeners_set_correctly_open_private_ports_true(self): + listeners = [ + { + "port": 8448, + "resources": [{"names": ["federation"]}], + "tls": True, + "type": "http", + }, + { + "port": 443, + "resources": [{"names": ["client"]}], + "tls": False, + "type": "http", + }, + { + "port": 1243, + "resources": [{"names": ["client"]}], + "tls": False, + "type": "http", + "bind_addresses": ["this_one_is_bound"], + }, + ] + + expected_listeners = listeners.copy() + expected_listeners[1]["bind_addresses"] = ["::1", "127.0.0.1"] + + conf = yaml.safe_load( + ServerConfig().generate_config_section( + "this.one.listens", "/data_dir_path", True, listeners + ) + ) + + self.assertEqual(conf["listeners"], expected_listeners) diff --git a/tests/config/test_tls.py b/tests/config/test_tls.py index 4f8a87a3df..8e0c4b9533 100644 --- a/tests/config/test_tls.py +++ b/tests/config/test_tls.py @@ -16,6 +16,8 @@ import os +import yaml + from OpenSSL import SSL from synapse.config.tls import ConfigError, TlsConfig @@ -191,3 +193,45 @@ s4niecZKPBizL6aucT59CsunNmmb5Glq8rlAcU+1ZTZZzGYqVYhF6axB9Qg= self.assertEqual(cf._verify_ssl._options & SSL.OP_NO_TLSv1, 0) self.assertEqual(cf._verify_ssl._options & SSL.OP_NO_TLSv1_1, 0) self.assertEqual(cf._verify_ssl._options & SSL.OP_NO_TLSv1_2, 0) + + def test_acme_disabled_in_generated_config_no_acme_domain_provied(self): + """ + Checks acme is disabled by default. + """ + conf = TestConfig() + conf.read_config( + yaml.safe_load( + TestConfig().generate_config_section( + "/config_dir_path", + "my_super_secure_server", + "/data_dir_path", + "/tls_cert_path", + "tls_private_key", + None, # This is the acme_domain + ) + ), + "/config_dir_path", + ) + + self.assertFalse(conf.acme_enabled) + + def test_acme_enabled_in_generated_config_domain_provided(self): + """ + Checks acme is enabled if the acme_domain arg is set to some string. + """ + conf = TestConfig() + conf.read_config( + yaml.safe_load( + TestConfig().generate_config_section( + "/config_dir_path", + "my_super_secure_server", + "/data_dir_path", + "/tls_cert_path", + "tls_private_key", + "my_supe_secure_server", # This is the acme_domain + ) + ), + "/config_dir_path", + ) + + self.assertTrue(conf.acme_enabled) From 71fc04069a5770a204c3514e0237d7374df257a8 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Wed, 28 Aug 2019 14:59:26 +0200 Subject: [PATCH 107/110] Use the v2 lookup API for 3PID invites (#5897) Fixes https://github.com/matrix-org/synapse/issues/5861 Adds support for the v2 lookup API as defined in [MSC2134](https://github.com/matrix-org/matrix-doc/pull/2134). Currently this is only used for 3PID invites. Sytest PR: https://github.com/matrix-org/sytest/pull/679 --- changelog.d/5897.feature | 1 + synapse/handlers/identity.py | 13 ++++ synapse/handlers/room_member.py | 128 +++++++++++++++++++++++++++++--- synapse/util/hash.py | 33 ++++++++ 4 files changed, 166 insertions(+), 9 deletions(-) create mode 100644 changelog.d/5897.feature create mode 100644 synapse/util/hash.py diff --git a/changelog.d/5897.feature b/changelog.d/5897.feature new file mode 100644 index 0000000000..7b10774c96 --- /dev/null +++ b/changelog.d/5897.feature @@ -0,0 +1 @@ +Switch to the v2 lookup API for 3PID invites. \ No newline at end of file diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index d199521b58..97daca5fee 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -282,3 +282,16 @@ class IdentityHandler(BaseHandler): except HttpResponseException as e: logger.info("Proxied requestToken failed: %r", e) raise e.to_synapse_error() + + +class LookupAlgorithm: + """ + Supported hashing algorithms when performing a 3PID lookup. + + SHA256 - Hashing an (address, medium, pepper) combo with sha256, then url-safe base64 + encoding + NONE - Not performing any hashing. Simply sending an (address, medium) combo in plaintext + """ + + SHA256 = "sha256" + NONE = "none" diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 249a6d9c5d..4605cb9c0b 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -29,9 +29,11 @@ from twisted.internet import defer from synapse import types from synapse.api.constants import EventTypes, Membership from synapse.api.errors import AuthError, Codes, HttpResponseException, SynapseError +from synapse.handlers.identity import LookupAlgorithm from synapse.types import RoomID, UserID from synapse.util.async_helpers import Linearizer from synapse.util.distributor import user_joined_room, user_left_room +from synapse.util.hash import sha256_and_url_safe_base64 from ._base import BaseHandler @@ -523,7 +525,7 @@ class RoomMemberHandler(object): event (SynapseEvent): The membership event. context: The context of the event. is_guest (bool): Whether the sender is a guest. - room_hosts ([str]): Homeservers which are likely to already be in + remote_room_hosts (list[str]|None): Homeservers which are likely to already be in the room, and could be danced with in order to join this homeserver for the first time. ratelimit (bool): Whether to rate limit this request. @@ -634,7 +636,7 @@ class RoomMemberHandler(object): servers.remove(room_alias.domain) servers.insert(0, room_alias.domain) - return (RoomID.from_string(room_id), servers) + return RoomID.from_string(room_id), servers @defer.inlineCallbacks def _get_inviter(self, user_id, room_id): @@ -697,6 +699,44 @@ class RoomMemberHandler(object): raise SynapseError( 403, "Looking up third-party identifiers is denied from this server" ) + + # Check what hashing details are supported by this identity server + use_v1 = False + hash_details = None + try: + hash_details = yield self.simple_http_client.get_json( + "%s%s/_matrix/identity/v2/hash_details" % (id_server_scheme, id_server) + ) + except (HttpResponseException, ValueError) as e: + # Catch HttpResponseExcept for a non-200 response code + # Catch ValueError for non-JSON response body + + # Check if this identity server does not know about v2 lookups + if e.code == 404: + # This is an old identity server that does not yet support v2 lookups + use_v1 = True + else: + logger.warn("Error when looking up hashing details: %s" % (e,)) + return None + + if use_v1: + return (yield self._lookup_3pid_v1(id_server, medium, address)) + + return (yield self._lookup_3pid_v2(id_server, medium, address, hash_details)) + + @defer.inlineCallbacks + def _lookup_3pid_v1(self, id_server, medium, address): + """Looks up a 3pid in the passed identity server using v1 lookup. + + Args: + id_server (str): The server name (including port, if required) + of the identity server to use. + medium (str): The type of the third party identifier (e.g. "email"). + address (str): The third party identifier (e.g. "foo@example.com"). + + Returns: + str: the matrix ID of the 3pid, or None if it is not recognized. + """ try: data = yield self.simple_http_client.get_json( "%s%s/_matrix/identity/api/v1/lookup" % (id_server_scheme, id_server), @@ -711,8 +751,83 @@ class RoomMemberHandler(object): except IOError as e: logger.warn("Error from identity server lookup: %s" % (e,)) + + return None + + @defer.inlineCallbacks + def _lookup_3pid_v2(self, id_server, medium, address, hash_details): + """Looks up a 3pid in the passed identity server using v2 lookup. + + Args: + id_server (str): The server name (including port, if required) + of the identity server to use. + medium (str): The type of the third party identifier (e.g. "email"). + address (str): The third party identifier (e.g. "foo@example.com"). + hash_details (dict[str, str|list]): A dictionary containing hashing information + provided by an identity server. + + Returns: + Deferred[str|None]: the matrix ID of the 3pid, or None if it is not recognised. + """ + # Extract information from hash_details + supported_lookup_algorithms = hash_details["algorithms"] + lookup_pepper = hash_details["lookup_pepper"] + + # Check if any of the supported lookup algorithms are present + if LookupAlgorithm.SHA256 in supported_lookup_algorithms: + # Perform a hashed lookup + lookup_algorithm = LookupAlgorithm.SHA256 + + # Hash address, medium and the pepper with sha256 + to_hash = "%s %s %s" % (address, medium, lookup_pepper) + lookup_value = sha256_and_url_safe_base64(to_hash) + + elif LookupAlgorithm.NONE in supported_lookup_algorithms: + # Perform a non-hashed lookup + lookup_algorithm = LookupAlgorithm.NONE + + # Combine together plaintext address and medium + lookup_value = "%s %s" % (address, medium) + + else: + logger.warn( + "None of the provided lookup algorithms of %s%s are supported: %s", + id_server_scheme, + id_server, + hash_details["algorithms"], + ) + raise SynapseError( + 400, + "Provided identity server does not support any v2 lookup " + "algorithms that this homeserver supports.", + ) + + try: + lookup_results = yield self.simple_http_client.post_json_get_json( + "%s%s/_matrix/identity/v2/lookup" % (id_server_scheme, id_server), + { + "addresses": [lookup_value], + "algorithm": lookup_algorithm, + "pepper": lookup_pepper, + }, + ) + except (HttpResponseException, ValueError) as e: + # Catch HttpResponseExcept for a non-200 response code + # Catch ValueError for non-JSON response body + logger.warn("Error when performing a 3pid lookup: %s" % (e,)) return None + # Check for a mapping from what we looked up to an MXID + if "mappings" not in lookup_results or not isinstance( + lookup_results["mappings"], dict + ): + logger.debug("No results from 3pid lookup") + return None + + # Return the MXID if it's available, or None otherwise + mxid = lookup_results["mappings"].get(lookup_value) + return mxid + @defer.inlineCallbacks def _verify_any_signature(self, data, server_hostname): if server_hostname not in data["signatures"]: @@ -962,9 +1077,7 @@ class RoomMemberMasterHandler(RoomMemberHandler): ) if complexity: - if complexity["v1"] > max_complexity: - return True - return False + return complexity["v1"] > max_complexity return None @defer.inlineCallbacks @@ -980,10 +1093,7 @@ class RoomMemberMasterHandler(RoomMemberHandler): max_complexity = self.hs.config.limit_remote_rooms.complexity complexity = yield self.store.get_room_complexity(room_id) - if complexity["v1"] > max_complexity: - return True - - return False + return complexity["v1"] > max_complexity @defer.inlineCallbacks def _remote_join(self, requester, remote_room_hosts, room_id, user, content): diff --git a/synapse/util/hash.py b/synapse/util/hash.py new file mode 100644 index 0000000000..359168704e --- /dev/null +++ b/synapse/util/hash.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import hashlib + +import unpaddedbase64 + + +def sha256_and_url_safe_base64(input_text): + """SHA256 hash an input string, encode the digest as url-safe base64, and + return + + :param input_text: string to hash + :type input_text: str + + :returns a sha256 hashed and url-safe base64 encoded digest + :rtype: str + """ + digest = hashlib.sha256(input_text.encode()).digest() + return unpaddedbase64.encode_base64(digest, urlsafe=True) From 5798a134c0a98b9b5b15808a3d1b0e3e63fe4030 Mon Sep 17 00:00:00 2001 From: Will Hunt Date: Wed, 28 Aug 2019 14:25:05 +0100 Subject: [PATCH 108/110] Removing entry for 5903 --- changelog.d/5903.feature | 1 - 1 file changed, 1 deletion(-) delete mode 100644 changelog.d/5903.feature diff --git a/changelog.d/5903.feature b/changelog.d/5903.feature deleted file mode 100644 index fc60d02107..0000000000 --- a/changelog.d/5903.feature +++ /dev/null @@ -1 +0,0 @@ -Add bot user type. From deca277d0972c98a643997d7f6a388b313d2d2fb Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Wed, 28 Aug 2019 15:55:58 +0100 Subject: [PATCH 109/110] Let synctl use a config directory. (#5904) * Let synctl use a config directory. --- changelog.d/5904.feature | 1 + synapse/config/__init__.py | 7 ++++--- synctl | 13 ++++++++++--- 3 files changed, 15 insertions(+), 6 deletions(-) create mode 100644 changelog.d/5904.feature diff --git a/changelog.d/5904.feature b/changelog.d/5904.feature new file mode 100644 index 0000000000..43b5304f39 --- /dev/null +++ b/changelog.d/5904.feature @@ -0,0 +1 @@ +Let synctl accept a directory of config files. diff --git a/synapse/config/__init__.py b/synapse/config/__init__.py index f2a5a41e92..1e76e9559d 100644 --- a/synapse/config/__init__.py +++ b/synapse/config/__init__.py @@ -13,8 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ._base import ConfigError +from ._base import ConfigError, find_config_files -# export ConfigError if somebody does import * +# export ConfigError and find_config_files if somebody does +# import * # this is largely a fudge to stop PEP8 moaning about the import -__all__ = ["ConfigError"] +__all__ = ["ConfigError", "find_config_files"] diff --git a/synctl b/synctl index 794de99ea3..a9629cf0e8 100755 --- a/synctl +++ b/synctl @@ -30,6 +30,8 @@ from six import iteritems import yaml +from synapse.config import find_config_files + SYNAPSE = [sys.executable, "-B", "-m", "synapse.app.homeserver"] GREEN = "\x1b[1;32m" @@ -135,7 +137,8 @@ def main(): "configfile", nargs="?", default="homeserver.yaml", - help="the homeserver config file, defaults to homeserver.yaml", + help="the homeserver config file. Defaults to homeserver.yaml. May also be" + " a directory with *.yaml files", ) parser.add_argument( "-w", "--worker", metavar="WORKERCONFIG", help="start or stop a single worker" @@ -176,8 +179,12 @@ def main(): ) sys.exit(1) - with open(configfile) as stream: - config = yaml.safe_load(stream) + config_files = find_config_files([configfile]) + config = {} + for config_file in config_files: + with open(config_file) as file_stream: + yaml_config = yaml.safe_load(file_stream) + config.update(yaml_config) pidfile = config["pid_file"] cache_factor = config.get("synctl_cache_factor") From 92c1550f4abe1aa8495b0e1fc6dc38d338a4ecd1 Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Wed, 28 Aug 2019 19:08:32 +0100 Subject: [PATCH 110/110] Add a link to python's logging config schema (#5926) --- changelog.d/5926.misc | 1 + docs/sample_config.yaml | 3 ++- synapse/config/logger.py | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 changelog.d/5926.misc diff --git a/changelog.d/5926.misc b/changelog.d/5926.misc new file mode 100644 index 0000000000..4383c302ec --- /dev/null +++ b/changelog.d/5926.misc @@ -0,0 +1 @@ +Add link in sample config to the logging config schema. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 6da1167632..43969bbb70 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -485,7 +485,8 @@ database: ## Logging ## -# A yaml python logging config file +# A yaml python logging config file as described by +# https://docs.python.org/3.7/library/logging.config.html#configuration-dictionary-schema # log_config: "CONFDIR/SERVERNAME.log.config" diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 981df5a10c..2704c18720 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -89,7 +89,8 @@ class LoggingConfig(Config): """\ ## Logging ## - # A yaml python logging config file + # A yaml python logging config file as described by + # https://docs.python.org/3.7/library/logging.config.html#configuration-dictionary-schema # log_config: "%(log_config)s" """