From 986615b0b21271959adb9d64291761244e4175bd Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 1 Aug 2016 18:02:07 +0100 Subject: [PATCH 01/38] Move e2e query logic into a handler --- synapse/handlers/e2e_keys.py | 67 ++++++++++++++++++++++++++++ synapse/rest/client/v2_alpha/keys.py | 46 +++---------------- synapse/server.py | 65 ++++++++++++++------------- synapse/server.pyi | 4 ++ 4 files changed, 112 insertions(+), 70 deletions(-) create mode 100644 synapse/handlers/e2e_keys.py diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py new file mode 100644 index 0000000000..73a14cf952 --- /dev/null +++ b/synapse/handlers/e2e_keys.py @@ -0,0 +1,67 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import logging + +from twisted.internet import defer + +import synapse.types +from ._base import BaseHandler + +logger = logging.getLogger(__name__) + + +class E2eKeysHandler(BaseHandler): + def __init__(self, hs): + super(E2eKeysHandler, self).__init__(hs) + self.store = hs.get_datastore() + self.federation = hs.get_replication_layer() + self.is_mine = hs.is_mine + + @defer.inlineCallbacks + def query_devices(self, query_body): + local_query = [] + remote_queries = {} + for user_id, device_ids in query_body.get("device_keys", {}).items(): + user = synapse.types.UserID.from_string(user_id) + if self.is_mine(user): + if not device_ids: + local_query.append((user_id, None)) + else: + for device_id in device_ids: + local_query.append((user_id, device_id)) + else: + remote_queries.setdefault(user.domain, {})[user_id] = list( + device_ids + ) + results = yield self.store.get_e2e_device_keys(local_query) + + json_result = {} + for user_id, device_keys in results.items(): + for device_id, json_bytes in device_keys.items(): + json_result.setdefault(user_id, {})[ + device_id] = json.loads( + json_bytes + ) + + for destination, device_keys in remote_queries.items(): + remote_result = yield self.federation.query_client_keys( + destination, {"device_keys": device_keys} + ) + for user_id, keys in remote_result["device_keys"].items(): + if user_id in device_keys: + json_result[user_id] = keys + defer.returnValue((200, {"device_keys": json_result})) diff --git a/synapse/rest/client/v2_alpha/keys.py b/synapse/rest/client/v2_alpha/keys.py index dc1d4d8fc6..705a0b6c17 100644 --- a/synapse/rest/client/v2_alpha/keys.py +++ b/synapse/rest/client/v2_alpha/keys.py @@ -186,17 +186,19 @@ class KeyQueryServlet(RestServlet): ) def __init__(self, hs): + """ + Args: + hs (synapse.server.HomeServer): + """ super(KeyQueryServlet, self).__init__() - self.store = hs.get_datastore() self.auth = hs.get_auth() - self.federation = hs.get_replication_layer() - self.is_mine = hs.is_mine + self.e2e_keys_handler = hs.get_e2e_keys_handler() @defer.inlineCallbacks def on_POST(self, request, user_id, device_id): yield self.auth.get_user_by_req(request) body = parse_json_object_from_request(request) - result = yield self.handle_request(body) + result = yield self.e2e_keys_handler.query_devices(body) defer.returnValue(result) @defer.inlineCallbacks @@ -205,45 +207,11 @@ class KeyQueryServlet(RestServlet): auth_user_id = requester.user.to_string() user_id = user_id if user_id else auth_user_id device_ids = [device_id] if device_id else [] - result = yield self.handle_request( + result = yield self.e2e_keys_handler.query_devices( {"device_keys": {user_id: device_ids}} ) defer.returnValue(result) - @defer.inlineCallbacks - def handle_request(self, body): - local_query = [] - remote_queries = {} - for user_id, device_ids in body.get("device_keys", {}).items(): - user = UserID.from_string(user_id) - if self.is_mine(user): - if not device_ids: - local_query.append((user_id, None)) - else: - for device_id in device_ids: - local_query.append((user_id, device_id)) - else: - remote_queries.setdefault(user.domain, {})[user_id] = list( - device_ids - ) - results = yield self.store.get_e2e_device_keys(local_query) - - json_result = {} - for user_id, device_keys in results.items(): - for device_id, json_bytes in device_keys.items(): - json_result.setdefault(user_id, {})[device_id] = json.loads( - json_bytes - ) - - for destination, device_keys in remote_queries.items(): - remote_result = yield self.federation.query_client_keys( - destination, {"device_keys": device_keys} - ) - for user_id, keys in remote_result["device_keys"].items(): - if user_id in device_keys: - json_result[user_id] = keys - defer.returnValue((200, {"device_keys": json_result})) - class OneTimeKeyServlet(RestServlet): """ diff --git a/synapse/server.py b/synapse/server.py index e8b166990d..6bb4988309 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -19,39 +19,38 @@ # partial one for unit test mocking. # Imports required for the default HomeServer() implementation -from twisted.web.client import BrowserLikePolicyForHTTPS -from twisted.enterprise import adbapi - -from synapse.appservice.scheduler import ApplicationServiceScheduler -from synapse.appservice.api import ApplicationServiceApi -from synapse.federation import initialize_http_replication -from synapse.handlers.device import DeviceHandler -from synapse.http.client import SimpleHttpClient, InsecureInterceptableContextFactory -from synapse.notifier import Notifier -from synapse.api.auth import Auth -from synapse.handlers import Handlers -from synapse.handlers.presence import PresenceHandler -from synapse.handlers.sync import SyncHandler -from synapse.handlers.typing import TypingHandler -from synapse.handlers.room import RoomListHandler -from synapse.handlers.auth import AuthHandler -from synapse.handlers.appservice import ApplicationServicesHandler -from synapse.state import StateHandler -from synapse.storage import DataStore -from synapse.util import Clock -from synapse.util.distributor import Distributor -from synapse.streams.events import EventSources -from synapse.api.ratelimiting import Ratelimiter -from synapse.crypto.keyring import Keyring -from synapse.push.pusherpool import PusherPool -from synapse.events.builder import EventBuilderFactory -from synapse.api.filtering import Filtering -from synapse.rest.media.v1.media_repository import MediaRepository - -from synapse.http.matrixfederationclient import MatrixFederationHttpClient - import logging +from twisted.enterprise import adbapi +from twisted.web.client import BrowserLikePolicyForHTTPS + +from synapse.api.auth import Auth +from synapse.api.filtering import Filtering +from synapse.api.ratelimiting import Ratelimiter +from synapse.appservice.api import ApplicationServiceApi +from synapse.appservice.scheduler import ApplicationServiceScheduler +from synapse.crypto.keyring import Keyring +from synapse.events.builder import EventBuilderFactory +from synapse.federation import initialize_http_replication +from synapse.handlers import Handlers +from synapse.handlers.appservice import ApplicationServicesHandler +from synapse.handlers.auth import AuthHandler +from synapse.handlers.device import DeviceHandler +from synapse.handlers.e2e_keys import E2eKeysHandler +from synapse.handlers.presence import PresenceHandler +from synapse.handlers.room import RoomListHandler +from synapse.handlers.sync import SyncHandler +from synapse.handlers.typing import TypingHandler +from synapse.http.client import SimpleHttpClient, InsecureInterceptableContextFactory +from synapse.http.matrixfederationclient import MatrixFederationHttpClient +from synapse.notifier import Notifier +from synapse.push.pusherpool import PusherPool +from synapse.rest.media.v1.media_repository import MediaRepository +from synapse.state import StateHandler +from synapse.storage import DataStore +from synapse.streams.events import EventSources +from synapse.util import Clock +from synapse.util.distributor import Distributor logger = logging.getLogger(__name__) @@ -94,6 +93,7 @@ class HomeServer(object): 'room_list_handler', 'auth_handler', 'device_handler', + 'e2e_keys_handler', 'application_service_api', 'application_service_scheduler', 'application_service_handler', @@ -202,6 +202,9 @@ class HomeServer(object): def build_device_handler(self): return DeviceHandler(self) + def build_e2e_keys_handler(self): + return E2eKeysHandler(self) + def build_application_service_api(self): return ApplicationServiceApi(self) diff --git a/synapse/server.pyi b/synapse/server.pyi index 902f725c06..c0aa868c4f 100644 --- a/synapse/server.pyi +++ b/synapse/server.pyi @@ -1,6 +1,7 @@ import synapse.handlers import synapse.handlers.auth import synapse.handlers.device +import synapse.handlers.e2e_keys import synapse.storage import synapse.state @@ -14,6 +15,9 @@ class HomeServer(object): def get_device_handler(self) -> synapse.handlers.device.DeviceHandler: pass + def get_e2e_keys_handler(self) -> synapse.handlers.e2e_keys.E2eKeysHandler: + pass + def get_handlers(self) -> synapse.handlers.Handlers: pass From 1efee2f52b931ddcd90e87d06c7ea614da2c9cd0 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 2 Aug 2016 18:06:31 +0100 Subject: [PATCH 02/38] E2E keys: Make federation query share code with client query Refactor the e2e query handler to separate out the local query, and then make the federation handler use it. --- synapse/federation/federation_server.py | 20 +---- synapse/federation/transport/server.py | 4 +- synapse/handlers/e2e_keys.py | 115 ++++++++++++++++++------ 3 files changed, 92 insertions(+), 47 deletions(-) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 85f5e752fe..e637f2a8bd 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -348,27 +348,9 @@ class FederationServer(FederationBase): (200, send_content) ) - @defer.inlineCallbacks @log_function def on_query_client_keys(self, origin, content): - query = [] - for user_id, device_ids in content.get("device_keys", {}).items(): - if not device_ids: - query.append((user_id, None)) - else: - for device_id in device_ids: - query.append((user_id, device_id)) - - results = yield self.store.get_e2e_device_keys(query) - - json_result = {} - for user_id, device_keys in results.items(): - for device_id, json_bytes in device_keys.items(): - json_result.setdefault(user_id, {})[device_id] = json.loads( - json_bytes - ) - - defer.returnValue({"device_keys": json_result}) + return self.on_query_request("client_keys", content) @defer.inlineCallbacks @log_function diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 26fa88ae84..1a88413d18 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -367,10 +367,8 @@ class FederationThirdPartyInviteExchangeServlet(BaseFederationServlet): class FederationClientKeysQueryServlet(BaseFederationServlet): PATH = "/user/keys/query" - @defer.inlineCallbacks def on_POST(self, origin, content, query): - response = yield self.handler.on_query_client_keys(origin, content) - defer.returnValue((200, response)) + return self.handler.on_query_client_keys(origin, content) class FederationClientKeysClaimServlet(BaseFederationServlet): diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 73a14cf952..9c7e9494d6 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -13,12 +13,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import collections import json import logging from twisted.internet import defer +from synapse.api import errors import synapse.types + from ._base import BaseHandler logger = logging.getLogger(__name__) @@ -29,39 +32,101 @@ class E2eKeysHandler(BaseHandler): super(E2eKeysHandler, self).__init__(hs) self.store = hs.get_datastore() self.federation = hs.get_replication_layer() - self.is_mine = hs.is_mine + self.is_mine_id = hs.is_mine_id + + # doesn't really work as part of the generic query API, because the + # query request requires an object POST, but we abuse the + # "query handler" interface. + self.federation.register_query_handler( + "client_keys", self.on_federation_query_client_keys + ) @defer.inlineCallbacks def query_devices(self, query_body): - local_query = [] - remote_queries = {} - for user_id, device_ids in query_body.get("device_keys", {}).items(): + """ Handle a device key query from a client + + { + "device_keys": { + "": [""] + } + } + -> + { + "device_keys": { + "": { + "": { + ... + } + } + } + } + """ + device_keys_query = query_body.get("device_keys", {}) + + # separate users by domain. + # make a map from domain to user_id to device_ids + queries_by_domain = collections.defaultdict(dict) + for user_id, device_ids in device_keys_query.items(): user = synapse.types.UserID.from_string(user_id) - if self.is_mine(user): - if not device_ids: - local_query.append((user_id, None)) - else: - for device_id in device_ids: - local_query.append((user_id, device_id)) + queries_by_domain[user.domain][user_id] = device_ids + + # do the queries + # TODO: do these in parallel + results = {} + for destination, destination_query in queries_by_domain.items(): + if destination == self.hs.hostname: + res = yield self.query_local_devices(destination_query) else: - remote_queries.setdefault(user.domain, {})[user_id] = list( - device_ids + res = yield self.federation.query_client_keys( + destination, {"device_keys": destination_query} ) + res = res["device_keys"] + for user_id, keys in res.items(): + if user_id in destination_query: + results[user_id] = keys + + defer.returnValue((200, {"device_keys": results})) + + @defer.inlineCallbacks + def query_local_devices(self, query): + """Get E2E device keys for local users + + Args: + query (dict[string, list[string]|None): map from user_id to a list + of devices to query (None for all devices) + + Returns: + defer.Deferred: (resolves to dict[string, dict[string, dict]]): + map from user_id -> device_id -> device details + """ + local_query = [] + + for user_id, device_ids in query.items(): + if not self.is_mine_id(user_id): + logger.warning("Request for keys for non-local user %s", + user_id) + raise errors.SynapseError(400, "Not a user here") + + if not device_ids: + local_query.append((user_id, None)) + else: + for device_id in device_ids: + local_query.append((user_id, device_id)) + results = yield self.store.get_e2e_device_keys(local_query) - json_result = {} + # un-jsonify the results + json_result = collections.defaultdict(dict) for user_id, device_keys in results.items(): for device_id, json_bytes in device_keys.items(): - json_result.setdefault(user_id, {})[ - device_id] = json.loads( - json_bytes - ) + json_result[user_id][device_id] = json.loads(json_bytes) - for destination, device_keys in remote_queries.items(): - remote_result = yield self.federation.query_client_keys( - destination, {"device_keys": device_keys} - ) - for user_id, keys in remote_result["device_keys"].items(): - if user_id in device_keys: - json_result[user_id] = keys - defer.returnValue((200, {"device_keys": json_result})) + defer.returnValue(json_result) + + @defer.inlineCallbacks + def on_federation_query_client_keys(self, query_body): + """ Handle a device key query from a federated server + """ + device_keys_query = query_body.get("device_keys", {}) + res = yield self.query_local_devices(device_keys_query) + defer.returnValue({"device_keys": res}) From aecaec3e104fc8aebb4f2e3e9ce29bb7dee4dc0c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 2 Aug 2016 18:25:53 +0100 Subject: [PATCH 03/38] Change the way we summarize URLs Using XPath is slow on some machines (for unknown reasons), so use a different approach to get a list of text nodes. Try to generate a summary that respect paragraph and then word boundaries, adding ellipses when appropriate. --- synapse/rest/media/v1/preview_url_resource.py | 78 ++++++++++++++++--- 1 file changed, 67 insertions(+), 11 deletions(-) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 74c64f1371..ea46b8aa1b 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -29,6 +29,8 @@ from synapse.http.server import ( from synapse.util.async import ObservableDeferred from synapse.util.stringutils import is_ascii +from copy import deepcopy + import os import re import fnmatch @@ -329,20 +331,74 @@ class PreviewUrlResource(Resource): # ...or if they are within a