Merge pull request #23 from matrix-org/media_repository

Media repository
This commit is contained in:
Erik Johnston 2014-12-11 17:46:23 +00:00
commit 85574cfbf0
24 changed files with 1320 additions and 17 deletions

25
docs/media_repository.rst Normal file
View file

@ -0,0 +1,25 @@
Media Repository
================
The media repository is where attachments and avatar photos are stored.
It stores attachment content and thumbnails for media uploaded by local users.
It caches attachment content and thumbnails for media uploaded by remote users.
Storage
-------
Each item of media is assigned a ``media_id`` when it is uploaded.
The ``media_id`` is a randomly chosen, URL safe 24 character string.
Metadata such as the MIME type, upload time and length are stored in the
sqlite3 database indexed by ``media_id``.
Content is stored on the filesystem under a ``"local_content"`` directory.
Thumbnails are stored under a ``"local_thumbnails"`` directory.
The item with ``media_id`` ``"aabbccccccccdddddddddddd"`` is stored under
``"local_content/aa/bb/ccccccccdddddddddddd"``. Its thumbnail with width
``128`` and height ``96`` and type ``"image/jpeg"`` is stored under
``"local_thumbnails/aa/bb/ccccccccdddddddddddd/128-96-image-jpeg"``
Remote content is cached under ``"remote_content"`` directory. Each item of
remote content is assigned a local "``filesystem_id``" to ensure that the
directory structure ``"remote_content/server_name/aa/bb/ccccccccdddddddddddd"``
is appropriate. Thumbnails for remote content are stored under
``"remote_thumbnails/server_name/..."``

View file

@ -41,6 +41,7 @@ setup(
"pynacl",
"daemonize",
"py-bcrypt",
"pillow",
],
dependency_links=[
"https://github.com/matrix-org/syutil/tarball/v0.0.2#egg=syutil-0.0.2",

View file

@ -34,6 +34,7 @@ class Codes(object):
LIMIT_EXCEEDED = "M_LIMIT_EXCEEDED"
CAPTCHA_NEEDED = "M_CAPTCHA_NEEDED"
CAPTCHA_INVALID = "M_CAPTCHA_INVALID"
TOO_LARGE = "M_TOO_LARGE"
class CodeMessageException(Exception):

View file

@ -20,3 +20,4 @@ FEDERATION_PREFIX = "/_matrix/federation/v1"
WEB_CLIENT_PREFIX = "/_matrix/client"
CONTENT_REPO_PREFIX = "/_matrix/content"
SERVER_KEY_PREFIX = "/_matrix/key/v1"
MEDIA_PREFIX = "/_matrix/media/v1"

View file

@ -24,12 +24,13 @@ from twisted.web.resource import Resource
from twisted.web.static import File
from twisted.web.server import Site
from synapse.http.server import JsonResource, RootRedirect
from synapse.http.content_repository import ContentRepoResource
from synapse.media.v0.content_repository import ContentRepoResource
from synapse.media.v1.media_repository import MediaRepositoryResource
from synapse.http.server_key_resource import LocalKey
from synapse.http.matrixfederationclient import MatrixFederationHttpClient
from synapse.api.urls import (
CLIENT_PREFIX, FEDERATION_PREFIX, WEB_CLIENT_PREFIX, CONTENT_REPO_PREFIX,
SERVER_KEY_PREFIX,
SERVER_KEY_PREFIX, MEDIA_PREFIX
)
from synapse.config.homeserver import HomeServerConfig
from synapse.crypto import context_factory
@ -69,6 +70,9 @@ class SynapseHomeServer(HomeServer):
self, self.upload_dir, self.auth, self.content_addr
)
def build_resource_for_media_repository(self):
return MediaRepositoryResource(self)
def build_resource_for_server_key(self):
return LocalKey(self)
@ -99,6 +103,7 @@ class SynapseHomeServer(HomeServer):
(FEDERATION_PREFIX, self.get_resource_for_federation()),
(CONTENT_REPO_PREFIX, self.get_resource_for_content_repo()),
(SERVER_KEY_PREFIX, self.get_resource_for_server_key()),
(MEDIA_PREFIX, self.get_resource_for_media_repository()),
]
if web_client:
logger.info("Adding the web client.")

View file

@ -50,12 +50,26 @@ class Config(object):
)
return cls.abspath(file_path)
@staticmethod
def ensure_directory(dir_path):
if not os.path.exists(dir_path):
os.makedirs(dir_path)
if not os.path.isdir(dir_path):
raise ConfigError(
"%s is not a directory" % (dir_path,)
)
return dir_path
@classmethod
def read_file(cls, file_path, config_name):
cls.check_file(file_path, config_name)
with open(file_path) as file_stream:
return file_stream.read()
@staticmethod
def default_path(name):
return os.path.abspath(os.path.join(os.path.curdir, name))
@staticmethod
def read_config_file(file_path):
with open(file_path) as file_stream:

View file

@ -20,6 +20,8 @@ class ContentRepositoryConfig(Config):
def __init__(self, args):
super(ContentRepositoryConfig, self).__init__(args)
self.max_upload_size = self.parse_size(args.max_upload_size)
self.max_image_pixels = self.parse_size(args.max_image_pixels)
self.media_store_path = self.ensure_directory(args.media_store_path)
def parse_size(self, string):
sizes = {"K": 1024, "M": 1024 * 1024}
@ -37,3 +39,10 @@ class ContentRepositoryConfig(Config):
db_group.add_argument(
"--max-upload-size", default="1M"
)
db_group.add_argument(
"--media-store-path", default=cls.default_path("media_store")
)
db_group.add_argument(
"--max-image-pixels", default="32M",
help="Maximum number of pixels that will be thumbnailed"
)

View file

@ -14,10 +14,11 @@
# limitations under the License.
from twisted.internet import defer, reactor
from twisted.internet import defer, reactor, protocol
from twisted.internet.error import DNSLookupError
from twisted.web.client import readBody, _AgentBase, _URI
from twisted.web.http_headers import Headers
from twisted.web._newclient import ResponseDone
from synapse.http.endpoint import matrix_federation_endpoint
from synapse.util.async import sleep
@ -25,7 +26,7 @@ from synapse.util.logcontext import PreserveLoggingContext
from syutil.jsonutil import encode_canonical_json
from synapse.api.errors import CodeMessageException, SynapseError
from synapse.api.errors import CodeMessageException, SynapseError, Codes
from syutil.crypto.jsonsign import sign_json
@ -244,7 +245,7 @@ class MatrixFederationHttpClient(object):
@defer.inlineCallbacks
def get_json(self, destination, path, args={}, retry_on_dns_fail=True):
""" Get's some json from the given host homeserver and path
""" GETs some json from the given host homeserver and path
Args:
destination (str): The remote server to send the HTTP request
@ -252,9 +253,6 @@ class MatrixFederationHttpClient(object):
path (str): The HTTP path.
args (dict): A dictionary used to create query strings, defaults to
None.
**Note**: The value of each key is assumed to be an iterable
and *not* a string.
Returns:
Deferred: Succeeds when we get *any* HTTP response.
@ -289,6 +287,52 @@ class MatrixFederationHttpClient(object):
defer.returnValue(json.loads(body))
@defer.inlineCallbacks
def get_file(self, destination, path, output_stream, args={},
retry_on_dns_fail=True, max_size=None):
"""GETs a file from a given homeserver
Args:
destination (str): The remote server to send the HTTP request to.
path (str): The HTTP path to GET.
output_stream (file): File to write the response body to.
args (dict): Optional dictionary used to create the query string.
Returns:
A (int,dict) tuple of the file length and a dict of the response
headers.
"""
encoded_args = {}
for k, vs in args.items():
if isinstance(vs, basestring):
vs = [vs]
encoded_args[k] = [v.encode("UTF-8") for v in vs]
query_bytes = urllib.urlencode(encoded_args, True)
logger.debug("Query bytes: %s Retry DNS: %s", args, retry_on_dns_fail)
def body_callback(method, url_bytes, headers_dict):
self.sign_request(destination, method, url_bytes, headers_dict)
return None
response = yield self._create_request(
destination.encode("ascii"),
"GET",
path.encode("ascii"),
query_bytes=query_bytes,
body_callback=body_callback,
retry_on_dns_fail=retry_on_dns_fail
)
headers = dict(response.headers.getAllRawHeaders())
try:
length = yield _readBodyToFile(response, output_stream, max_size)
except:
logger.exception("Failed to download body")
raise
defer.returnValue((length, headers))
def _getEndpoint(self, reactor, destination):
return matrix_federation_endpoint(
reactor, destination, timeout=10,
@ -296,6 +340,38 @@ class MatrixFederationHttpClient(object):
)
class _ReadBodyToFileProtocol(protocol.Protocol):
def __init__(self, stream, deferred, max_size):
self.stream = stream
self.deferred = deferred
self.length = 0
self.max_size = max_size
def dataReceived(self, data):
self.stream.write(data)
self.length += len(data)
if self.max_size is not None and self.length >= self.max_size:
self.deferred.errback(SynapseError(
502,
"Requested file is too large > %r bytes" % (self.max_size,),
Codes.TOO_LARGE,
))
self.deferred = defer.Deferred()
self.transport.loseConnection()
def connectionLost(self, reason):
if reason.check(ResponseDone):
self.deferred.callback(self.length)
else:
self.deferred.errback(reason)
def _readBodyToFile(response, stream, max_size):
d = defer.Deferred()
response.deliverBody(_ReadBodyToFileProtocol(stream, d, max_size))
return d
def _print_ex(e):
if hasattr(e, "reasons") and e.reasons:
for ex in e.reasons:

View file

@ -166,14 +166,10 @@ class JsonResource(HttpServer, resource.Resource):
request)
return
if not self._request_user_agent_is_curl(request):
json_bytes = encode_canonical_json(response_json_object)
else:
json_bytes = encode_pretty_printed_json(response_json_object)
# TODO: Only enable CORS for the requests that need it.
respond_with_json_bytes(request, code, json_bytes, send_cors=True,
response_code_message=response_code_message)
respond_with_json(request, code, response_json_object, send_cors=True,
response_code_message=response_code_message,
pretty_print=self._request_user_agent_is_curl)
@staticmethod
def _request_user_agent_is_curl(request):
@ -202,6 +198,17 @@ class RootRedirect(resource.Resource):
return resource.Resource.getChild(self, name, request)
def respond_with_json(request, code, json_object, send_cors=False,
response_code_message=None, pretty_print=False):
if not pretty_print:
json_bytes = encode_pretty_printed_json(json_object)
else:
json_bytes = encode_canonical_json(json_object)
return respond_with_json_bytes(request, code, json_bytes, send_cors,
response_code_message=response_code_message)
def respond_with_json_bytes(request, code, json_bytes, send_cors=False,
response_code_message=None):
"""Sends encoded JSON in response to the given request.

View file

View file

View file

@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .server import respond_with_json_bytes
from synapse.http.server import respond_with_json_bytes
from synapse.util.stringutils import random_string
from synapse.api.errors import (

View file

View file

@ -0,0 +1,368 @@
# -*- coding: utf-8 -*-
# Copyright 2014 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .thumbnailer import Thumbnailer
from synapse.http.server import respond_with_json
from synapse.util.stringutils import random_string
from synapse.api.errors import (
cs_exception, CodeMessageException, cs_error, Codes, SynapseError
)
from twisted.internet import defer
from twisted.web.resource import Resource
from twisted.protocols.basic import FileSender
import os
import logging
logger = logging.getLogger(__name__)
class BaseMediaResource(Resource):
isLeaf = True
def __init__(self, hs, filepaths):
Resource.__init__(self)
self.auth = hs.get_auth()
self.client = hs.get_http_client()
self.clock = hs.get_clock()
self.server_name = hs.hostname
self.store = hs.get_datastore()
self.max_upload_size = hs.config.max_upload_size
self.max_image_pixels = hs.config.max_image_pixels
self.filepaths = filepaths
self.downloads = {}
@staticmethod
def catch_errors(request_handler):
@defer.inlineCallbacks
def wrapped_request_handler(self, request):
try:
yield request_handler(self, request)
except CodeMessageException as e:
logger.exception(e)
respond_with_json(
request, e.code, cs_exception(e), send_cors=True
)
except:
logger.exception(
"Failed handle request %s.%s on %r",
request_handler.__module__,
request_handler.__name__,
self,
)
respond_with_json(
request,
500,
{"error": "Internal server error"},
send_cors=True
)
return wrapped_request_handler
@staticmethod
def _parse_media_id(request):
try:
server_name, media_id = request.postpath
return (server_name, media_id)
except:
raise SynapseError(
404,
"Invalid media id token %r" % (request.postpath,),
Codes.UNKKOWN,
)
@staticmethod
def _parse_integer(request, arg_name, default=None):
try:
if default is None:
return int(request.args[arg_name][0])
else:
return int(request.args.get(arg_name, [default])[0])
except:
raise SynapseError(
400,
"Missing integer argument %r" % (arg_name,),
Codes.UNKNOWN,
)
@staticmethod
def _parse_string(request, arg_name, default=None):
try:
if default is None:
return request.args[arg_name][0]
else:
return request.args.get(arg_name, [default])[0]
except:
raise SynapseError(
400,
"Missing string argument %r" % (arg_name,),
Codes.UNKNOWN,
)
def _respond_404(self, request):
respond_with_json(
request, 404,
cs_error(
"Not found %r" % (request.postpath,),
code=Codes.NOT_FOUND,
),
send_cors=True
)
@staticmethod
def _makedirs(filepath):
dirname = os.path.dirname(filepath)
if not os.path.exists(dirname):
os.makedirs(dirname)
def _get_remote_media(self, server_name, media_id):
key = (server_name, media_id)
download = self.downloads.get(key)
if download is None:
download = self._get_remote_media_impl(server_name, media_id)
self.downloads[key] = download
@download.addBoth
def callback(media_info):
del self.downloads[key]
return download
@defer.inlineCallbacks
def _get_remote_media_impl(self, server_name, media_id):
media_info = yield self.store.get_cached_remote_media(
server_name, media_id
)
if not media_info:
media_info = yield self._download_remote_file(
server_name, media_id
)
defer.returnValue(media_info)
@defer.inlineCallbacks
def _download_remote_file(self, server_name, media_id):
file_id = random_string(24)
fname = self.filepaths.remote_media_filepath(
server_name, file_id
)
self._makedirs(fname)
try:
with open(fname, "wb") as f:
request_path = "/".join((
"/_matrix/media/v1/download", server_name, media_id,
))
length, headers = yield self.client.get_file(
server_name, request_path, output_stream=f,
max_size=self.max_upload_size,
)
media_type = headers["Content-Type"][0]
time_now_ms = self.clock.time_msec()
yield self.store.store_cached_remote_media(
origin=server_name,
media_id=media_id,
media_type=media_type,
time_now_ms=self.clock.time_msec(),
upload_name=None,
media_length=length,
filesystem_id=file_id,
)
except:
os.remove(fname)
raise
media_info = {
"media_type": media_type,
"media_length": length,
"upload_name": None,
"created_ts": time_now_ms,
"filesystem_id": file_id,
}
yield self._generate_remote_thumbnails(
server_name, media_id, media_info
)
defer.returnValue(media_info)
@defer.inlineCallbacks
def _respond_with_file(self, request, media_type, file_path):
logger.debug("Responding with %r", file_path)
if os.path.isfile(file_path):
request.setHeader(b"Content-Type", media_type.encode("UTF-8"))
# cache for at least a day.
# XXX: we might want to turn this off for data we don't want to
# recommend caching as it's sensitive or private - or at least
# select private. don't bother setting Expires as all our
# clients are smart enough to be happy with Cache-Control
request.setHeader(
b"Cache-Control", b"public,max-age=86400,s-maxage=86400"
)
with open(file_path, "rb") as f:
yield FileSender().beginFileTransfer(f, request)
request.finish()
else:
self._respond_404()
def _get_thumbnail_requirements(self, media_type):
if media_type == "image/jpeg":
return (
(32, 32, "crop", "image/jpeg"),
(96, 96, "crop", "image/jpeg"),
(320, 240, "scale", "image/jpeg"),
(640, 480, "scale", "image/jpeg"),
)
elif (media_type == "image/png") or (media_type == "image/gif"):
return (
(32, 32, "crop", "image/png"),
(96, 96, "crop", "image/png"),
(320, 240, "scale", "image/png"),
(640, 480, "scale", "image/png"),
)
else:
return ()
@defer.inlineCallbacks
def _generate_local_thumbnails(self, media_id, media_info):
media_type = media_info["media_type"]
requirements = self._get_thumbnail_requirements(media_type)
if not requirements:
return
input_path = self.filepaths.local_media_filepath(media_id)
thumbnailer = Thumbnailer(input_path)
m_width = thumbnailer.width
m_height = thumbnailer.height
if m_width * m_height >= self.max_image_pixels:
logger.info(
"Image too large to thumbnail %r x %r > %r",
m_width, m_height, self.max_image_pixels
)
return
scales = set()
crops = set()
for r_width, r_height, r_method, r_type in requirements:
if r_method == "scale":
t_width, t_height = thumbnailer.aspect(r_width, r_height)
scales.add((
min(m_width, t_width), min(m_height, t_height), r_type,
))
elif r_method == "crop":
crops.add((r_width, r_height, r_type))
for t_width, t_height, t_type in scales:
t_method = "scale"
t_path = self.filepaths.local_media_thumbnail(
media_id, t_width, t_height, t_type, t_method
)
self._makedirs(t_path)
t_len = thumbnailer.scale(t_path, t_width, t_height, t_type)
yield self.store.store_local_thumbnail(
media_id, t_width, t_height, t_type, t_method, t_len
)
for t_width, t_height, t_type in crops:
if (t_width, t_height, t_type) in scales:
# If the aspect ratio of the cropped thumbnail matches a purely
# scaled one then there is no point in calculating a separate
# thumbnail.
continue
t_method = "crop"
t_path = self.filepaths.local_media_thumbnail(
media_id, t_width, t_height, t_type, t_method
)
self._makedirs(t_path)
t_len = thumbnailer.crop(t_path, t_width, t_height, t_type)
yield self.store.store_local_thumbnail(
media_id, t_width, t_height, t_type, t_method, t_len
)
defer.returnValue({
"width": m_width,
"height": m_height,
})
@defer.inlineCallbacks
def _generate_remote_thumbnails(self, server_name, media_id, media_info):
media_type = media_info["media_type"]
file_id = media_info["filesystem_id"]
requirements = self._get_thumbnail_requirements(media_type)
if not requirements:
return
input_path = self.filepaths.remote_media_filepath(server_name, file_id)
thumbnailer = Thumbnailer(input_path)
m_width = thumbnailer.width
m_height = thumbnailer.height
if m_width * m_height >= self.max_image_pixels:
logger.info(
"Image too large to thumbnail %r x %r > %r",
m_width, m_height, self.max_image_pixels
)
return
scales = set()
crops = set()
for r_width, r_height, r_method, r_type in requirements:
if r_method == "scale":
t_width, t_height = thumbnailer.aspect(r_width, r_height)
scales.add((
min(m_width, t_width), min(m_height, t_height), r_type,
))
elif r_method == "crop":
crops.add((r_width, r_height, r_type))
for t_width, t_height, t_type in scales:
t_method = "scale"
t_path = self.filepaths.remote_media_thumbnail(
server_name, file_id, t_width, t_height, t_type, t_method
)
self._makedirs(t_path)
t_len = thumbnailer.scale(t_path, t_width, t_height, t_type)
yield self.store.store_remote_media_thumbnail(
server_name, media_id, file_id,
t_width, t_height, t_type, t_method, t_len
)
for t_width, t_height, t_type in crops:
if (t_width, t_height, t_type) in scales:
# If the aspect ratio of the cropped thumbnail matches a purely
# scaled one then there is no point in calculating a separate
# thumbnail.
continue
t_method = "crop"
t_path = self.filepaths.remote_media_thumbnail(
server_name, file_id, t_width, t_height, t_type, t_method
)
self._makedirs(t_path)
t_len = thumbnailer.crop(t_path, t_width, t_height, t_type)
yield self.store.store_remote_media_thumbnail(
server_name, media_id, file_id,
t_width, t_height, t_type, t_method, t_len
)
defer.returnValue({
"width": m_width,
"height": m_height,
})

View file

@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-
# Copyright 2014 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .base_resource import BaseMediaResource
from twisted.web.server import NOT_DONE_YET
from twisted.internet import defer
import logging
logger = logging.getLogger(__name__)
class DownloadResource(BaseMediaResource):
def render_GET(self, request):
self._async_render_GET(request)
return NOT_DONE_YET
@BaseMediaResource.catch_errors
@defer.inlineCallbacks
def _async_render_GET(self, request):
try:
server_name, media_id = request.postpath
except:
self._respond_404(request)
return
if server_name == self.server_name:
yield self._respond_local_file(request, media_id)
else:
yield self._respond_remote_file(request, server_name, media_id)
@defer.inlineCallbacks
def _respond_local_file(self, request, media_id):
media_info = yield self.store.get_local_media(media_id)
if not media_info:
self._respond_404()
return
media_type = media_info["media_type"]
file_path = self.filepaths.local_media_filepath(media_id)
yield self._respond_with_file(request, media_type, file_path)
@defer.inlineCallbacks
def _respond_remote_file(self, request, server_name, media_id):
media_info = yield self._get_remote_media(server_name, media_id)
media_type = media_info["media_type"]
filesystem_id = media_info["filesystem_id"]
file_path = self.filepaths.remote_media_filepath(
server_name, filesystem_id
)
yield self._respond_with_file(request, media_type, file_path)

View file

@ -0,0 +1,67 @@
# -*- coding: utf-8 -*-
# Copyright 2014 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
class MediaFilePaths(object):
def __init__(self, base_path):
self.base_path = base_path
def default_thumbnail(self, default_top_level, default_sub_type, width,
height, content_type, method):
top_level_type, sub_type = content_type.split("/")
file_name = "%i-%i-%s-%s-%s" % (
width, height, top_level_type, sub_type, method
)
return os.path.join(
self.base_path, "default_thumbnails", default_top_level,
default_sub_type, file_name
)
def local_media_filepath(self, media_id):
return os.path.join(
self.base_path, "local_content",
media_id[0:2], media_id[2:4], media_id[4:]
)
def local_media_thumbnail(self, media_id, width, height, content_type,
method):
top_level_type, sub_type = content_type.split("/")
file_name = "%i-%i-%s-%s-%s" % (
width, height, top_level_type, sub_type, method
)
return os.path.join(
self.base_path, "local_thumbnails",
media_id[0:2], media_id[2:4], media_id[4:],
file_name
)
def remote_media_filepath(self, server_name, file_id):
return os.path.join(
self.base_path, "remote_content", server_name,
file_id[0:2], file_id[2:4], file_id[4:]
)
def remote_media_thumbnail(self, server_name, file_id, width, height,
content_type, method):
top_level_type, sub_type = content_type.split("/")
file_name = "%i-%i-%s-%s" % (width, height, top_level_type, sub_type)
return os.path.join(
self.base_path, "remote_thumbnail", server_name,
file_id[0:2], file_id[2:4], file_id[4:],
file_name
)

View file

@ -0,0 +1,77 @@
# -*- coding: utf-8 -*-
# Copyright 2014 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .upload_resource import UploadResource
from .download_resource import DownloadResource
from .thumbnail_resource import ThumbnailResource
from .filepath import MediaFilePaths
from twisted.web.resource import Resource
import logging
logger = logging.getLogger(__name__)
class MediaRepositoryResource(Resource):
"""Profiles file uploading and downloading.
Uploads are POSTed to a resource which returns a token which is used to GET
the download::
=> POST /_matrix/media/v1/upload HTTP/1.1
Content-Type: <media-type>
<media>
<= HTTP/1.1 200 OK
Content-Type: application/json
{ "token": <media-id> }
=> GET /_matrix/media/v1/download/<media-id> HTTP/1.1
<= HTTP/1.1 200 OK
Content-Type: <media-type>
Content-Disposition: attachment;filename=<upload-filename>
<media>
Clients can get thumbnails by supplying a desired width and height and
thumbnailing method::
=> GET /_matrix/media/v1
/thumbnail/<media-id>?width=<w>&height=<h>&method=<m> HTTP/1.1
<= HTTP/1.1 200 OK
Content-Type: image/jpeg or image/png
<thumbnail>
The thumbnail methods are "crop" and "scale". "scale" trys to return an
image where either the width or the height is smaller than the requested
size. The client should then scale and letterbox the image if it needs to
fit within a given rectangle. "crop" trys to return an image where the
width and height are close to the requested size and the aspect matches
the requested size. The client should scale the image if it needs to fit
within a given rectangle.
"""
def __init__(self, hs):
Resource.__init__(self)
filepaths = MediaFilePaths(hs.config.media_store_path)
self.putChild("upload", UploadResource(hs, filepaths))
self.putChild("download", DownloadResource(hs, filepaths))
self.putChild("thumbnail", ThumbnailResource(hs, filepaths))

View file

@ -0,0 +1,182 @@
# -*- coding: utf-8 -*-
# Copyright 2014 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .base_resource import BaseMediaResource
from twisted.web.server import NOT_DONE_YET
from twisted.internet import defer
import logging
logger = logging.getLogger(__name__)
class ThumbnailResource(BaseMediaResource):
isLeaf = True
def render_GET(self, request):
self._async_render_GET(request)
return NOT_DONE_YET
@BaseMediaResource.catch_errors
@defer.inlineCallbacks
def _async_render_GET(self, request):
server_name, media_id = self._parse_media_id(request)
width = self._parse_integer(request, "width")
height = self._parse_integer(request, "height")
method = self._parse_string(request, "method", "scale")
m_type = self._parse_string(request, "type", "image/png")
if server_name == self.server_name:
yield self._respond_local_thumbnail(
request, media_id, width, height, method, m_type
)
else:
yield self._respond_remote_thumbnail(
request, server_name, media_id,
width, height, method, m_type
)
@defer.inlineCallbacks
def _respond_local_thumbnail(self, request, media_id, width, height,
method, m_type):
media_info = yield self.store.get_local_media(media_id)
if not media_info:
self._respond_404(request)
return
thumbnail_infos = yield self.store.get_local_media_thumbnails(media_id)
if thumbnail_infos:
thumbnail_info = self._select_thumbnail(
width, height, method, m_type, thumbnail_infos
)
t_width = thumbnail_info["thumbnail_width"]
t_height = thumbnail_info["thumbnail_height"]
t_type = thumbnail_info["thumbnail_type"]
t_method = thumbnail_info["thumbnail_method"]
file_path = self.filepaths.local_media_thumbnail(
media_id, t_width, t_height, t_type, t_method,
)
yield self._respond_with_file(request, t_type, file_path)
else:
yield self._respond_default_thumbnail(
request, media_info, width, height, method, m_type,
)
@defer.inlineCallbacks
def _respond_remote_thumbnail(self, request, server_name, media_id, width,
height, method, m_type):
# TODO: Don't download the whole remote file
# We should proxy the thumbnail from the remote server instead.
media_info = yield self._get_remote_media(server_name, media_id)
thumbnail_infos = yield self.store.get_remote_media_thumbnails(
server_name, media_id,
)
if thumbnail_infos:
thumbnail_info = self._select_thumbnail(
width, height, method, m_type, thumbnail_infos
)
t_width = thumbnail_info["thumbnail_width"]
t_height = thumbnail_info["thumbnail_height"]
t_type = thumbnail_info["thumbnail_type"]
t_method = thumbnail_info["thumbnail_method"]
file_id = thumbnail_info["filesystem_id"]
file_path = self.filepaths.remote_media_thumbnail(
server_name, file_id, t_width, t_height, t_type, t_method,
)
yield self._respond_with_file(request, t_type, file_path)
else:
yield self._respond_default_thumbnail(
request, media_info, width, height, method, m_type,
)
@defer.inlineCallbacks
def _respond_default_thumbnail(self, request, media_info, width, height,
method, m_type):
media_type = media_info["media_type"]
top_level_type = media_type.split("/")[0]
sub_type = media_type.split("/")[-1].split(";")[0]
thumbnail_infos = yield self.store.get_default_thumbnails(
top_level_type, sub_type,
)
if not thumbnail_infos:
thumbnail_infos = yield self.store.get_default_thumbnails(
top_level_type, "_default",
)
if not thumbnail_infos:
thumbnail_infos = yield self.store.get_default_thumbnails(
"_default", "_default",
)
if not thumbnail_infos:
self._respond_404(request)
return
thumbnail_info = self._select_thumbnail(
width, height, "crop", m_type, thumbnail_infos
)
t_width = thumbnail_info["thumbnail_width"]
t_height = thumbnail_info["thumbnail_height"]
t_type = thumbnail_info["thumbnail_type"]
t_method = thumbnail_info["thumbnail_method"]
file_path = self.filepaths.default_thumbnail(
top_level_type, sub_type, t_width, t_height, t_type, t_method,
)
yield self.respond_with_file(request, t_type, file_path)
def _select_thumbnail(self, desired_width, desired_height, desired_method,
desired_type, thumbnail_infos):
d_w = desired_width
d_h = desired_height
if desired_method.lower() == "crop":
info_list = []
for info in thumbnail_infos:
t_w = info["thumbnail_width"]
t_h = info["thumbnail_height"]
t_method = info["thumbnail_method"]
if t_method == "scale" or t_method == "crop":
aspect_quality = abs(d_w * t_h - d_h * t_w)
size_quality = abs((d_w - t_w) * (d_h - t_h))
type_quality = desired_type != info["thumbnail_type"]
length_quality = info["thumbnail_length"]
info_list.append((
aspect_quality, size_quality, type_quality,
length_quality, info
))
return min(info_list)[-1]
else:
info_list = []
for info in thumbnail_infos:
t_w = info["thumbnail_width"]
t_h = info["thumbnail_height"]
t_method = info["thumbnail_method"]
if t_method == "scale" and (t_w >= d_w or t_h >= d_h):
size_quality = abs((d_w - t_w) * (d_h - t_h))
type_quality = desired_type != info["thumbnail_type"]
length_quality = info["thumbnail_length"]
info_list.append((
size_quality, type_quality, length_quality, info
))
return min(info_list)[-1]

View file

@ -0,0 +1,89 @@
# -*- coding: utf-8 -*-
# Copyright 2014 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import PIL.Image as Image
from io import BytesIO
class Thumbnailer(object):
FORMATS = {
"image/jpeg": "JPEG",
"image/png": "PNG",
}
def __init__(self, input_path):
self.image = Image.open(input_path)
self.width, self.height = self.image.size
def aspect(self, max_width, max_height):
"""Calculate the largest size that preserves aspect ratio which
fits within the given rectangle::
(w_in / h_in) = (w_out / h_out)
w_out = min(w_max, h_max * (w_in / h_in))
h_out = min(h_max, w_max * (h_in / w_in))
Args:
max_width: The largest possible width.
max_height: The larget possible height.
"""
if max_width * self.height < max_height * self.width:
return (max_width, (max_width * self.height) // self.width)
else:
return ((max_height * self.width) // self.height, max_height)
def scale(self, output_path, width, height, output_type):
"""Rescales the image to the given dimensions"""
scaled = self.image.resize((width, height), Image.BILINEAR)
return self.save_image(scaled, output_type, output_path)
def crop(self, output_path, width, height, output_type):
"""Rescales and crops the image to the given dimensions preserving
aspect::
(w_in / h_in) = (w_scaled / h_scaled)
w_scaled = max(w_out, h_out * (w_in / h_in))
h_scaled = max(h_out, w_out * (h_in / w_in))
Args:
max_width: The largest possible width.
max_height: The larget possible height.
"""
if width * self.height > height * self.width:
scaled_height = (width * self.height) // self.width
scaled_image = self.image.resize(
(width, scaled_height), Image.BILINEAR
)
crop_top = (scaled_height - height) // 2
crop_bottom = height + crop_top
cropped = scaled_image.crop((0, crop_top, width, crop_bottom))
else:
scaled_width = (height * self.width) // self.height
scaled_image = self.image.resize(
(scaled_width, height), Image.BILINEAR
)
crop_left = (scaled_width - width) // 2
crop_right = width + crop_left
cropped = scaled_image.crop((crop_left, 0, crop_right, height))
return self.save_image(cropped, output_type, output_path)
def save_image(self, output_image, output_type, output_path):
output_bytes_io = BytesIO()
output_image.save(output_bytes_io, self.FORMATS[output_type])
output_bytes = output_bytes_io.getvalue()
with open(output_path, "wb") as output_file:
output_file.write(output_bytes)
return len(output_bytes)

View file

@ -0,0 +1,111 @@
# -*- coding: utf-8 -*-
# Copyright 2014 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from synapse.http.server import respond_with_json
from synapse.util.stringutils import random_string
from synapse.api.errors import (
cs_exception, SynapseError, CodeMessageException
)
from twisted.web.server import NOT_DONE_YET
from twisted.internet import defer
from .base_resource import BaseMediaResource
import logging
logger = logging.getLogger(__name__)
class UploadResource(BaseMediaResource):
def render_POST(self, request):
self._async_render_POST(request)
return NOT_DONE_YET
def render_OPTIONS(self, request):
respond_with_json(request, 200, {}, send_cors=True)
return NOT_DONE_YET
@defer.inlineCallbacks
def _async_render_POST(self, request):
try:
auth_user = yield self.auth.get_user_by_req(request)
# TODO: The checks here are a bit late. The content will have
# already been uploaded to a tmp file at this point
content_length = request.getHeader("Content-Length")
if content_length is None:
raise SynapseError(
msg="Request must specify a Content-Length", code=400
)
if int(content_length) > self.max_upload_size:
raise SynapseError(
msg="Upload request body is too large",
code=413,
)
headers = request.requestHeaders
if headers.hasHeader("Content-Type"):
media_type = headers.getRawHeaders("Content-Type")[0]
else:
raise SynapseError(
msg="Upload request missing 'Content-Type'",
code=400,
)
#if headers.hasHeader("Content-Disposition"):
# disposition = headers.getRawHeaders("Content-Disposition")[0]
# TODO(markjh): parse content-dispostion
media_id = random_string(24)
fname = self.filepaths.local_media_filepath(media_id)
self._makedirs(fname)
# This shouldn't block for very long because the content will have
# already been uploaded at this point.
with open(fname, "wb") as f:
f.write(request.content.read())
yield self.store.store_local_media(
media_id=media_id,
media_type=media_type,
time_now_ms=self.clock.time_msec(),
upload_name=None,
media_length=content_length,
user_id=auth_user,
)
media_info = {
"media_type": media_type,
"media_length": content_length,
}
yield self._generate_local_thumbnails(media_id, media_info)
respond_with_json(
request, 200, {"content_token": media_id}, send_cors=True
)
except CodeMessageException as e:
logger.exception(e)
respond_with_json(request, e.code, cs_exception(e), send_cors=True)
except:
logger.exception("Failed to store file")
respond_with_json(
request,
500,
{"error": "Internal server error"},
send_cors=True
)

View file

@ -78,6 +78,7 @@ class BaseHomeServer(object):
'resource_for_web_client',
'resource_for_content_repo',
'resource_for_server_key',
'resource_for_media_repository',
'event_sources',
'ratelimiter',
'keyring',

View file

@ -33,6 +33,7 @@ from .stream import StreamStore
from .transactions import TransactionStore
from .keys import KeyStore
from .event_federation import EventFederationStore
from .media_repository import MediaRepositoryStore
from .state import StateStore
from .signatures import SignatureStore
@ -62,6 +63,7 @@ SCHEMAS = [
"state",
"event_edges",
"event_signatures",
"media_repository",
]
@ -81,7 +83,9 @@ class DataStore(RoomMemberStore, RoomStore,
RegistrationStore, StreamStore, ProfileStore, FeedbackStore,
PresenceStore, TransactionStore,
DirectoryStore, KeyStore, StateStore, SignatureStore,
EventFederationStore, ):
EventFederationStore,
MediaRepositoryStore,
):
def __init__(self, hs):
super(DataStore, self).__init__(hs)

View file

@ -0,0 +1,129 @@
# -*- coding: utf-8 -*-
# Copyright 2014 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from _base import SQLBaseStore
class MediaRepositoryStore(SQLBaseStore):
"""Persistence for attachments and avatars"""
def get_default_thumbnails(self, top_level_type, sub_type):
return []
def get_local_media(self, media_id):
"""Get the metadata for a local piece of media
Returns:
None if the meia_id doesn't exist.
"""
return self._simple_select_one(
"local_media_repository",
{"media_id": media_id},
("media_type", "media_length", "upload_name", "created_ts"),
allow_none=True,
)
def store_local_media(self, media_id, media_type, time_now_ms, upload_name,
media_length, user_id):
return self._simple_insert(
"local_media_repository",
{
"media_id": media_id,
"media_type": media_type,
"created_ts": time_now_ms,
"upload_name": upload_name,
"media_length": media_length,
"user_id": user_id.to_string(),
}
)
def get_local_media_thumbnails(self, media_id):
return self._simple_select_list(
"local_media_repository_thumbnails",
{"media_id": media_id},
(
"thumbnail_width", "thumbnail_height", "thumbnail_method",
"thumbnail_type", "thumbnail_length",
)
)
def store_local_thumbnail(self, media_id, thumbnail_width,
thumbnail_height, thumbnail_type,
thumbnail_method, thumbnail_length):
return self._simple_insert(
"local_media_repository_thumbnails",
{
"media_id": media_id,
"thumbnail_width": thumbnail_width,
"thumbnail_height": thumbnail_height,
"thumbnail_method": thumbnail_method,
"thumbnail_type": thumbnail_type,
"thumbnail_length": thumbnail_length,
}
)
def get_cached_remote_media(self, origin, media_id):
return self._simple_select_one(
"remote_media_cache",
{"media_origin": origin, "media_id": media_id},
(
"media_type", "media_length", "upload_name", "created_ts",
"filesystem_id",
),
allow_none=True,
)
def store_cached_remote_media(self, origin, media_id, media_type,
media_length, time_now_ms, upload_name,
filesystem_id):
return self._simple_insert(
"remote_media_cache",
{
"media_origin": origin,
"media_id": media_id,
"media_type": media_type,
"media_length": media_length,
"created_ts": time_now_ms,
"upload_name": upload_name,
"filesystem_id": filesystem_id,
}
)
def get_remote_media_thumbnails(self, origin, media_id):
return self._simple_select_list(
"remote_media_cache_thumbnails",
{"media_origin": origin, "media_id": media_id},
(
"thumbnail_width", "thumbnail_height", "thumbnail_method",
"thumbnail_type", "thumbnail_length", "filesystem_id",
)
)
def store_remote_media_thumbnail(self, origin, media_id, filesystem_id,
thumbnail_width, thumbnail_height,
thumbnail_type, thumbnail_method,
thumbnail_length):
return self._simple_insert(
"remote_media_cache_thumbnails",
{
"media_origin": origin,
"media_id": media_id,
"thumbnail_width": thumbnail_width,
"thumbnail_height": thumbnail_height,
"thumbnail_method": thumbnail_method,
"thumbnail_type": thumbnail_type,
"thumbnail_length": thumbnail_length,
"filesystem_id": filesystem_id,
}
)

View file

@ -0,0 +1,68 @@
/* Copyright 2014 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CREATE TABLE IF NOT EXISTS local_media_repository (
media_id TEXT, -- The id used to refer to the media.
media_type TEXT, -- The MIME-type of the media.
media_length INTEGER, -- Length of the media in bytes.
created_ts INTEGER, -- When the content was uploaded in ms.
upload_name TEXT, -- The name the media was uploaded with.
user_id TEXT, -- The user who uploaded the file.
CONSTRAINT uniqueness UNIQUE (media_id)
);
CREATE TABLE IF NOT EXISTS local_media_repository_thumbnails (
media_id TEXT, -- The id used to refer to the media.
thumbnail_width INTEGER, -- The width of the thumbnail in pixels.
thumbnail_height INTEGER, -- The height of the thumbnail in pixels.
thumbnail_type TEXT, -- The MIME-type of the thumbnail.
thumbnail_method TEXT, -- The method used to make the thumbnail.
thumbnail_length INTEGER, -- The length of the thumbnail in bytes.
CONSTRAINT uniqueness UNIQUE (
media_id, thumbnail_width, thumbnail_height, thumbnail_type
)
);
CREATE INDEX IF NOT EXISTS local_media_repository_thumbnails_media_id
ON local_media_repository_thumbnails (media_id);
CREATE TABLE IF NOT EXISTS remote_media_cache (
media_origin TEXT, -- The remote HS the media came from.
media_id TEXT, -- The id used to refer to the media on that server.
media_type TEXT, -- The MIME-type of the media.
created_ts INTEGER, -- When the content was uploaded in ms.
upload_name TEXT, -- The name the media was uploaded with.
media_length INTEGER, -- Length of the media in bytes.
filesystem_id TEXT, -- The name used to store the media on disk.
CONSTRAINT uniqueness UNIQUE (media_origin, media_id)
);
CREATE TABLE IF NOT EXISTS remote_media_cache_thumbnails (
media_origin TEXT, -- The remote HS the media came from.
media_id TEXT, -- The id used to refer to the media.
thumbnail_width INTEGER, -- The width of the thumbnail in pixels.
thumbnail_height INTEGER, -- The height of the thumbnail in pixels.
thumbnail_method TEXT, -- The method used to make the thumbnail
thumbnail_type TEXT, -- The MIME-type of the thumbnail.
thumbnail_length INTEGER, -- The length of the thumbnail in bytes.
filesystem_id TEXT, -- The name used to store the media on disk.
CONSTRAINT uniqueness UNIQUE (
media_origin, media_id, thumbnail_width, thumbnail_height,
thumbnail_type, thumbnail_type
)
);
CREATE INDEX IF NOT EXISTS remote_media_cache_thumbnails_media_id
ON local_media_repository_thumbnails (media_id);