Prepare for authenticated media freeze (#17433)

As part of the rollout of
[MSC3916](https://github.com/matrix-org/matrix-spec-proposals/blob/main/proposals/3916-authentication-for-media.md)
this PR adds support for designating authenticated media and ensuring
that authenticated media is not served over unauthenticated endpoints.
This commit is contained in:
Shay 2024-07-22 02:33:17 -07:00 committed by GitHub
parent d3f9afd8d9
commit dc8ddc6472
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 362 additions and 12 deletions

View file

@ -0,0 +1 @@
Prepare for authenticated media freeze.

View file

@ -1863,6 +1863,18 @@ federation_rr_transactions_per_room_per_second: 40
## Media Store
Config options related to Synapse's media store.
---
### `enable_authenticated_media`
When set to true, all subsequent media uploads will be marked as authenticated, and will not be available over the legacy
unauthenticated media endpoints (`/_matrix/media/(r0|v3|v1)/download` and `/_matrix/media/(r0|v3|v1)/thumbnail`). Requests for authenticated media over these endpoints will result in a 404. All media, including authenticated media, will be available over the authenticated media endpoints `/_matrix/client/v1/media/download` and `/_matrix/client/v1/media/thumbnail`. Media uploaded prior to setting this option to true will still be available over the legacy endpoints. Note that if the setting is switched back to false
after being enabled, media marked as authenticated will once again be available over the legacy endpoints. Defaults to false, but
this will change to true in a future Synapse release.
Example configuration:
```yaml
enable_authenticated_media: true
```
---
### `enable_media_repo`

View file

@ -119,18 +119,19 @@ BOOLEAN_COLUMNS = {
"e2e_room_keys": ["is_verified"],
"event_edges": ["is_state"],
"events": ["processed", "outlier", "contains_url"],
"local_media_repository": ["safe_from_quarantine"],
"local_media_repository": ["safe_from_quarantine", "authenticated"],
"per_user_experimental_features": ["enabled"],
"presence_list": ["accepted"],
"presence_stream": ["currently_active"],
"public_room_list_stream": ["visibility"],
"pushers": ["enabled"],
"redactions": ["have_censored"],
"remote_media_cache": ["authenticated"],
"room_stats_state": ["is_federatable"],
"rooms": ["is_public", "has_auth_chain_index"],
"users": ["shadow_banned", "approved", "locked", "suspended"],
"un_partial_stated_event_stream": ["rejection_status_changed"],
"users_who_share_rooms": ["share_private"],
"per_user_experimental_features": ["enabled"],
}

View file

@ -272,6 +272,10 @@ class ContentRepositoryConfig(Config):
remote_media_lifetime
)
self.enable_authenticated_media = config.get(
"enable_authenticated_media", False
)
def generate_config_section(self, data_dir_path: str, **kwargs: Any) -> str:
assert data_dir_path is not None
media_store = os.path.join(data_dir_path, "media_store")

View file

@ -430,6 +430,7 @@ class MediaRepository:
media_id: str,
name: Optional[str],
max_timeout_ms: int,
allow_authenticated: bool = True,
federation: bool = False,
) -> None:
"""Responds to requests for local media, if exists, or returns 404.
@ -442,6 +443,7 @@ class MediaRepository:
the filename in the Content-Disposition header of the response.
max_timeout_ms: the maximum number of milliseconds to wait for the
media to be uploaded.
allow_authenticated: whether media marked as authenticated may be served to this request
federation: whether the local media being fetched is for a federation request
Returns:
@ -451,6 +453,10 @@ class MediaRepository:
if not media_info:
return
if self.hs.config.media.enable_authenticated_media and not allow_authenticated:
if media_info.authenticated:
raise NotFoundError()
self.mark_recently_accessed(None, media_id)
media_type = media_info.media_type
@ -481,6 +487,7 @@ class MediaRepository:
max_timeout_ms: int,
ip_address: str,
use_federation_endpoint: bool,
allow_authenticated: bool = True,
) -> None:
"""Respond to requests for remote media.
@ -495,6 +502,8 @@ class MediaRepository:
ip_address: the IP address of the requester
use_federation_endpoint: whether to request the remote media over the new
federation `/download` endpoint
allow_authenticated: whether media marked as authenticated may be served to this
request
Returns:
Resolves once a response has successfully been written to request
@ -526,6 +535,7 @@ class MediaRepository:
self.download_ratelimiter,
ip_address,
use_federation_endpoint,
allow_authenticated,
)
# We deliberately stream the file outside the lock
@ -548,6 +558,7 @@ class MediaRepository:
max_timeout_ms: int,
ip_address: str,
use_federation: bool,
allow_authenticated: bool,
) -> RemoteMedia:
"""Gets the media info associated with the remote file, downloading
if necessary.
@ -560,6 +571,8 @@ class MediaRepository:
ip_address: IP address of the requester
use_federation: if a download is necessary, whether to request the remote file
over the federation `/download` endpoint
allow_authenticated: whether media marked as authenticated may be served to this
request
Returns:
The media info of the file
@ -581,6 +594,7 @@ class MediaRepository:
self.download_ratelimiter,
ip_address,
use_federation,
allow_authenticated,
)
# Ensure we actually use the responder so that it releases resources
@ -598,6 +612,7 @@ class MediaRepository:
download_ratelimiter: Ratelimiter,
ip_address: str,
use_federation_endpoint: bool,
allow_authenticated: bool,
) -> Tuple[Optional[Responder], RemoteMedia]:
"""Looks for media in local cache, if not there then attempt to
download from remote server.
@ -619,6 +634,11 @@ class MediaRepository:
"""
media_info = await self.store.get_cached_remote_media(server_name, media_id)
if self.hs.config.media.enable_authenticated_media and not allow_authenticated:
# if it isn't cached then don't fetch it or if it's authenticated then don't serve it
if not media_info or media_info.authenticated:
raise NotFoundError()
# file_id is the ID we use to track the file locally. If we've already
# seen the file then reuse the existing ID, otherwise generate a new
# one.
@ -792,6 +812,11 @@ class MediaRepository:
logger.info("Stored remote media in file %r", fname)
if self.hs.config.media.enable_authenticated_media:
authenticated = True
else:
authenticated = False
return RemoteMedia(
media_origin=server_name,
media_id=media_id,
@ -802,6 +827,7 @@ class MediaRepository:
filesystem_id=file_id,
last_access_ts=time_now_ms,
quarantined_by=None,
authenticated=authenticated,
)
async def _federation_download_remote_file(
@ -915,6 +941,11 @@ class MediaRepository:
logger.debug("Stored remote media in file %r", fname)
if self.hs.config.media.enable_authenticated_media:
authenticated = True
else:
authenticated = False
return RemoteMedia(
media_origin=server_name,
media_id=media_id,
@ -925,6 +956,7 @@ class MediaRepository:
filesystem_id=file_id,
last_access_ts=time_now_ms,
quarantined_by=None,
authenticated=authenticated,
)
def _get_thumbnail_requirements(
@ -1030,7 +1062,12 @@ class MediaRepository:
t_len = os.path.getsize(output_path)
await self.store.store_local_thumbnail(
media_id, t_width, t_height, t_type, t_method, t_len
media_id,
t_width,
t_height,
t_type,
t_method,
t_len,
)
return output_path

View file

@ -26,7 +26,7 @@ from typing import TYPE_CHECKING, List, Optional, Tuple, Type
from PIL import Image
from synapse.api.errors import Codes, SynapseError, cs_error
from synapse.api.errors import Codes, NotFoundError, SynapseError, cs_error
from synapse.config.repository import THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP
from synapse.http.server import respond_with_json
from synapse.http.site import SynapseRequest
@ -274,6 +274,7 @@ class ThumbnailProvider:
m_type: str,
max_timeout_ms: int,
for_federation: bool,
allow_authenticated: bool = True,
) -> None:
media_info = await self.media_repo.get_local_media_info(
request, media_id, max_timeout_ms
@ -281,6 +282,12 @@ class ThumbnailProvider:
if not media_info:
return
# if the media the thumbnail is generated from is authenticated, don't serve the
# thumbnail over an unauthenticated endpoint
if self.hs.config.media.enable_authenticated_media and not allow_authenticated:
if media_info.authenticated:
raise NotFoundError()
thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
await self._select_and_respond_with_thumbnail(
request,
@ -307,14 +314,20 @@ class ThumbnailProvider:
desired_type: str,
max_timeout_ms: int,
for_federation: bool,
allow_authenticated: bool = True,
) -> None:
media_info = await self.media_repo.get_local_media_info(
request, media_id, max_timeout_ms
)
if not media_info:
return
# if the media the thumbnail is generated from is authenticated, don't serve the
# thumbnail over an unauthenticated endpoint
if self.hs.config.media.enable_authenticated_media and not allow_authenticated:
if media_info.authenticated:
raise NotFoundError()
thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
for info in thumbnail_infos:
t_w = info.width == desired_width
@ -381,14 +394,27 @@ class ThumbnailProvider:
max_timeout_ms: int,
ip_address: str,
use_federation: bool,
allow_authenticated: bool = True,
) -> None:
media_info = await self.media_repo.get_remote_media_info(
server_name, media_id, max_timeout_ms, ip_address, use_federation
server_name,
media_id,
max_timeout_ms,
ip_address,
use_federation,
allow_authenticated,
)
if not media_info:
respond_404(request)
return
# if the media the thumbnail is generated from is authenticated, don't serve the
# thumbnail over an unauthenticated endpoint
if self.hs.config.media.enable_authenticated_media and not allow_authenticated:
if media_info.authenticated:
respond_404(request)
return
thumbnail_infos = await self.store.get_remote_media_thumbnails(
server_name, media_id
)
@ -446,16 +472,28 @@ class ThumbnailProvider:
max_timeout_ms: int,
ip_address: str,
use_federation: bool,
allow_authenticated: bool = True,
) -> None:
# TODO: Don't download the whole remote file
# We should proxy the thumbnail from the remote server instead of
# downloading the remote file and generating our own thumbnails.
media_info = await self.media_repo.get_remote_media_info(
server_name, media_id, max_timeout_ms, ip_address, use_federation
server_name,
media_id,
max_timeout_ms,
ip_address,
use_federation,
allow_authenticated,
)
if not media_info:
return
# if the media the thumbnail is generated from is authenticated, don't serve the
# thumbnail over an unauthenticated endpoint
if self.hs.config.media.enable_authenticated_media and not allow_authenticated:
if media_info.authenticated:
raise NotFoundError()
thumbnail_infos = await self.store.get_remote_media_thumbnails(
server_name, media_id
)
@ -485,8 +523,8 @@ class ThumbnailProvider:
file_id: str,
url_cache: bool,
for_federation: bool,
server_name: Optional[str] = None,
media_info: Optional[LocalMedia] = None,
server_name: Optional[str] = None,
) -> None:
"""
Respond to a request with an appropriate thumbnail from the previously generated thumbnails.

View file

@ -84,7 +84,7 @@ class DownloadResource(RestServlet):
if self._is_mine_server_name(server_name):
await self.media_repo.get_local_media(
request, media_id, file_name, max_timeout_ms
request, media_id, file_name, max_timeout_ms, allow_authenticated=False
)
else:
allow_remote = parse_boolean(request, "allow_remote", default=True)
@ -106,4 +106,5 @@ class DownloadResource(RestServlet):
max_timeout_ms,
ip_address,
False,
allow_authenticated=False,
)

View file

@ -96,6 +96,7 @@ class ThumbnailResource(RestServlet):
m_type,
max_timeout_ms,
False,
allow_authenticated=False,
)
else:
await self.thumbnail_provider.respond_local_thumbnail(
@ -107,6 +108,7 @@ class ThumbnailResource(RestServlet):
m_type,
max_timeout_ms,
False,
allow_authenticated=False,
)
self.media_repo.mark_recently_accessed(None, media_id)
else:
@ -134,6 +136,7 @@ class ThumbnailResource(RestServlet):
m_type,
max_timeout_ms,
ip_address,
False,
use_federation=False,
allow_authenticated=False,
)
self.media_repo.mark_recently_accessed(server_name, media_id)

View file

@ -64,6 +64,7 @@ class LocalMedia:
quarantined_by: Optional[str]
safe_from_quarantine: bool
user_id: Optional[str]
authenticated: Optional[bool]
@attr.s(slots=True, frozen=True, auto_attribs=True)
@ -77,6 +78,7 @@ class RemoteMedia:
created_ts: int
last_access_ts: int
quarantined_by: Optional[str]
authenticated: Optional[bool]
@attr.s(slots=True, frozen=True, auto_attribs=True)
@ -218,6 +220,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
"last_access_ts",
"safe_from_quarantine",
"user_id",
"authenticated",
),
allow_none=True,
desc="get_local_media",
@ -235,6 +238,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
last_access_ts=row[6],
safe_from_quarantine=row[7],
user_id=row[8],
authenticated=row[9],
)
async def get_local_media_by_user_paginate(
@ -290,7 +294,8 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
last_access_ts,
quarantined_by,
safe_from_quarantine,
user_id
user_id,
authenticated
FROM local_media_repository
WHERE user_id = ?
ORDER BY {order_by_column} {order}, media_id ASC
@ -314,6 +319,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
quarantined_by=row[7],
safe_from_quarantine=bool(row[8]),
user_id=row[9],
authenticated=row[10],
)
for row in txn
]
@ -417,12 +423,18 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
time_now_ms: int,
user_id: UserID,
) -> None:
if self.hs.config.media.enable_authenticated_media:
authenticated = True
else:
authenticated = False
await self.db_pool.simple_insert(
"local_media_repository",
{
"media_id": media_id,
"created_ts": time_now_ms,
"user_id": user_id.to_string(),
"authenticated": authenticated,
},
desc="store_local_media_id",
)
@ -438,6 +450,11 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
user_id: UserID,
url_cache: Optional[str] = None,
) -> None:
if self.hs.config.media.enable_authenticated_media:
authenticated = True
else:
authenticated = False
await self.db_pool.simple_insert(
"local_media_repository",
{
@ -448,6 +465,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
"media_length": media_length,
"user_id": user_id.to_string(),
"url_cache": url_cache,
"authenticated": authenticated,
},
desc="store_local_media",
)
@ -638,6 +656,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
"filesystem_id",
"last_access_ts",
"quarantined_by",
"authenticated",
),
allow_none=True,
desc="get_cached_remote_media",
@ -654,6 +673,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
filesystem_id=row[4],
last_access_ts=row[5],
quarantined_by=row[6],
authenticated=row[7],
)
async def store_cached_remote_media(
@ -666,6 +686,11 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
upload_name: Optional[str],
filesystem_id: str,
) -> None:
if self.hs.config.media.enable_authenticated_media:
authenticated = True
else:
authenticated = False
await self.db_pool.simple_insert(
"remote_media_cache",
{
@ -677,6 +702,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
"upload_name": upload_name,
"filesystem_id": filesystem_id,
"last_access_ts": time_now_ms,
"authenticated": authenticated,
},
desc="store_cached_remote_media",
)

View file

@ -19,7 +19,7 @@
#
#
SCHEMA_VERSION = 85 # remember to update the list below when updating
SCHEMA_VERSION = 86 # remember to update the list below when updating
"""Represents the expectations made by the codebase about the database schema
This should be incremented whenever the codebase changes its requirements on the
@ -139,6 +139,9 @@ Changes in SCHEMA_VERSION = 84
Changes in SCHEMA_VERSION = 85
- Add a column `suspended` to the `users` table
Changes in SCHEMA_VERSION = 86
- Add a column `authenticated` to the tables `local_media_repository` and `remote_media_cache`
"""

View file

@ -0,0 +1,15 @@
--
-- This file is licensed under the Affero General Public License (AGPL) version 3.
--
-- Copyright (C) 2024 New Vector, Ltd
--
-- This program is free software: you can redistribute it and/or modify
-- it under the terms of the GNU Affero General Public License as
-- published by the Free Software Foundation, either version 3 of the
-- License, or (at your option) any later version.
--
-- See the GNU Affero General Public License for more details:
-- <https://www.gnu.org/licenses/agpl-3.0.html>.
-- Add an `authenticated` flag to cached remote media. Rows that already exist
-- take the default of FALSE.
ALTER TABLE remote_media_cache ADD COLUMN authenticated BOOLEAN DEFAULT FALSE NOT NULL;
-- Add the same `authenticated` flag to locally stored media, again defaulting
-- existing rows to FALSE.
ALTER TABLE local_media_repository ADD COLUMN authenticated BOOLEAN DEFAULT FALSE NOT NULL;

View file

@ -43,6 +43,7 @@ from twisted.python.failure import Failure
from twisted.test.proto_helpers import AccumulatingProtocol, MemoryReactor
from twisted.web.http_headers import Headers
from twisted.web.iweb import UNKNOWN_LENGTH, IResponse
from twisted.web.resource import Resource
from synapse.api.errors import HttpResponseException
from synapse.api.ratelimiting import Ratelimiter
@ -2466,3 +2467,211 @@ class DownloadAndThumbnailTestCase(unittest.HomeserverTestCase):
server_name=None,
)
)
# Parameter sets passed to `parameterized_class` below: the test case is run
# once with `dynamic_thumbnails` enabled and once with it disabled.
configs = [
    {"extra_config": {"dynamic_thumbnails": True}},
    {"extra_config": {"dynamic_thumbnails": False}},
]
@parameterized_class(configs)
class AuthenticatedMediaTestCase(unittest.HomeserverTestCase):
    """Tests for the `enable_authenticated_media` config option.

    With the option enabled, media marked as authenticated must be served over
    the `/_matrix/client/v1/media` endpoints and must 404 over the legacy
    `/_matrix/media` endpoints, while media not marked as authenticated stays
    available over both.

    The class is parameterized over `configs`, so every test runs with
    `dynamic_thumbnails` both on and off.
    """

    # Supplied by `parameterized_class`.
    extra_config: Dict[str, Any]
    servlets = [
        media.register_servlets,
        login.register_servlets,
        admin.register_servlets,
    ]

    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
        """Build a homeserver with authenticated media enabled and a file
        storage provider backed by temporary directories."""
        config = self.default_config()

        self.clock = clock
        self.storage_path = self.mktemp()
        self.media_store_path = self.mktemp()
        os.mkdir(self.storage_path)
        os.mkdir(self.media_store_path)
        config["media_store_path"] = self.media_store_path
        config["enable_authenticated_media"] = True

        provider_config = {
            "module": "synapse.media.storage_provider.FileStorageProviderBackend",
            "store_local": True,
            "store_synchronous": False,
            "store_remote": True,
            "config": {"directory": self.storage_path},
        }

        config["media_storage_providers"] = [provider_config]
        # Apply the per-parameterization settings (dynamic_thumbnails on/off).
        config.update(self.extra_config)

        return self.setup_test_homeserver(config=config)

    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
        """Grab the handles the tests need and register/log in a test user."""
        self.repo = hs.get_media_repository()
        self.client = hs.get_federation_http_client()
        self.store = hs.get_datastores().main
        self.user = self.register_user("user", "pass")
        self.tok = self.login("user", "pass")

    def create_resource_dict(self) -> Dict[str, Resource]:
        """Mount the legacy media repository resource alongside the defaults so
        the unauthenticated `/_matrix/media` endpoints can be exercised."""
        resources = super().create_resource_dict()
        resources["/_matrix/media"] = self.hs.get_media_repository_resource()
        return resources

    def test_authenticated_media(self) -> None:
        """Authenticated local and remote media is only reachable over the
        authenticated endpoints; unauthenticated media is reachable over both."""
        # upload some local media with authentication on
        channel = self.make_request(
            "POST",
            "_matrix/media/v3/upload?filename=test_png_upload",
            SMALL_PNG,
            self.tok,
            shorthand=False,
            content_type=b"image/png",
            custom_headers=[("Content-Length", str(67))],
        )
        self.assertEqual(channel.code, 200)
        res = channel.json_body.get("content_uri")
        assert res is not None
        uri = res.split("mxc://")[1]

        # request media over authenticated endpoint, should be found
        channel2 = self.make_request(
            "GET",
            f"_matrix/client/v1/media/download/{uri}",
            access_token=self.tok,
            shorthand=False,
        )
        self.assertEqual(channel2.code, 200)

        # request same media over unauthenticated endpoint, should raise 404 not found
        channel3 = self.make_request(
            "GET", f"_matrix/media/v3/download/{uri}", shorthand=False
        )
        self.assertEqual(channel3.code, 404)

        # check thumbnails as well
        params = "?width=32&height=32&method=crop"
        channel4 = self.make_request(
            "GET",
            f"/_matrix/client/v1/media/thumbnail/{uri}{params}",
            shorthand=False,
            access_token=self.tok,
        )
        self.assertEqual(channel4.code, 200)

        channel5 = self.make_request(
            "GET",
            f"/_matrix/media/r0/thumbnail/{uri}{params}",
            shorthand=False,
            access_token=self.tok,
        )
        self.assertEqual(channel5.code, 404)

        # Inject a piece of remote media.
        file_id = "abcdefg12345"
        file_info = FileInfo(server_name="lonelyIsland", file_id=file_id)

        media_storage = self.hs.get_media_repository().media_storage

        ctx = media_storage.store_into_file(file_info)
        (f, fname) = self.get_success(ctx.__aenter__())
        f.write(SMALL_PNG)
        self.get_success(ctx.__aexit__(None, None, None))

        # we write the authenticated status when storing media, so this should pick up
        # config and authenticate the media
        self.get_success(
            self.store.store_cached_remote_media(
                origin="lonelyIsland",
                media_id="52",
                media_type="image/png",
                media_length=1,
                time_now_ms=self.clock.time_msec(),
                upload_name="remote_test.png",
                filesystem_id=file_id,
            )
        )

        # ensure we have thumbnails for the non-dynamic code path
        if self.extra_config == {"dynamic_thumbnails": False}:
            self.get_success(
                self.repo._generate_thumbnails(
                    "lonelyIsland", "52", file_id, "image/png"
                )
            )

        # authenticated remote media is served over the authenticated endpoint
        channel6 = self.make_request(
            "GET",
            "_matrix/client/v1/media/download/lonelyIsland/52",
            access_token=self.tok,
            shorthand=False,
        )
        self.assertEqual(channel6.code, 200)

        # ...but the same remote media must 404 over the legacy endpoint. This
        # has to target the remote media itself, not the local upload that was
        # already checked above.
        channel7 = self.make_request(
            "GET", "_matrix/media/v3/download/lonelyIsland/52", shorthand=False
        )
        self.assertEqual(channel7.code, 404)

        channel8 = self.make_request(
            "GET",
            f"/_matrix/client/v1/media/thumbnail/lonelyIsland/52{params}",
            shorthand=False,
            access_token=self.tok,
        )
        self.assertEqual(channel8.code, 200)

        channel9 = self.make_request(
            "GET",
            f"/_matrix/media/r0/thumbnail/lonelyIsland/52{params}",
            shorthand=False,
            access_token=self.tok,
        )
        self.assertEqual(channel9.code, 404)

        # Inject a piece of local media that isn't authenticated
        file_id = "abcdefg123456"
        file_info = FileInfo(None, file_id=file_id)

        ctx = media_storage.store_into_file(file_info)
        (f, fname) = self.get_success(ctx.__aenter__())
        f.write(SMALL_PNG)
        self.get_success(ctx.__aexit__(None, None, None))

        # insert the row directly so that `authenticated` can be forced to
        # False even though the config option is enabled
        self.get_success(
            self.store.db_pool.simple_insert(
                "local_media_repository",
                {
                    "media_id": "abcdefg123456",
                    "media_type": "image/png",
                    "created_ts": self.clock.time_msec(),
                    "upload_name": "test_local",
                    "media_length": 1,
                    "user_id": "someone",
                    "url_cache": None,
                    "authenticated": False,
                },
                desc="store_local_media",
            )
        )

        # check that unauthenticated media is still available over both endpoints
        channel10 = self.make_request(
            "GET",
            "/_matrix/client/v1/media/download/test/abcdefg123456",
            shorthand=False,
            access_token=self.tok,
        )
        self.assertEqual(channel10.code, 200)

        channel11 = self.make_request(
            "GET",
            "/_matrix/media/r0/download/test/abcdefg123456",
            shorthand=False,
            access_token=self.tok,
        )
        self.assertEqual(channel11.code, 200)