Merge branch 'develop' into madlittlemods/11850-migrate-to-opentelemetry

This commit is contained in:
Eric Eastwood 2022-09-12 15:34:02 -05:00
commit 6c40dfafcf
15 changed files with 256 additions and 158 deletions

View file

@ -486,5 +486,7 @@ jobs:
needs: ${{ toJSON(needs) }}
# The newsfile lint may be skipped on non PR builds
skippable:
# Cargo test is skipped if there is no changes on Rust code
skippable: |
lint-newsfile
cargo-test

1
changelog.d/13727.doc Normal file
View file

@ -0,0 +1 @@
Fix a typo in the documentation for the login ratelimiting configuration.

1
changelog.d/13745.misc Normal file
View file

@ -0,0 +1 @@
Remove old queries to join room memberships to current state events. Contributed by Nick @ Beeper (@fizzadar).

1
changelog.d/13759.misc Normal file
View file

@ -0,0 +1 @@
Add a check for editable installs if the Rust library needs rebuilding.

1
changelog.d/13769.misc Normal file
View file

@ -0,0 +1 @@
Add a stub Rust crate.

1
changelog.d/13778.misc Normal file
View file

@ -0,0 +1 @@
Add a stub Rust crate.

View file

@ -1394,7 +1394,7 @@ This option specifies several limits for login:
client is attempting to log into. Defaults to `per_second: 0.17`,
`burst_count: 3`.
* `failted_attempts` ratelimits login requests based on the account the
* `failed_attempts` ratelimits login requests based on the account the
client is attempting to log into, based on the amount of failed login
attempts for this account. Defaults to `per_second: 0.17`, `burst_count: 3`.

View file

@ -19,3 +19,7 @@ name = "synapse.synapse_rust"
[dependencies]
pyo3 = { version = "0.16.5", features = ["extension-module", "macros", "abi3", "abi3-py37"] }
[build-dependencies]
blake2 = "0.10.4"
hex = "0.4.3"

45
rust/build.rs Normal file
View file

@ -0,0 +1,45 @@
//! This build script calculates the hash of all files in the `src/`
//! directory and adds it as an environment variable during build time.
//!
//! This is used so that the python code can detect when the built native module
//! does not match the source in-tree, helping to detect the case where the
//! source has been updated but the library hasn't been rebuilt.
use std::path::PathBuf;
use blake2::{Blake2b512, Digest};
fn main() -> Result<(), std::io::Error> {
let mut dirs = vec![PathBuf::from("src")];
let mut paths = Vec::new();
while let Some(path) = dirs.pop() {
let mut entries = std::fs::read_dir(path)?
.map(|res| res.map(|e| e.path()))
.collect::<Result<Vec<_>, std::io::Error>>()?;
entries.sort();
for entry in entries {
if entry.is_dir() {
dirs.push(entry)
} else {
paths.push(entry.to_str().expect("valid rust paths").to_string());
}
}
}
paths.sort();
let mut hasher = Blake2b512::new();
for path in paths {
let bytes = std::fs::read(path)?;
hasher.update(bytes);
}
let hex_digest = hex::encode(hasher.finalize());
println!("cargo:rustc-env=SYNAPSE_RUST_DIGEST={hex_digest}");
Ok(())
}

View file

@ -1,5 +1,13 @@
use pyo3::prelude::*;
/// Returns the hash of all the rust source files at the time it was compiled.
///
/// Used by python to detect if the rust library is outdated.
#[pyfunction]
fn get_rust_file_digest() -> &'static str {
env!("SYNAPSE_RUST_DIGEST")
}
/// Formats the sum of two numbers as string.
#[pyfunction]
#[pyo3(text_signature = "(a, b, /)")]
@ -11,6 +19,6 @@ fn sum_as_string(a: usize, b: usize) -> PyResult<String> {
#[pymodule]
fn synapse_rust(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(sum_as_string, m)?)?;
m.add_function(wrap_pyfunction!(get_rust_file_digest, m)?)?;
Ok(())
}

View file

@ -1 +1,2 @@
def sum_as_string(a: int, b: int) -> str: ...
def get_rust_file_digest() -> str: ...

View file

@ -20,6 +20,8 @@ import json
import os
import sys
from synapse.util.rust import check_rust_lib_up_to_date
# Check that we're not running on an unsupported Python version.
if sys.version_info < (3, 7):
print("Synapse requires Python 3.7 or above.")
@ -78,3 +80,6 @@ if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)):
from synapse.util.patch_inline_callbacks import do_patch
do_patch()
check_rust_lib_up_to_date()

View file

@ -32,10 +32,7 @@ import attr
from synapse.api.constants import EventTypes, Membership
from synapse.metrics import LaterGauge
from synapse.metrics.background_process_metrics import (
run_as_background_process,
wrap_as_background_process,
)
from synapse.metrics.background_process_metrics import wrap_as_background_process
from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
from synapse.storage.database import (
DatabasePool,
@ -91,16 +88,6 @@ class RoomMemberWorkerStore(EventsWorkerStore):
# at a time. Keyed by room_id.
self._joined_host_linearizer = Linearizer("_JoinedHostsCache")
# Is the current_state_events.membership up to date? Or is the
# background update still running?
self._current_state_events_membership_up_to_date = False
txn = db_conn.cursor(
txn_name="_check_safe_current_state_events_membership_updated"
)
self._check_safe_current_state_events_membership_updated_txn(txn)
txn.close()
if (
self.hs.config.worker.run_background_tasks
and self.hs.config.metrics.metrics_flags.known_servers
@ -157,34 +144,6 @@ class RoomMemberWorkerStore(EventsWorkerStore):
self._known_servers_count = max([count, 1])
return self._known_servers_count
def _check_safe_current_state_events_membership_updated_txn(
self, txn: LoggingTransaction
) -> None:
"""Checks if it is safe to assume the new current_state_events
membership column is up to date
"""
pending_update = self.db_pool.simple_select_one_txn(
txn,
table="background_updates",
keyvalues={"update_name": _CURRENT_STATE_MEMBERSHIP_UPDATE_NAME},
retcols=["update_name"],
allow_none=True,
)
self._current_state_events_membership_up_to_date = not pending_update
# If the update is still running, reschedule to run.
if pending_update:
self._clock.call_later(
15.0,
run_as_background_process,
"_check_safe_current_state_events_membership_updated",
self.db_pool.runInteraction,
"_check_safe_current_state_events_membership_updated",
self._check_safe_current_state_events_membership_updated_txn,
)
@cached(max_entries=100000, iterable=True)
async def get_users_in_room(self, room_id: str) -> List[str]:
"""
@ -212,31 +171,14 @@ class RoomMemberWorkerStore(EventsWorkerStore):
`get_current_hosts_in_room()` and so we can re-use the cache but it's
not horrible to have here either.
"""
# If we can assume current_state_events.membership is up to date
# then we can avoid a join, which is a Very Good Thing given how
# frequently this function gets called.
if self._current_state_events_membership_up_to_date:
sql = """
SELECT c.state_key FROM current_state_events as c
/* Get the depth of the event from the events table */
INNER JOIN events AS e USING (event_id)
WHERE c.type = 'm.room.member' AND c.room_id = ? AND membership = ?
/* Sorted by lowest depth first */
ORDER BY e.depth ASC;
"""
else:
sql = """
SELECT c.state_key FROM room_memberships as m
/* Get the depth of the event from the events table */
INNER JOIN events AS e USING (event_id)
INNER JOIN current_state_events as c
ON m.event_id = c.event_id
AND m.room_id = c.room_id
AND m.user_id = c.state_key
WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ?
/* Sorted by lowest depth first */
ORDER BY e.depth ASC;
"""
sql = """
SELECT c.state_key FROM current_state_events as c
/* Get the depth of the event from the events table */
INNER JOIN events AS e USING (event_id)
WHERE c.type = 'm.room.member' AND c.room_id = ? AND membership = ?
/* Sorted by lowest depth first */
ORDER BY e.depth ASC;
"""
txn.execute(sql, (room_id, Membership.JOIN))
return [r[0] for r in txn]
@ -353,28 +295,14 @@ class RoomMemberWorkerStore(EventsWorkerStore):
# We do this all in one transaction to keep the cache small.
# FIXME: get rid of this when we have room_stats
# If we can assume current_state_events.membership is up to date
# then we can avoid a join, which is a Very Good Thing given how
# frequently this function gets called.
if self._current_state_events_membership_up_to_date:
# Note, rejected events will have a null membership field, so
# we we manually filter them out.
sql = """
SELECT count(*), membership FROM current_state_events
WHERE type = 'm.room.member' AND room_id = ?
AND membership IS NOT NULL
GROUP BY membership
"""
else:
sql = """
SELECT count(*), m.membership FROM room_memberships as m
INNER JOIN current_state_events as c
ON m.event_id = c.event_id
AND m.room_id = c.room_id
AND m.user_id = c.state_key
WHERE c.type = 'm.room.member' AND c.room_id = ?
GROUP BY m.membership
"""
# Note, rejected events will have a null membership field, so
# we we manually filter them out.
sql = """
SELECT count(*), membership FROM current_state_events
WHERE type = 'm.room.member' AND room_id = ?
AND membership IS NOT NULL
GROUP BY membership
"""
txn.execute(sql, (room_id,))
res: Dict[str, MemberSummary] = {}
@ -383,30 +311,18 @@ class RoomMemberWorkerStore(EventsWorkerStore):
# we order by membership and then fairly arbitrarily by event_id so
# heroes are consistent
if self._current_state_events_membership_up_to_date:
# Note, rejected events will have a null membership field, so
# we we manually filter them out.
sql = """
SELECT state_key, membership, event_id
FROM current_state_events
WHERE type = 'm.room.member' AND room_id = ?
AND membership IS NOT NULL
ORDER BY
CASE membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC,
event_id ASC
LIMIT ?
"""
else:
sql = """
SELECT c.state_key, m.membership, c.event_id
FROM room_memberships as m
INNER JOIN current_state_events as c USING (room_id, event_id)
WHERE c.type = 'm.room.member' AND c.room_id = ?
ORDER BY
CASE m.membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC,
c.event_id ASC
LIMIT ?
"""
# Note, rejected events will have a null membership field, so
# we we manually filter them out.
sql = """
SELECT state_key, membership, event_id
FROM current_state_events
WHERE type = 'm.room.member' AND room_id = ?
AND membership IS NOT NULL
ORDER BY
CASE membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC,
event_id ASC
LIMIT ?
"""
# 6 is 5 (number of heroes) plus 1, in case one of them is the calling user.
txn.execute(sql, (room_id, Membership.JOIN, Membership.INVITE, 6))
@ -649,27 +565,15 @@ class RoomMemberWorkerStore(EventsWorkerStore):
# We use `current_state_events` here and not `local_current_membership`
# as a) this gets called with remote users and b) this only gets called
# for rooms the server is participating in.
if self._current_state_events_membership_up_to_date:
sql = """
SELECT room_id, e.instance_name, e.stream_ordering
FROM current_state_events AS c
INNER JOIN events AS e USING (room_id, event_id)
WHERE
c.type = 'm.room.member'
AND c.state_key = ?
AND c.membership = ?
"""
else:
sql = """
SELECT room_id, e.instance_name, e.stream_ordering
FROM current_state_events AS c
INNER JOIN room_memberships AS m USING (room_id, event_id)
INNER JOIN events AS e USING (room_id, event_id)
WHERE
c.type = 'm.room.member'
AND c.state_key = ?
AND m.membership = ?
"""
sql = """
SELECT room_id, e.instance_name, e.stream_ordering
FROM current_state_events AS c
INNER JOIN events AS e USING (room_id, event_id)
WHERE
c.type = 'm.room.member'
AND c.state_key = ?
AND c.membership = ?
"""
txn.execute(sql, (user_id, Membership.JOIN))
return frozenset(
@ -707,27 +611,15 @@ class RoomMemberWorkerStore(EventsWorkerStore):
user_ids,
)
if self._current_state_events_membership_up_to_date:
sql = f"""
SELECT c.state_key, room_id, e.instance_name, e.stream_ordering
FROM current_state_events AS c
INNER JOIN events AS e USING (room_id, event_id)
WHERE
c.type = 'm.room.member'
AND c.membership = ?
AND {clause}
"""
else:
sql = f"""
SELECT c.state_key, room_id, e.instance_name, e.stream_ordering
FROM current_state_events AS c
INNER JOIN room_memberships AS m USING (room_id, event_id)
INNER JOIN events AS e USING (room_id, event_id)
WHERE
c.type = 'm.room.member'
AND m.membership = ?
AND {clause}
"""
sql = f"""
SELECT c.state_key, room_id, e.instance_name, e.stream_ordering
FROM current_state_events AS c
INNER JOIN events AS e USING (room_id, event_id)
WHERE
c.type = 'm.room.member'
AND c.membership = ?
AND {clause}
"""
txn.execute(sql, [Membership.JOIN] + args)

View file

@ -0,0 +1,52 @@
# Copyright 2022 Beeper
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Forces through the `current_state_events_membership` background job so checks
for its completion can be removed.
Note the background job must still remain defined in the database class.
"""
def run_upgrade(cur, database_engine, *args, **kwargs):
cur.execute("SELECT update_name FROM background_updates")
rows = cur.fetchall()
for row in rows:
if row[0] == "current_state_events_membership":
break
# No pending background job so nothing to do here
else:
return
# Populate membership field for all current_state_events, this may take
# a while but was originally handled via a background update in 2019.
cur.execute(
"""
UPDATE current_state_events
SET membership = (
SELECT membership FROM room_memberships
WHERE event_id = current_state_events.event_id
)
"""
)
# Finally, delete the background job because we've handled it above
cur.execute(
"""
DELETE FROM background_updates
WHERE update_name = 'current_state_events_membership'
"""
)

84
synapse/util/rust.py Normal file
View file

@ -0,0 +1,84 @@
# Copyright 2022 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
from hashlib import blake2b
import synapse
from synapse.synapse_rust import get_rust_file_digest
def check_rust_lib_up_to_date() -> None:
"""For editable installs check if the rust library is outdated and needs to
be rebuilt.
"""
if not _dist_is_editable():
return
synapse_dir = os.path.dirname(synapse.__file__)
synapse_root = os.path.abspath(os.path.join(synapse_dir, ".."))
# Double check we've not gone into site-packages...
if os.path.basename(synapse_root) == "site-packages":
return
# ... and it looks like the root of a python project.
if not os.path.exists("pyproject.toml"):
return
# Get the hash of all Rust source files
hash = _hash_rust_files_in_directory(os.path.join(synapse_root, "rust", "src"))
if hash != get_rust_file_digest():
raise Exception("Rust module outdated. Please rebuild using `poetry install`")
def _hash_rust_files_in_directory(directory: str) -> str:
"""Get the hash of all files in a directory (recursively)"""
directory = os.path.abspath(directory)
paths = []
dirs = [directory]
while dirs:
dir = dirs.pop()
with os.scandir(dir) as d:
for entry in d:
if entry.is_dir():
dirs.append(entry.path)
else:
paths.append(entry.path)
# We sort to make sure that we get a consistent and well-defined ordering.
paths.sort()
hasher = blake2b()
for path in paths:
with open(os.path.join(directory, path), "rb") as f:
hasher.update(f.read())
return hasher.hexdigest()
def _dist_is_editable() -> bool:
"""Is distribution an editable install?"""
for path_item in sys.path:
egg_link = os.path.join(path_item, "matrix-synapse.egg-link")
if os.path.isfile(egg_link):
return True
return False