From 615d96ad6e5bba6f260cb03f4ec119bef51a3309 Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Mon, 9 May 2022 23:43:02 +0200 Subject: [PATCH] Update SQL statements in docs for Synapse Admins (#12536) --- changelog.d/12536.doc | 1 + .../administration/useful_sql_for_admins.md | 191 +++++++++++------- 2 files changed, 122 insertions(+), 70 deletions(-) create mode 100644 changelog.d/12536.doc diff --git a/changelog.d/12536.doc b/changelog.d/12536.doc new file mode 100644 index 0000000000..4034c42076 --- /dev/null +++ b/changelog.d/12536.doc @@ -0,0 +1 @@ +Update SQL statements and replace use of old table `user_stats_historical` in docs for Synapse Admins. diff --git a/docs/usage/administration/useful_sql_for_admins.md b/docs/usage/administration/useful_sql_for_admins.md index d4aada3272..f3b97f9576 100644 --- a/docs/usage/administration/useful_sql_for_admins.md +++ b/docs/usage/administration/useful_sql_for_admins.md @@ -1,7 +1,10 @@ ## Some useful SQL queries for Synapse Admins ## Size of full matrix db -`SELECT pg_size_pretty( pg_database_size( 'matrix' ) );` +```sql +SELECT pg_size_pretty( pg_database_size( 'matrix' ) ); +``` + ### Result example: ``` pg_size_pretty @@ -9,39 +12,19 @@ pg_size_pretty 6420 MB (1 row) ``` -## Show top 20 larger rooms by state events count -```sql -SELECT r.name, s.room_id, s.current_state_events - FROM room_stats_current s - LEFT JOIN room_stats_state r USING (room_id) - ORDER BY current_state_events DESC - LIMIT 20; -``` - -and by state_group_events count: -```sql -SELECT rss.name, s.room_id, count(s.room_id) FROM state_groups_state s -LEFT JOIN room_stats_state rss USING (room_id) -GROUP BY s.room_id, rss.name -ORDER BY count(s.room_id) DESC -LIMIT 20; -``` -plus same, but with join removed for performance reasons: -```sql -SELECT s.room_id, count(s.room_id) FROM state_groups_state s -GROUP BY s.room_id -ORDER BY count(s.room_id) DESC -LIMIT 20; -``` ## Show top 20 larger tables by row count ```sql -SELECT relname, n_live_tup as rows - FROM pg_stat_user_tables +SELECT relname, n_live_tup AS "rows" + FROM pg_stat_user_tables ORDER BY n_live_tup DESC LIMIT 20; ``` -This query is quick, but may be very approximate, for exact number of rows use `SELECT COUNT(*) FROM `. +This query is quick, but may be very approximate, for exact number of rows use: +```sql +SELECT COUNT(*) FROM ; +``` + ### Result example: ``` state_groups_state - 161687170 @@ -66,46 +49,19 @@ device_lists_stream - 326903 user_directory_search - 316433 ``` -## Show top 20 rooms by new events count in last 1 day: -```sql -SELECT e.room_id, r.name, COUNT(e.event_id) cnt FROM events e -LEFT JOIN room_stats_state r USING (room_id) -WHERE e.origin_server_ts >= DATE_PART('epoch', NOW() - INTERVAL '1 day') * 1000 GROUP BY e.room_id, r.name ORDER BY cnt DESC LIMIT 20; -``` - -## Show top 20 users on homeserver by sent events (messages) at last month: -```sql -SELECT user_id, SUM(total_events) - FROM user_stats_historical - WHERE TO_TIMESTAMP(end_ts/1000) AT TIME ZONE 'UTC' > date_trunc('day', now() - interval '1 month') - GROUP BY user_id - ORDER BY SUM(total_events) DESC - LIMIT 20; -``` - -## Show last 100 messages from needed user, with room names: -```sql -SELECT e.room_id, r.name, e.event_id, e.type, e.content, j.json FROM events e - LEFT JOIN event_json j USING (room_id) - LEFT JOIN room_stats_state r USING (room_id) - WHERE sender = '@LOGIN:example.com' - AND e.type = 'm.room.message' - ORDER BY stream_ordering DESC - LIMIT 100; -``` - ## Show top 20 larger tables by storage size ```sql SELECT nspname || '.' || relname AS "relation", - pg_size_pretty(pg_total_relation_size(C.oid)) AS "total_size" - FROM pg_class C - LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace) + pg_size_pretty(pg_total_relation_size(c.oid)) AS "total_size" + FROM pg_class c + LEFT JOIN pg_namespace n ON (n.oid = c.relnamespace) WHERE nspname NOT IN ('pg_catalog', 'information_schema') - AND C.relkind <> 'i' + AND c.relkind <> 'i' AND nspname !~ '^pg_toast' - ORDER BY pg_total_relation_size(C.oid) DESC + ORDER BY pg_total_relation_size(c.oid) DESC LIMIT 20; ``` + ### Result example: ``` public.state_groups_state - 27 GB @@ -130,8 +86,93 @@ public.device_lists_remote_cache - 124 MB public.state_group_edges - 122 MB ``` +## Show top 20 larger rooms by state events count +You get the same information when you use the +[admin API](../../admin_api/rooms.md#list-room-api) +and set parameter `order_by=state_events`. + +```sql +SELECT r.name, s.room_id, s.current_state_events + FROM room_stats_current s + LEFT JOIN room_stats_state r USING (room_id) + ORDER BY current_state_events DESC + LIMIT 20; +``` + +and by state_group_events count: +```sql +SELECT rss.name, s.room_id, COUNT(s.room_id) + FROM state_groups_state s + LEFT JOIN room_stats_state rss USING (room_id) + GROUP BY s.room_id, rss.name + ORDER BY COUNT(s.room_id) DESC + LIMIT 20; +``` + +plus same, but with join removed for performance reasons: +```sql +SELECT s.room_id, COUNT(s.room_id) + FROM state_groups_state s + GROUP BY s.room_id + ORDER BY COUNT(s.room_id) DESC + LIMIT 20; +``` + +## Show top 20 rooms by new events count in last 1 day: +```sql +SELECT e.room_id, r.name, COUNT(e.event_id) cnt + FROM events e + LEFT JOIN room_stats_state r USING (room_id) + WHERE e.origin_server_ts >= DATE_PART('epoch', NOW() - INTERVAL '1 day') * 1000 + GROUP BY e.room_id, r.name + ORDER BY cnt DESC + LIMIT 20; +``` + +## Show top 20 users on homeserver by sent events (messages) at last month: +Caution. This query does not use any indexes, can be slow and create load on the database. +```sql +SELECT COUNT(*), sender + FROM events + WHERE (type = 'm.room.encrypted' OR type = 'm.room.message') + AND origin_server_ts >= DATE_PART('epoch', NOW() - INTERVAL '1 month') * 1000 + GROUP BY sender + ORDER BY COUNT(*) DESC + LIMIT 20; +``` + +## Show last 100 messages from needed user, with room names: +```sql +SELECT e.room_id, r.name, e.event_id, e.type, e.content, j.json + FROM events e + LEFT JOIN event_json j USING (room_id) + LEFT JOIN room_stats_state r USING (room_id) + WHERE sender = '@LOGIN:example.com' + AND e.type = 'm.room.message' + ORDER BY stream_ordering DESC + LIMIT 100; +``` + ## Show rooms with names, sorted by events in this rooms -`echo "select event_json.room_id,room_stats_state.name from event_json,room_stats_state where room_stats_state.room_id=event_json.room_id" | psql synapse | sort | uniq -c | sort -n` + +**Sort and order with bash** +```bash +echo "SELECT event_json.room_id, room_stats_state.name FROM event_json, room_stats_state \ +WHERE room_stats_state.room_id = event_json.room_id" | psql -d synapse -h localhost -U synapse_user -t \ +| sort | uniq -c | sort -n +``` +Documentation for `psql` command line parameters: https://www.postgresql.org/docs/current/app-psql.html + +**Sort and order with SQL** +```sql +SELECT COUNT(*), event_json.room_id, room_stats_state.name + FROM event_json, room_stats_state + WHERE room_stats_state.room_id = event_json.room_id + GROUP BY event_json.room_id, room_stats_state.name + ORDER BY COUNT(*) DESC + LIMIT 50; +``` + ### Result example: ``` 9459 !FPUfgzXYWTKgIrwKxW:matrix.org | This Week in Matrix @@ -145,12 +186,22 @@ public.state_group_edges - 122 MB ``` ## Lookup room state info by list of room_id +You get the same information when you use the +[admin API](../../admin_api/rooms.md#room-details-api). ```sql -SELECT rss.room_id, rss.name, rss.canonical_alias, rss.topic, rss.encryption, rsc.joined_members, rsc.local_users_in_room, rss.join_rules -FROM room_stats_state rss -LEFT JOIN room_stats_current rsc USING (room_id) -WHERE room_id IN (WHERE room_id IN ( - '!OGEhHVWSdvArJzumhm:matrix.org', - '!YTvKGNlinIzlkMTVRl:matrix.org' -) -``` \ No newline at end of file +SELECT rss.room_id, rss.name, rss.canonical_alias, rss.topic, rss.encryption, + rsc.joined_members, rsc.local_users_in_room, rss.join_rules + FROM room_stats_state rss + LEFT JOIN room_stats_current rsc USING (room_id) + WHERE room_id IN ( WHERE room_id IN ( + '!OGEhHVWSdvArJzumhm:matrix.org', + '!YTvKGNlinIzlkMTVRl:matrix.org' + ); +``` + +## Show users and devices that have not been online for a while +```sql +SELECT user_id, device_id, user_agent, TO_TIMESTAMP(last_seen / 1000) AS "last_seen" + FROM devices + WHERE last_seen < DATE_PART('epoch', NOW() - INTERVAL '3 month') * 1000; +```