synapse/v1.92/tcp_replication.html

401 lines
34 KiB
HTML
Raw Normal View History

<!DOCTYPE HTML>
<html lang="en" class="sidebar-visible no-js light">
<head>
<!-- Book generated using mdBook -->
<meta charset="UTF-8">
<title>TCP Replication - Synapse</title>
<!-- Custom HTML head -->
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
<meta name="description" content="">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="theme-color" content="#ffffff" />
<link rel="icon" href="favicon.svg">
<link rel="shortcut icon" href="favicon.png">
<link rel="stylesheet" href="css/variables.css">
<link rel="stylesheet" href="css/general.css">
<link rel="stylesheet" href="css/chrome.css">
<link rel="stylesheet" href="css/print.css" media="print">
<!-- Fonts -->
<link rel="stylesheet" href="FontAwesome/css/font-awesome.css">
<link rel="stylesheet" href="fonts/fonts.css">
<!-- Highlight.js Stylesheets -->
<link rel="stylesheet" href="highlight.css">
<link rel="stylesheet" href="tomorrow-night.css">
<link rel="stylesheet" href="ayu-highlight.css">
<!-- Custom theme stylesheets -->
<link rel="stylesheet" href="docs/website_files/table-of-contents.css">
<link rel="stylesheet" href="docs/website_files/remove-nav-buttons.css">
<link rel="stylesheet" href="docs/website_files/indent-section-headers.css">
<link rel="stylesheet" href="docs/website_files/version-picker.css">
</head>
<body>
<!-- Provide site root to javascript -->
<script type="text/javascript">
// Site root, exposed as a global for the scripts loaded later on this page.
var path_to_root = "";
// Theme to fall back on when the reader has not saved one: "navy" if the
// OS/browser reports a dark colour-scheme preference, otherwise "light".
var default_theme;
if (window.matchMedia("(prefers-color-scheme: dark)").matches) {
    default_theme = "navy";
} else {
    default_theme = "light";
}
</script>
<!-- Work around some values being stored in localStorage wrapped in quotes -->
<script type="text/javascript">
// Some values may have been stored in localStorage wrapped in double quotes;
// strip the quotes so the scripts below read plain strings.
//
// Each key is repaired independently: getItem() returns null for a key that
// was never set, and a failure on one key must not stop the other from being
// fixed.
['mdbook-theme', 'mdbook-sidebar'].forEach(function (key) {
    try {
        var stored = localStorage.getItem(key);
        // Require at least two characters so a lone '"' is not rewritten to
        // the empty string.
        if (stored !== null && stored.length >= 2 &&
                stored.startsWith('"') && stored.endsWith('"')) {
            localStorage.setItem(key, stored.slice(1, stored.length - 1));
        }
    } catch (e) {
        // Best effort only: localStorage can be unavailable or throw (for
        // example when storage access is blocked); the page still works
        // with its defaults.
    }
});
</script>
<!-- Set the theme before any content is loaded, prevents flash -->
<script type="text/javascript">
// Pick the theme: the reader's saved choice if there is one, otherwise the
// default computed earlier on this page.
var theme;
try {
    theme = localStorage.getItem('mdbook-theme');
} catch (e) {
    // Storage could not be read; fall through to the default below.
}
if (theme == null) {
    theme = default_theme;
}

// Replace the static classes on <html> with the resolved theme and mark the
// document as script-enabled.
var html = document.documentElement;
html.classList.remove('no-js', 'light');
html.classList.add(theme, 'js');
</script>
<!-- Hide / unhide sidebar before it is displayed -->
<script type="text/javascript">
// Decide whether the sidebar starts shown or hidden before it is displayed.
// Viewports narrower than 1080px always start hidden. Wider viewports use the
// value saved under 'mdbook-sidebar', falling back to 'visible' when nothing
// is saved or when localStorage cannot be read.
var html = document.querySelector('html');
var sidebar = 'hidden';
if (document.body.clientWidth >= 1080) {
    // Reset first: if getItem() throws, the narrow-screen 'hidden' default
    // must not leak through and hide the sidebar on a wide screen.
    sidebar = null;
    try { sidebar = localStorage.getItem('mdbook-sidebar'); } catch (e) { }
    sidebar = sidebar || 'visible';
}
// Swap the static 'sidebar-visible' class from the markup for the computed one.
html.classList.remove('sidebar-visible');
html.classList.add("sidebar-" + sidebar);
</script>
<nav id="sidebar" class="sidebar" aria-label="Table of contents">
<div class="sidebar-scrollbox">
<ol class="chapter"><li class="chapter-item expanded affix "><li class="part-title">Introduction</li><li class="chapter-item expanded "><a href="welcome_and_overview.html">Welcome and Overview</a></li><li class="chapter-item expanded affix "><li class="part-title">Setup</li><li class="chapter-item expanded "><a href="setup/installation.html">Installation</a></li><li class="chapter-item expanded "><a href="postgres.html">Using Postgres</a></li><li class="chapter-item expanded "><a href="reverse_proxy.html">Configuring a Reverse Proxy</a></li><li class="chapter-item expanded "><a href="setup/forward_proxy.html">Configuring a Forward/Outbound Proxy</a></li><li class="chapter-item expanded "><a href="turn-howto.html">Configuring a Turn Server</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="setup/turn/coturn.html">coturn TURN server</a></li><li class="chapter-item expanded "><a href="setup/turn/eturnal.html">eturnal TURN server</a></li></ol></li><li class="chapter-item expanded "><a href="delegate.html">Delegation</a></li><li class="chapter-item expanded affix "><li class="part-title">Upgrading</li><li class="chapter-item expanded "><a href="upgrade.html">Upgrading between Synapse Versions</a></li><li class="chapter-item expanded affix "><li class="part-title">Usage</li><li class="chapter-item expanded "><a href="federate.html">Federation</a></li><li class="chapter-item expanded "><a href="usage/configuration/index.html">Configuration</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="usage/configuration/config_documentation.html">Configuration Manual</a></li><li class="chapter-item expanded "><a href="usage/configuration/homeserver_sample_config.html">Homeserver Sample Config File</a></li><li class="chapter-item expanded "><a href="usage/configuration/logging_sample_config.html">Logging Sample Config File</a></li><li class="chapter-item expanded "><a href="structured_logging.html">Structured Logging</a></li><li 
class="chapter-item expanded "><a href="templates.html">Templates</a></li><li class="chapter-item expanded "><a href="usage/configuration/user_authentication/index.html">User Authentication</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="usage/configuration/user_authentication/single_sign_on/index.html">Single-Sign On</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="openid.html">OpenID Connect</a></li><li class="chapter-item expanded "><a href="usage/configuration/user_authentication/single_sign_on/saml.html">SAML</a></li><li class="chapter-item expanded "><a href="usage/configuration/user_authentication/single_sign_on/cas.html">CAS</a></li><li class="chapter-item expanded "><a href="sso_mapping_providers.html">SSO Mapping Providers</a></li></ol></li><li class="chapter-item expanded "><a href="password_auth_providers.html">Password Auth Providers</a></li><li class="chapter-item expanded "><a href="jwt.html">JSON Web Tokens</a></li><li class="chapter-item expanded "><a href="usage/configuration/user_authentication/refresh_tokens.html">Refresh Tokens</a></li></ol></li><li class="chapter-item expanded "><a href="CAPTCHA_SETUP.html">Registration Captcha</a></li><li class="chapter-item expanded "><a href="application_services.html">Application Services</a></li><li class="chapter-item expanded "><a href="server_notices.html">Server Notices</a></li><li class="chapter-item expanded "><a href="consent_tracking.html">Consent Tracking</a></li><li class="chapter-item expanded "><a href="user_directory.html">User Directory</a></li><li class="chapter-item expanded "><a href="message_retention_policies.html">Message Retention Policies</a></li><li class="chapter-item expanded "><a href="modules/index.html">Pluggable Modules</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="modules/writing_a_module.html">Writing a module</a></li><li><ol class="section"><li class="chapter-item expanded "><a 
href="modules/spam_checker_callbacks.html">Spam checker callbacks</a></li><li class="chapter-item
</div>
<div id="sidebar-resize-handle" class="sidebar-resize-handle"></div>
</nav>
<div id="page-wrapper" class="page-wrapper">
<div class="page">
<div id="menu-bar-hover-placeholder"></div>
<div id="menu-bar" class="menu-bar sticky bordered">
<div class="left-buttons">
<button id="sidebar-toggle" class="icon-button" type="button" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar">
<i class="fa fa-bars"></i>
</button>
<button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list">
<i class="fa fa-paint-brush"></i>
</button>
<ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu">
<li role="none"><button role="menuitem" class="theme" id="light">Light (default)</button></li>
<li role="none"><button role="menuitem" class="theme" id="rust">Rust</button></li>
<li role="none"><button role="menuitem" class="theme" id="coal">Coal</button></li>
<li role="none"><button role="menuitem" class="theme" id="navy">Navy</button></li>
<li role="none"><button role="menuitem" class="theme" id="ayu">Ayu</button></li>
</ul>
<button id="search-toggle" class="icon-button" type="button" title="Search. (Shortkey: s)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="S" aria-controls="searchbar">
<i class="fa fa-search"></i>
</button>
<div class="version-picker">
<div class="dropdown">
<div class="select">
<span></span>
<i class="fa fa-chevron-down"></i>
</div>
<input type="hidden" name="version">
<ul class="dropdown-menu">
<!-- Versions will be added dynamically in version-picker.js -->
</ul>
</div>
</div>
</div>
<h1 class="menu-title">Synapse</h1>
<div class="right-buttons">
<a href="print.html" title="Print this book" aria-label="Print this book">
<i id="print-button" class="fa fa-print"></i>
</a>
<a href="https://github.com/matrix-org/synapse" title="Git repository" aria-label="Git repository">
<i id="git-repository-button" class="fa fa-github"></i>
</a>
<a href="https://github.com/matrix-org/synapse/edit/develop/docs/tcp_replication.md" title="Suggest an edit" aria-label="Suggest an edit">
<i id="git-edit-button" class="fa fa-edit"></i>
</a>
</div>
</div>
<div id="search-wrapper" class="hidden">
<form id="searchbar-outer" class="searchbar-outer">
<input type="search" id="searchbar" name="searchbar" placeholder="Search this book ..." aria-controls="searchresults-outer" aria-describedby="searchresults-header">
</form>
<div id="searchresults-outer" class="searchresults-outer hidden">
<div id="searchresults-header" class="searchresults-header"></div>
<ul id="searchresults">
</ul>
</div>
</div>
<!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM -->
<script type="text/javascript">
// Mirror the sidebar state (computed earlier on this page) into ARIA
// attributes, and keep sidebar links out of the tab order while it is hidden.
// Wrapped in a function so the helper variables stay local.
(function () {
    var isShown = sidebar === 'visible';
    var toggleButton = document.getElementById('sidebar-toggle');
    toggleButton.setAttribute('aria-expanded', isShown);

    var sidebarNav = document.getElementById('sidebar');
    sidebarNav.setAttribute('aria-hidden', !isShown);

    var sidebarLinks = document.querySelectorAll('#sidebar a');
    var linkTabIndex = isShown ? 0 : -1;
    for (var i = 0; i < sidebarLinks.length; i++) {
        sidebarLinks[i].setAttribute('tabIndex', linkTabIndex);
    }
})();
</script>
<div id="content" class="content">
<main>
<!-- Page table of contents -->
<div class="sidetoc">
<nav class="pagetoc"></nav>
</div>
<h1 id="tcp-replication"><a class="header" href="#tcp-replication">TCP Replication</a></h1>
<h2 id="motivation"><a class="header" href="#motivation">Motivation</a></h2>
<p>Previously the workers used an HTTP long poll mechanism to get updates
from the master, which had the problem of causing a lot of duplicate
work on the server. This TCP protocol replaces those APIs with the aim
of increased efficiency.</p>
<h2 id="overview"><a class="header" href="#overview">Overview</a></h2>
<p>The protocol is based on fire and forget, line based commands. An
example flow would be (where '&gt;' indicates master to worker and
'&lt;' worker to master flows):</p>
<pre><code>&gt; SERVER example.com
&lt; REPLICATE
&gt; POSITION events master 53 53
&gt; RDATA events master 54 [&quot;$foo1:bar.com&quot;, ...]
&gt; RDATA events master 55 [&quot;$foo4:bar.com&quot;, ...]
</code></pre>
<p>The example shows the server accepting a new connection and sending its identity
with the <code>SERVER</code> command, followed by the client asking the server to respond with the
position of all streams. The server then periodically sends <code>RDATA</code> commands
which have the format <code>RDATA &lt;stream_name&gt; &lt;instance_name&gt; &lt;token&gt; &lt;row&gt;</code>, where
the format of <code>&lt;row&gt;</code> is defined by the individual streams. The
<code>&lt;instance_name&gt;</code> is the name of the Synapse process that generated the data
(usually &quot;master&quot;). We expect an RDATA for every row in the DB.</p>
<p>Error reporting happens by either the client or server sending an ERROR
command, and usually the connection will be closed.</p>
<p>Since the protocol is simple and line based, it's possible to manually
connect to the server using a tool like netcat. A few things should be
noted when manually using the protocol:</p>
<ul>
<li>The federation stream is only available if federation sending has
been disabled on the main process.</li>
<li>The server will only time connections out that have sent a <code>PING</code>
command. If a ping is sent then the connection will be closed if no
further commands are received within 15s. Both the client and
server protocol implementations will send an initial PING on
connection and ensure at least one command every 5s is sent (not
necessarily <code>PING</code>).</li>
<li><code>RDATA</code> commands <em>usually</em> include a numeric token, however if the
stream has multiple rows to replicate per token the server will send
multiple <code>RDATA</code> commands, with all but the last having a token of
<code>batch</code>. See the documentation on <code>commands.RdataCommand</code> for
further details.</li>
</ul>
<h2 id="architecture"><a class="header" href="#architecture">Architecture</a></h2>
<p>The basic structure of the protocol is line based, where the initial
word of each line specifies the command. The rest of the line is parsed
based on the command. For example, the RDATA command is defined as:</p>
<pre><code>RDATA &lt;stream_name&gt; &lt;instance_name&gt; &lt;token&gt; &lt;row_json&gt;
</code></pre>
<p>(Note that &lt;row_json&gt; may contain spaces, but cannot contain
newlines.)</p>
<p>Blank lines are ignored.</p>
<h3 id="keep-alives"><a class="header" href="#keep-alives">Keep alives</a></h3>
<p>Both sides are expected to send at least one command every 5s or so, and
should send a <code>PING</code> command if necessary. If either side does not receive
a command within e.g. 15s then the connection should be closed.</p>
<p>Because the server may be connected to manually using e.g. netcat, the
timeouts aren't enabled until an initial <code>PING</code> command is seen. Both
the client and server implementations below send a <code>PING</code> command
immediately on connection to ensure the timeouts are enabled.</p>
<p>This ensures that both sides can quickly realize if the TCP connection
has gone and handle the situation appropriately.</p>
<h3 id="start-up"><a class="header" href="#start-up">Start up</a></h3>
<p>When a new connection is made, the server:</p>
<ul>
<li>Sends a <code>SERVER</code> command, which includes the identity of the server,
allowing the client to detect if it's connected to the expected
server</li>
<li>Sends a <code>PING</code> command as above, to enable the client to time out
connections promptly.</li>
</ul>
<p>The client:</p>
<ul>
<li>Sends a <code>NAME</code> command, allowing the server to associate a human
friendly name with the connection. This is optional.</li>
<li>Sends a <code>PING</code> as above</li>
<li>Sends a <code>REPLICATE</code> to get the current position of all streams.</li>
<li>On receipt of a <code>SERVER</code> command, checks that the server name
matches the expected server name.</li>
</ul>
<h3 id="error-handling"><a class="header" href="#error-handling">Error handling</a></h3>
<p>If either side detects an error it can send an <code>ERROR</code> command and close
the connection.</p>
<p>If the client side loses the connection to the server it should
reconnect, following the steps above.</p>
<h3 id="congestion"><a class="header" href="#congestion">Congestion</a></h3>
<p>If the server sends messages faster than the client can consume them the
server will first buffer a (fairly large) number of commands and then
disconnect the client. This ensures that we don't queue up an unbounded
number of commands in memory and gives us a potential opportunity to
squawk loudly. When/if the client recovers it can reconnect to the
server and ask for missed messages.</p>
<h3 id="reliability"><a class="header" href="#reliability">Reliability</a></h3>
<p>In general the replication stream should be considered an unreliable
transport since e.g. commands are not resent if the connection
disappears.</p>
<p>The exception to that are the replication streams, i.e. RDATA commands,
since these include tokens which can be used to restart the stream on
connection errors.</p>
<p>The client should keep track of the token in the last RDATA command
received for each stream so that on reconnection it can start streaming
from the correct place. Note: not all RDATA have valid tokens due to
batching. See <code>RdataCommand</code> for more details.</p>
<h3 id="example"><a class="header" href="#example">Example</a></h3>
<p>An example interaction is shown below. Each line is prefixed with '&gt;'
or '&lt;' to indicate which side is sending, these are <em>not</em> included on
the wire:</p>
<pre><code>* connection established *
&gt; SERVER localhost:8823
&gt; PING 1490197665618
&lt; NAME synapse.app.appservice
&lt; PING 1490197665618
&lt; REPLICATE
&gt; POSITION events master 1 1
&gt; POSITION backfill master 1 1
&gt; POSITION caches master 1 1
&gt; RDATA caches master 2 [&quot;get_user_by_id&quot;,[&quot;@01register-user:localhost:8823&quot;],1490197670513]
&gt; RDATA events master 14 [&quot;$149019767112vOHxz:localhost:8823&quot;,
&quot;!AFDCvgApUmpdfVjIXm:localhost:8823&quot;,&quot;m.room.guest_access&quot;,&quot;&quot;,null]
&lt; PING 1490197675618
&gt; ERROR server stopping
* connection closed by server *
</code></pre>
<p>The <code>POSITION</code> command sent by the server is used to set the client's
position without needing to send data with the <code>RDATA</code> command.</p>
<p>An example of a batched set of <code>RDATA</code> is:</p>
<pre><code>&gt; RDATA caches master batch [&quot;get_user_by_id&quot;,[&quot;@test:localhost:8823&quot;],1490197670513]
&gt; RDATA caches master batch [&quot;get_user_by_id&quot;,[&quot;@test2:localhost:8823&quot;],1490197670513]
&gt; RDATA caches master batch [&quot;get_user_by_id&quot;,[&quot;@test3:localhost:8823&quot;],1490197670513]
&gt; RDATA caches master 54 [&quot;get_user_by_id&quot;,[&quot;@test4:localhost:8823&quot;],1490197670513]
</code></pre>
<p>In this case the client shouldn't advance their caches token until it
sees the last <code>RDATA</code>.</p>
<h3 id="list-of-commands"><a class="header" href="#list-of-commands">List of commands</a></h3>
<p>The list of valid commands, with which side can send it: server (S) or
client (C):</p>
<h4 id="server-s"><a class="header" href="#server-s">SERVER (S)</a></h4>
<p>Sent at the start to identify which server the client is talking to</p>
<h4 id="rdata-s"><a class="header" href="#rdata-s">RDATA (S)</a></h4>
<p>A single update in a stream</p>
<h4 id="position-s"><a class="header" href="#position-s">POSITION (S)</a></h4>
<p>On receipt of a POSITION command clients should check if they have missed any
updates, and if so then fetch them out of band. Sent in response to a
REPLICATE command (but can happen at any time).</p>
<p>The POSITION command includes the source of the stream. Currently all streams
are written by a single process (usually &quot;master&quot;). If fetching missing
updates via HTTP API, rather than via the DB, then processes should make the
request to the appropriate process.</p>
<p>Two positions are included, the &quot;new&quot; position and the last position sent respectively.
This allows servers to tell instances that the positions have advanced but no
data has been written, without clients needlessly checking to see if they
have missed any updates. Instances will only fetch stuff if there is a gap between
their current position and the given last position.</p>
<h4 id="error-s-c"><a class="header" href="#error-s-c">ERROR (S, C)</a></h4>
<p>There was an error</p>
<h4 id="ping-s-c"><a class="header" href="#ping-s-c">PING (S, C)</a></h4>
<p>Sent periodically to ensure the connection is still alive</p>
<h4 id="name-c"><a class="header" href="#name-c">NAME (C)</a></h4>
<p>Sent at the start by client to inform the server who they are</p>
<h4 id="replicate-c"><a class="header" href="#replicate-c">REPLICATE (C)</a></h4>
<p>Asks the server for the current position of all streams.</p>
<h4 id="user_sync-c"><a class="header" href="#user_sync-c">USER_SYNC (C)</a></h4>
<p>A user has started or stopped syncing on this process.</p>
<h4 id="clear_user_sync-c"><a class="header" href="#clear_user_sync-c">CLEAR_USER_SYNC (C)</a></h4>
<p>The server should clear all associated user sync data from the worker.</p>
<p>This is used when a worker is shutting down.</p>
<h4 id="federation_ack-c"><a class="header" href="#federation_ack-c">FEDERATION_ACK (C)</a></h4>
<p>Acknowledge receipt of some federation data</p>
<h4 id="remote_server_up-s-c"><a class="header" href="#remote_server_up-s-c">REMOTE_SERVER_UP (S, C)</a></h4>
<p>Inform other processes that a remote server may have come back online.</p>
<p>See <code>synapse/replication/tcp/commands.py</code> for a detailed description and
the format of each command.</p>
<h3 id="cache-invalidation-stream"><a class="header" href="#cache-invalidation-stream">Cache Invalidation Stream</a></h3>
<p>The cache invalidation stream is used to inform workers when they need
to invalidate any of their caches in the data store. This is done by
streaming all cache invalidations done on master down to the workers,
assuming that any caches on the workers also exist on the master.</p>
<p>Each individual cache invalidation results in a row being sent down
replication, which includes the cache name (the name of the function)
and the key to invalidate. For example:</p>
<pre><code>&gt; RDATA caches master 550953771 [&quot;get_user_by_id&quot;, [&quot;@bob:example.com&quot;], 1550574873251]
</code></pre>
<p>Alternatively, an entire cache can be invalidated by sending down a <code>null</code>
instead of the key. For example:</p>
<pre><code>&gt; RDATA caches master 550953772 [&quot;get_user_by_id&quot;, null, 1550574873252]
</code></pre>
<p>However, there are times when a number of caches need to be invalidated
at the same time with the same key. To reduce traffic we batch those
invalidations into a single poke by defining a special cache name that
workers understand to mean to expand to invalidate the correct caches.</p>
<p>Currently the special cache names are declared in
<code>synapse/storage/_base.py</code> and are:</p>
<ol>
<li><code>cs_cache_fake</code> — invalidates caches that depend on the current
state</li>
</ol>
</main>
<nav class="nav-wrapper" aria-label="Page navigation">
<!-- Mobile navigation buttons -->
<a rel="prev" href="development/synapse_architecture/streams.html" class="mobile-nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
<i class="fa fa-angle-left"></i>
</a>
<a rel="next" href="development/synapse_architecture/faster_joins.html" class="mobile-nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
<i class="fa fa-angle-right"></i>
</a>
<div style="clear: both"></div>
</nav>
</div>
</div>
<nav class="nav-wide-wrapper" aria-label="Page navigation">
<a rel="prev" href="development/synapse_architecture/streams.html" class="nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
<i class="fa fa-angle-left"></i>
</a>
<a rel="next" href="development/synapse_architecture/faster_joins.html" class="nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
<i class="fa fa-angle-right"></i>
</a>
</nav>
</div>
<script type="text/javascript">
// Global flag set before book.js is loaded further down; presumably read
// there to decide whether code blocks get a copy button — TODO confirm
// against book.js.
window.playground_copyable = true;
</script>
<script src="elasticlunr.min.js" type="text/javascript" charset="utf-8"></script>
<script src="mark.min.js" type="text/javascript" charset="utf-8"></script>
<script src="searcher.js" type="text/javascript" charset="utf-8"></script>
<script src="clipboard.min.js" type="text/javascript" charset="utf-8"></script>
<script src="highlight.js" type="text/javascript" charset="utf-8"></script>
<script src="book.js" type="text/javascript" charset="utf-8"></script>
<!-- Custom JS scripts -->
<script type="text/javascript" src="docs/website_files/table-of-contents.js"></script>
<script type="text/javascript" src="docs/website_files/version-picker.js"></script>
<script type="text/javascript" src="docs/website_files/version.js"></script>
</body>
</html>