Merge pull request #2858 from matrix-org/rav/purge_updates

delete_local_events for purge_room_history
This commit is contained in:
Richard van der Hoff 2018-02-09 14:11:00 +00:00 committed by GitHub
commit 10b34dbb9a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 100 additions and 41 deletions

View file

@ -4,8 +4,6 @@ Purge History API
The purge history API allows server admins to purge historic events from their The purge history API allows server admins to purge historic events from their
database, reclaiming disk space. database, reclaiming disk space.
**NB!** This will not delete local events (locally sent messages content etc) from the database, but will remove lots of the metadata about them and does dramatically reduce the on disk space usage
Depending on the amount of history being purged a call to the API may take Depending on the amount of history being purged a call to the API may take
several minutes or longer. During this period users will not be able to several minutes or longer. During this period users will not be able to
paginate further back in the room from the point being purged from. paginate further back in the room from the point being purged from.
@ -15,3 +13,15 @@ The API is simply:
``POST /_matrix/client/r0/admin/purge_history/<room_id>/<event_id>`` ``POST /_matrix/client/r0/admin/purge_history/<room_id>/<event_id>``
including an ``access_token`` of a server admin. including an ``access_token`` of a server admin.
By default, events sent by local users are not deleted, as they may represent
the only copies of this content in existence. (Events sent by remote users are
deleted, and room state data before the cutoff is always removed).
To delete local events as well, set ``delete_local_events`` in the body:
.. code:: json
{
"delete_local_events": True,
}

View file

@ -51,7 +51,7 @@ class MessageHandler(BaseHandler):
self.pagination_lock = ReadWriteLock() self.pagination_lock = ReadWriteLock()
@defer.inlineCallbacks @defer.inlineCallbacks
def purge_history(self, room_id, event_id): def purge_history(self, room_id, event_id, delete_local_events=False):
event = yield self.store.get_event(event_id) event = yield self.store.get_event(event_id)
if event.room_id != room_id: if event.room_id != room_id:
@ -60,7 +60,7 @@ class MessageHandler(BaseHandler):
depth = event.depth depth = event.depth
with (yield self.pagination_lock.write(room_id)): with (yield self.pagination_lock.write(room_id)):
yield self.store.delete_old_state(room_id, depth) yield self.store.purge_history(room_id, depth, delete_local_events)
@defer.inlineCallbacks @defer.inlineCallbacks
def get_messages(self, requester, room_id=None, pagin_config=None, def get_messages(self, requester, room_id=None, pagin_config=None,

View file

@ -148,11 +148,13 @@ def parse_string_from_args(args, name, default=None, required=False,
return default return default
def parse_json_value_from_request(request): def parse_json_value_from_request(request, allow_empty_body=False):
"""Parse a JSON value from the body of a twisted HTTP request. """Parse a JSON value from the body of a twisted HTTP request.
Args: Args:
request: the twisted HTTP request. request: the twisted HTTP request.
allow_empty_body (bool): if True, an empty body will be accepted and
turned into None
Returns: Returns:
The JSON value. The JSON value.
@ -165,6 +167,9 @@ def parse_json_value_from_request(request):
except Exception: except Exception:
raise SynapseError(400, "Error reading JSON content.") raise SynapseError(400, "Error reading JSON content.")
if not content_bytes and allow_empty_body:
return None
try: try:
content = simplejson.loads(content_bytes) content = simplejson.loads(content_bytes)
except Exception as e: except Exception as e:
@ -174,17 +179,24 @@ def parse_json_value_from_request(request):
return content return content
def parse_json_object_from_request(request): def parse_json_object_from_request(request, allow_empty_body=False):
"""Parse a JSON object from the body of a twisted HTTP request. """Parse a JSON object from the body of a twisted HTTP request.
Args: Args:
request: the twisted HTTP request. request: the twisted HTTP request.
allow_empty_body (bool): if True, an empty body will be accepted and
turned into an empty dict.
Raises: Raises:
SynapseError if the request body couldn't be decoded as JSON or SynapseError if the request body couldn't be decoded as JSON or
if it wasn't a JSON object. if it wasn't a JSON object.
""" """
content = parse_json_value_from_request(request) content = parse_json_value_from_request(
request, allow_empty_body=allow_empty_body,
)
if allow_empty_body and content is None:
return {}
if type(content) != dict: if type(content) != dict:
message = "Content must be a JSON object." message = "Content must be a JSON object."

View file

@ -129,7 +129,16 @@ class PurgeHistoryRestServlet(ClientV1RestServlet):
if not is_admin: if not is_admin:
raise AuthError(403, "You are not a server admin") raise AuthError(403, "You are not a server admin")
yield self.handlers.message_handler.purge_history(room_id, event_id) body = parse_json_object_from_request(request, allow_empty_body=True)
delete_local_events = bool(
body.get("delete_local_history", False)
)
yield self.handlers.message_handler.purge_history(
room_id, event_id,
delete_local_events=delete_local_events,
)
defer.returnValue((200, {})) defer.returnValue((200, {}))

View file

@ -2063,16 +2063,32 @@ class EventsStore(SQLBaseStore):
) )
return self.runInteraction("get_all_new_events", get_all_new_events_txn) return self.runInteraction("get_all_new_events", get_all_new_events_txn)
def delete_old_state(self, room_id, topological_ordering): def purge_history(
return self.runInteraction( self, room_id, topological_ordering, delete_local_events,
"delete_old_state", ):
self._delete_old_state_txn, room_id, topological_ordering """Deletes room history before a certain point
)
def _delete_old_state_txn(self, txn, room_id, topological_ordering): Args:
"""Deletes old room state room_id (str):
topological_ordering (int):
minimum topo ordering to preserve
delete_local_events (bool):
if True, we will delete local events as well as remote ones
(instead of just marking them as outliers and deleting their
state groups).
""" """
return self.runInteraction(
"purge_history",
self._purge_history_txn, room_id, topological_ordering,
delete_local_events,
)
def _purge_history_txn(
self, txn, room_id, topological_ordering, delete_local_events,
):
# Tables that should be pruned: # Tables that should be pruned:
# event_auth # event_auth
# event_backward_extremities # event_backward_extremities
@ -2113,7 +2129,7 @@ class EventsStore(SQLBaseStore):
400, "topological_ordering is greater than forward extremeties" 400, "topological_ordering is greater than forward extremeties"
) )
logger.debug("[purge] looking for events to delete") logger.info("[purge] looking for events to delete")
txn.execute( txn.execute(
"SELECT event_id, state_key FROM events" "SELECT event_id, state_key FROM events"
@ -2125,16 +2141,16 @@ class EventsStore(SQLBaseStore):
to_delete = [ to_delete = [
(event_id,) for event_id, state_key in event_rows (event_id,) for event_id, state_key in event_rows
if state_key is None and not self.hs.is_mine_id(event_id) if state_key is None and (
delete_local_events or not self.hs.is_mine_id(event_id)
)
] ]
logger.info( logger.info(
"[purge] found %i events before cutoff, of which %i are remote" "[purge] found %i events before cutoff, of which %i can be deleted",
" non-state events to delete", len(event_rows), len(to_delete)) len(event_rows), len(to_delete),
)
for event_id, state_key in event_rows: logger.info("[purge] Finding new backward extremities")
txn.call_after(self._get_state_group_for_event.invalidate, (event_id,))
logger.debug("[purge] Finding new backward extremities")
# We calculate the new entries for the backward extremeties by finding # We calculate the new entries for the backward extremeties by finding
# all events that point to events that are to be purged # all events that point to events that are to be purged
@ -2148,7 +2164,7 @@ class EventsStore(SQLBaseStore):
) )
new_backwards_extrems = txn.fetchall() new_backwards_extrems = txn.fetchall()
logger.debug("[purge] replacing backward extremities: %r", new_backwards_extrems) logger.info("[purge] replacing backward extremities: %r", new_backwards_extrems)
txn.execute( txn.execute(
"DELETE FROM event_backward_extremities WHERE room_id = ?", "DELETE FROM event_backward_extremities WHERE room_id = ?",
@ -2164,7 +2180,7 @@ class EventsStore(SQLBaseStore):
] ]
) )
logger.debug("[purge] finding redundant state groups") logger.info("[purge] finding redundant state groups")
# Get all state groups that are only referenced by events that are # Get all state groups that are only referenced by events that are
# to be deleted. # to be deleted.
@ -2181,14 +2197,14 @@ class EventsStore(SQLBaseStore):
) )
state_rows = txn.fetchall() state_rows = txn.fetchall()
logger.debug("[purge] found %i redundant state groups", len(state_rows)) logger.info("[purge] found %i redundant state groups", len(state_rows))
# make a set of the redundant state groups, so that we can look them up # make a set of the redundant state groups, so that we can look them up
# efficiently # efficiently
state_groups_to_delete = set([sg for sg, in state_rows]) state_groups_to_delete = set([sg for sg, in state_rows])
# Now we get all the state groups that rely on these state groups # Now we get all the state groups that rely on these state groups
logger.debug("[purge] finding state groups which depend on redundant" logger.info("[purge] finding state groups which depend on redundant"
" state groups") " state groups")
remaining_state_groups = [] remaining_state_groups = []
for i in xrange(0, len(state_rows), 100): for i in xrange(0, len(state_rows), 100):
@ -2214,7 +2230,7 @@ class EventsStore(SQLBaseStore):
# Now we turn the state groups that reference to-be-deleted state # Now we turn the state groups that reference to-be-deleted state
# groups to non delta versions. # groups to non delta versions.
for sg in remaining_state_groups: for sg in remaining_state_groups:
logger.debug("[purge] de-delta-ing remaining state group %s", sg) logger.info("[purge] de-delta-ing remaining state group %s", sg)
curr_state = self._get_state_groups_from_groups_txn( curr_state = self._get_state_groups_from_groups_txn(
txn, [sg], types=None txn, [sg], types=None
) )
@ -2251,7 +2267,7 @@ class EventsStore(SQLBaseStore):
], ],
) )
logger.debug("[purge] removing redundant state groups") logger.info("[purge] removing redundant state groups")
txn.executemany( txn.executemany(
"DELETE FROM state_groups_state WHERE state_group = ?", "DELETE FROM state_groups_state WHERE state_group = ?",
state_rows state_rows
@ -2261,18 +2277,15 @@ class EventsStore(SQLBaseStore):
state_rows state_rows
) )
# Delete all non-state logger.info("[purge] removing events from event_to_state_groups")
logger.debug("[purge] removing events from event_to_state_groups")
txn.executemany( txn.executemany(
"DELETE FROM event_to_state_groups WHERE event_id = ?", "DELETE FROM event_to_state_groups WHERE event_id = ?",
[(event_id,) for event_id, _ in event_rows] [(event_id,) for event_id, _ in event_rows]
) )
for event_id, _ in event_rows:
logger.debug("[purge] updating room_depth") txn.call_after(self._get_state_group_for_event.invalidate, (
txn.execute( event_id,
"UPDATE room_depth SET min_depth = ? WHERE room_id = ?", ))
(topological_ordering, room_id,)
)
# Delete all remote non-state events # Delete all remote non-state events
for table in ( for table in (
@ -2290,7 +2303,8 @@ class EventsStore(SQLBaseStore):
"event_signatures", "event_signatures",
"rejections", "rejections",
): ):
logger.debug("[purge] removing remote non-state events from %s", table) logger.info("[purge] removing remote non-state events from %s",
table)
txn.executemany( txn.executemany(
"DELETE FROM %s WHERE event_id = ?" % (table,), "DELETE FROM %s WHERE event_id = ?" % (table,),
@ -2298,16 +2312,30 @@ class EventsStore(SQLBaseStore):
) )
# Mark all state and own events as outliers # Mark all state and own events as outliers
logger.debug("[purge] marking remaining events as outliers") logger.info("[purge] marking remaining events as outliers")
txn.executemany( txn.executemany(
"UPDATE events SET outlier = ?" "UPDATE events SET outlier = ?"
" WHERE event_id = ?", " WHERE event_id = ?",
[ [
(True, event_id,) for event_id, state_key in event_rows (True, event_id,) for event_id, state_key in event_rows
if state_key is not None or self.hs.is_mine_id(event_id) if state_key is not None or (
not delete_local_events and self.hs.is_mine_id(event_id)
)
] ]
) )
# synapse tries to take out an exclusive lock on room_depth whenever it
# persists events (because upsert), and once we run this update, we
# will block that for the rest of our transaction.
#
# So, let's stick it at the end so that we don't block event
# persistence.
logger.info("[purge] updating room_depth")
txn.execute(
"UPDATE room_depth SET min_depth = ? WHERE room_id = ?",
(topological_ordering, room_id,)
)
logger.info("[purge] done") logger.info("[purge] done")
@defer.inlineCallbacks @defer.inlineCallbacks