Improve documentation and comments

This commit is contained in:
Erik Johnston 2018-05-17 15:08:50 +01:00
parent 3369354b56
commit c771c124d5
3 changed files with 58 additions and 10 deletions

View file

@ -33,13 +33,18 @@ class ChunkDBOrderedListStore(OrderedListStore):
"""Used as the list store for room chunks, efficiently maintaining them in """Used as the list store for room chunks, efficiently maintaining them in
topological order on updates. topological order on updates.
A room chunk is a connected portion of the room events DAG. As such, the set
of chunks inherits a DAG structure, i.e. if an event in one chunk references
an event in a second chunk, then we say that the first chunk references the
second, thus forming a DAG of chunks.
The class is designed for use inside transactions and so takes a The class is designed for use inside transactions and so takes a
transaction object in the constructor. This means that it needs to be transaction object in the constructor. This means that it needs to be
re-instantiated in each transaction, so all state needs to be stored re-instantiated in each transaction, so all state needs to be stored
in the database. in the database.
Internally the ordering is implemented using floats, and the average is Internally the ordering is implemented using floats, and the average is
taken when a node is inserted inbetween other nodes. To avoid presicion taken when a node is inserted between other nodes. To avoid precision
errors a minimum difference between successive orderings is attempted to be errors a minimum difference between successive orderings is attempted to be
kept; whenever the difference is too small we attempt to rebalance. See kept; whenever the difference is too small we attempt to rebalance. See
the `_rebalance` function for implementation details. the `_rebalance` function for implementation details.
@ -51,6 +56,10 @@ class ChunkDBOrderedListStore(OrderedListStore):
edge is from B to A. This ensures that newer chunks get inserted at the edge is from B to A. This ensures that newer chunks get inserted at the
end (rather than the start). end (rather than the start).
Note: Calls to `add_node` and `add_edge` cannot overlap for the same room,
and so callers should perform some form of per-room locking when using
this class.
Args: Args:
txn txn
room_id (str) room_id (str)
@ -59,7 +68,7 @@ class ChunkDBOrderedListStore(OrderedListStore):
in a range around the node, where the bounds are rounded to this in a range around the node, where the bounds are rounded to this
number of digits. number of digits.
min_difference (int): A rebalance is triggered when the difference min_difference (int): A rebalance is triggered when the difference
between two successive orderings are less than the reverse of between two successive orderings is less than the reciprocal of
this. this.
""" """
def __init__(self, def __init__(self,

View file

@ -16,8 +16,9 @@
"""This module contains an implementation of the Katriel-Bodlaender algorithm, """This module contains an implementation of the Katriel-Bodlaender algorithm,
which is used to do online topological ordering of graphs. which is used to do online topological ordering of graphs.
Note that the ordering derived from the graph has the first node one with no Note that the ordering derived from the graph is such that the source node of
incoming edges at the start, and the last node one with no outgoing edges. an edge comes before the target node of the edge, i.e. a graph of A -> B -> C
would produce the ordering [A, B, C].
This ordering is therefore opposite to what one might expect when considering This ordering is therefore opposite to what one might expect when considering
the room DAG, as newer messages would be added to the start rather than the the room DAG, as newer messages would be added to the start rather than the
@ -25,15 +26,46 @@ end.
***We therefore invert the direction of edges when using the algorithm*** ***We therefore invert the direction of edges when using the algorithm***
See https://www.sciencedirect.com/science/article/pii/S0304397507006573 See:
A tight analysis of the Katriel-Bodlaender algorithm for online topological
ordering
Hsiao-Fei Liu and Kun-Mao Chao
https://www.sciencedirect.com/science/article/pii/S0304397507006573
and:
Online Topological Ordering
Irit Katriel and Hans L. Bodlaender
http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.78.7933
""" """
from abc import ABCMeta, abstractmethod from abc import ABCMeta, abstractmethod
class OrderedListStore(object): class OrderedListStore(object):
"""An abstract base class that is used to store a topological ordering of """An abstract base class that is used to store a graph and maintain a
a graph. Suitable for use with the Katriel-Bodlaender algorithm. topologically consistent, total ordering.
Internally this uses the Katriel-Bodlaender algorithm, which requires the
store expose an interface for the total ordering that supports:
- Insertion of the node into the ordering either immediately before or
after another node.
- Deletion of the node from the ordering
- Comparing the relative ordering of two arbitrary nodes
- Get the node immediately before or after a given node in the ordering
It also needs to be able to interact with the graph in the following ways:
- Query the number of edges from a node in the graph
- Query the number of edges into a node in the graph
- Add an edge to the graph
Users of subclasses should call `add_node` and `add_edge` whenever editing
the graph. The total ordering exposed will remain constant until the next
call to one of these methods.
Note: Calls to `add_node` and `add_edge` cannot overlap, and so callers
should perform some form of locking.
""" """
__metaclass__ = ABCMeta __metaclass__ = ABCMeta
@ -53,7 +85,8 @@ class OrderedListStore(object):
@abstractmethod @abstractmethod
def get_prev(self, node_id): def get_prev(self, node_id):
"""Gets the node immediately before the given node """Gets the node immediately before the given node in the topological
ordering.
Args: Args:
node_id (str) node_id (str)
@ -65,7 +98,8 @@ class OrderedListStore(object):
@abstractmethod @abstractmethod
def get_next(self, node_id): def get_next(self, node_id):
"""Gets the node immediately after the given node """Gets the node immediately after the given node in the topological
ordering.
Args: Args:
node_id (str) node_id (str)
@ -125,7 +159,8 @@ class OrderedListStore(object):
list[tuple[float, str]]: Returns a list of tuple of an ordering list[tuple[float, str]]: Returns a list of tuple of an ordering
term and the node ID. The ordering term can be used to sort the term and the node ID. The ordering term can be used to sort the
returned list. returned list.
The ordering is valid until subsequent calls to insert_* functions The ordering is valid until subsequent calls to the `add_node` or
`add_edge` functions.
""" """
pass pass

View file

@ -23,6 +23,10 @@ from synapse.storage.chunk_ordered_table import ChunkDBOrderedListStore
class ChunkLinearizerStoreTestCase(tests.unittest.TestCase): class ChunkLinearizerStoreTestCase(tests.unittest.TestCase):
"""Tests to ensure that the ordering and rebalancing functions of
ChunkDBOrderedListStore work as expected.
"""
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super(ChunkLinearizerStoreTestCase, self).__init__(*args, **kwargs) super(ChunkLinearizerStoreTestCase, self).__init__(*args, **kwargs)