From 86dd403933d6cdde5d65c32e540b84d7822666df Mon Sep 17 00:00:00 2001
From: Jamie Pine <ijamespine@me.com>
Date: Sun, 22 May 2022 20:42:41 -0700
Subject: [PATCH] improved sync schema

---
 core/prisma/schema.prisma                  | 13 +++++++--
 docs/architecture/database.md              | 13 +++++++++
 docs/architecture/distributed-data-sync.md | 34 +++++++---------------
 docs/architecture/virtual-filesystem.md    | 16 ++++------
 4 files changed, 40 insertions(+), 36 deletions(-)
 create mode 100644 docs/architecture/database.md
diff --git a/core/prisma/schema.prisma b/core/prisma/schema.prisma
index ce3bdf006..ce07fec16 100644
--- a/core/prisma/schema.prisma
+++ b/core/prisma/schema.prisma
@@ -19,12 +19,19 @@ model Migration {
 }
 
 model SyncEvent {
-    id        Int    @id @default(autoincrement())
+    id        Int     @id @default(autoincrement())
     node_id   Int
     timestamp String
-    data      String
-    node      Node   @relation(fields: [node_id], references: [id])
+    // individual record pub id OR compound many-to-many pub ids
+    record_id String
+    // the type of operation, i.e. CREATE, UPDATE or DELETE, as an enum
+    type      Int
+    // the column name for atomic update operations
+    column    String?
+    // the new value for create/update operations, msgpack encoded
+    value     String
 
+    node Node @relation(fields: [node_id], references: [id])
     @@map("sync_events")
 }
 
diff --git a/docs/architecture/database.md b/docs/architecture/database.md
new file mode 100644
index 000000000..0533d3a63
--- /dev/null
+++ b/docs/architecture/database.md
@@ -0,0 +1,13 @@
+## Database backup
+
+
+
+## Database migrations
+
+Currently migrations are applied on app launch with no visual feedback, backup or error handling.
+
+It doesn't appear that migrations are applied successfully.
+
+
+
+## 
\ No newline at end of file
diff --git a/docs/architecture/distributed-data-sync.md b/docs/architecture/distributed-data-sync.md
index 34d4c565f..db35b07d2 100644
--- a/docs/architecture/distributed-data-sync.md
+++ b/docs/architecture/distributed-data-sync.md
@@ -5,20 +5,8 @@ Synchronizing data between clients in a Spacedrive network is accomplished using
 Designed for synchronizing data in realtime between [SQLite](https://www.sqlite.org/) databases potentially in the gigabytes.
 
 ```rust
-mod sync {
-  struct SyncEngine {
-    pending: Vec<SyncEvent>,     // events waiting to be sent
-  }
-
-  struct SyncEvent {
-    client_uuid: String,         // client that created change
-    timestamp: uhlc::Timestamp,  // unique hybrid logical clock timestamp
-    resource: SyncResource,      // the CRDT resource
-    transport: SyncTransport,    // method of data transport (msg or binary)
-  }
-
-  // we can now impl specfic CRDT traits to given resources
-  enum SyncResource {
+// we can now impl specific CRDT traits to given resources
+enum SyncResource {
     FilePath(dyn Replicate),
     File(dyn PropertyOperation),
     Tag(dyn PropertyOperation),
@@ -46,32 +34,32 @@ Data is divided into several kinds, Shared and Owned.
 
 \*_Shared data doesn't always use this method, in some cases we can create shared resources in bulk, where conflicts are handled by simply merging. More on that in [Synchronization Strategy]()_.
 
-## Client Pool
+## Node Pool
 
-The client pool maintains record of all clients in your network.
+The node pool maintains record of all nodes in your network.
 
 An exact replica of the client pool is synchronized on each client. When a given client has a state change, it will notify every other client in the pool via the `connection` struct.
 
 The `ClientConnection` is maintained in memory and is established on startup.
 
 ```rust
-struct ClientPool {
+struct NodePool {
   clients: Vec<Client>
 }
 
-struct Client {
+struct Node {
   uuid: String,
   last_seen: DateTime<Utc>,
   last_synchronized: DateTime<Utc>,
-  connection: Option<ClientConnection>
+  connection: Option<NodeConnection>
 }
 ```
 
-Clients will ping-pong to ensure their connection stays alive, this logic is contained within the `ClientConnection` instance.
+Nodes will ping-pong to ensure their connection stays alive, this logic is contained within the `NodeConnection` instance.
 
-**Handling stale clients**
+**Handling stale nodes**
 
-If a client has not been seen in X amount of time, other clients will not persist pending operations for them. Clients take care of flushing the pending operation queue once all non-stale clients have received the pending operations.
+If a node has not been seen in X amount of time, other nodes will not persist pending operations for them. Nodes take care of flushing the pending operation queue once all non-stale nodes have received the pending operations.
 
 ## Clock
 
@@ -93,7 +81,7 @@ This allows us to entirely avoid the need to synchronize time between clients, a
 
 Sync happens in the following order:
 
-Owned data → Bulk shared data → Shared data → Relational data
+Owned data → Bulk shared data → Shared data 
 
 ### Types of CRDT:
 
diff --git a/docs/architecture/virtual-filesystem.md b/docs/architecture/virtual-filesystem.md
index e109bef11..ef9afc4f6 100644
--- a/docs/architecture/virtual-filesystem.md
+++ b/docs/architecture/virtual-filesystem.md
@@ -9,38 +9,34 @@ Represents a unique file across the virtual filesystem, all Spacedrive metadata
 ```rust
 struct File {
   id: i32,
-  partial_checksum: str,
-  checksum: Option<str>,
-
+  cas_id: str,
+  integrity_checksum: Option<str>,
   kind: FileKind,
-
   hidden: bool,
   favorite: bool,
   has_thumbnail: bool,
   has_thumbstrip: bool,
   has_video_preview: bool,
-  encryption: EncryptionAlgorithm,
+  key: Key,
   ipfs_id: Option<str>,
-
-  file_paths: Vec<FilePath>,
+  paths: Vec<FilePath>,
   tags: Vec<Tag>,
   labels: Vec<Label>,
   comments: Vec<Comment>,
   albums: Vec<Album>,
   media_data: Option<MediaData>,
-
   date_created: DateTime<Utc>,
   date_modified: DateTime<Utc>,
 }
 ```
 
-- `partial_checksum ` - A SHA256 checksum generated from 5 samples of 10,000 bytes throughout the file data, including the beginning and end + total byte count. This is used to identify a file as _likely_ unique in under 100µs.
+- `cas_id` - A SHA256 checksum generated from 5 samples of 10,000 bytes throughout the file data, including the beginning and end + total byte count. This is used to identify a file as _likely_ unique in under 100µs.
 
 > ~~It is impossible to have a unique constraint at a database level for the `partial_checksum` however we can asynchronously resolve conflicts by querying for duplicates and generating full checksums at a later date.~~
 >
 > For synchronization of this resource we can tolerate temporary duplicates, any client can calculate that two files resources are duplicate and merge them into a single resource. In turn, triggering a shared data merge operation, whereby the older record is prioritised at a property level during the merge.
 
-- `checksum` - A full SHA256 checksum of the file data used to verify uniqueness should a `partial_checksum` conflict occur.
+- `integrity_checksum` - A full SHA256 checksum of the file data used to verify uniqueness should a `cas_id` conflict occur.
 
 ### FilePath — `Owned data`