diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml index ec071c8aa..3582b6290 100644 --- a/.github/FUNDING.yml +++ b/.github/FUNDING.yml @@ -1 +1 @@ -open_collective: spacedrive \ No newline at end of file +open_collective: spacedrive diff --git a/.github/actions/build-and-publish-server/action.yml b/.github/actions/build-and-publish-server/action.yml index 225d268f7..efbb4ee93 100644 --- a/.github/actions/build-and-publish-server/action.yml +++ b/.github/actions/build-and-publish-server/action.yml @@ -13,13 +13,13 @@ runs: registry: ghcr.io username: ${{ github.repository_owner }} password: ${{ inputs.gh_token }} - + - name: Build Server shell: bash run: | cargo build --release -p server cp ./target/release/server ./apps/server/server - + - name: Determine image name & tag shell: bash run: | @@ -46,10 +46,10 @@ runs: run: | docker tag $IMAGE_NAME:$IMAGE_TAG $IMAGE_NAME:staging docker push $IMAGE_NAME:staging - + - name: Tag & push image as latest production image if: github.event_name == 'release' shell: bash run: | docker tag $IMAGE_NAME:$IMAGE_TAG $IMAGE_NAME:production - docker push $IMAGE_NAME:production \ No newline at end of file + docker push $IMAGE_NAME:production diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8af60f121..3d53df5bf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,8 +3,8 @@ on: pull_request: push: branches: - - main - - ci + - main + - ci workflow_dispatch: jobs: @@ -18,115 +18,115 @@ jobs: RUST_CACHE_VERSION: 0 runs-on: ${{ matrix.platform }} steps: - - uses: actions/checkout@v2 - - # from https://github.com/zmwangx/rust-ffmpeg/blob/master/.github/workflows/build.yml - - name: Install ffmpeg (Windows) - if: matrix.platform == 'windows-latest' - run: | - $VCINSTALLDIR = $(& "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe" -latest -property installationPath) - Add-Content $env:GITHUB_ENV "LIBCLANG_PATH=${VCINSTALLDIR}\VC\Tools\LLVM\x64\bin`n" - Invoke-WebRequest "${env:FFMPEG_DOWNLOAD_URL}" -OutFile ffmpeg-release-full-shared.7z - 7z x ffmpeg-release-full-shared.7z - mkdir ffmpeg - mv ffmpeg-*/* ffmpeg/ - Add-Content $env:GITHUB_ENV "FFMPEG_DIR=${pwd}\ffmpeg`n" - Add-Content $env:GITHUB_PATH "${pwd}\ffmpeg\bin`n" - - - name: Install CMake (Windows) - uses: lukka/get-cmake@latest - if: matrix.platform == 'windows-latest' - - # Optimisation for windows - - name: Rename existing rust toolchain (Windows) - if: matrix.platform == 'windows-latest' - run: Rename-Item C:\Users\runneradmin\.rustup\toolchains\stable-x86_64-pc-windows-msvc C:\Users\runneradmin\.rustup\toolchains\stable-x86_64-pc-windows-msvc.old - - - name: Setup Node - uses: actions/setup-node@v1 - with: - node-version: 16 - - - name: Install Rust stable - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - profile: minimal - override: true - components: rustfmt, rust-src - - - name: Cache Rust Dependencies - uses: Swatinem/rust-cache@cb2cf0cc7c5198d3364b9630e2c3d457f160790c - with: - sharedKey: ${{ env.RUST_CACHE_VERSION }} - - - name: Cache pnpm dependencies - uses: actions/cache@v2 - with: - path: ~/.pnpm-store - key: ${{ runner.os }}-${{ hashFiles('**/pnpm-lock.yaml') }} - restore-keys: | - ${{ runner.os }}- + - uses: actions/checkout@v2 - - name: Install pnpm - uses: pnpm/action-setup@v2.2.1 - with: - version: 6.32.6 - - - name: Install dependencies (Ubuntu) - if: matrix.platform == 'ubuntu-latest' - run: | - sudo apt-get update - sudo apt-get install -y --no-install-recommends \ - libgtk-3-dev \ - webkit2gtk-4.0 \ - libappindicator3-dev \ - librsvg2-dev \ - patchelf \ - libssl-dev \ - libavcodec-dev \ - libavdevice-dev \ - libavfilter-dev \ - libavformat-dev \ - libavresample-dev \ - libavutil-dev \ - libswscale-dev \ - libswresample-dev \ - pkg-config \ - ffmpeg - - - name: Install dependencies (macOS) - if: matrix.platform == 'macos-latest' - run: | - brew install ffmpeg - - - name: Install pnpm dependencies - run: pnpm i - - - name: Build codegen - run: pnpm prep:ci - - - name: Build frontend - run: pnpm desktop build:vite - - - name: Build Tauri app - uses: tauri-apps/tauri-action@v0 - with: - projectPath: apps/desktop - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # from https://github.com/zmwangx/rust-ffmpeg/blob/master/.github/workflows/build.yml + - name: Install ffmpeg (Windows) + if: matrix.platform == 'windows-latest' + run: | + $VCINSTALLDIR = $(& "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe" -latest -property installationPath) + Add-Content $env:GITHUB_ENV "LIBCLANG_PATH=${VCINSTALLDIR}\VC\Tools\LLVM\x64\bin`n" + Invoke-WebRequest "${env:FFMPEG_DOWNLOAD_URL}" -OutFile ffmpeg-release-full-shared.7z + 7z x ffmpeg-release-full-shared.7z + mkdir ffmpeg + mv ffmpeg-*/* ffmpeg/ + Add-Content $env:GITHUB_ENV "FFMPEG_DIR=${pwd}\ffmpeg`n" + Add-Content $env:GITHUB_PATH "${pwd}\ffmpeg\bin`n" - - name: Build and publish server - if: matrix.platform == 'ubuntu-latest' - uses: ./.github/actions/build-and-publish-server - with: - gh_token: ${{ secrets.GITHUB_TOKEN }} - - - name: Deploy Spacedrive Server to Kubernetes - if: matrix.platform == 'ubuntu-latest' - env: - K8S_KUBECONFIG: ${{ secrets.K8S_KUBECONFIG }} - run: | - mkdir -p ~/.kube - echo "$K8S_KUBECONFIG" > ~/.kube/config 2>&1 - kubectl rollout restart deployment/sdserver-deployment + - name: Install CMake (Windows) + uses: lukka/get-cmake@latest + if: matrix.platform == 'windows-latest' + + # Optimisation for windows + - name: Rename existing rust toolchain (Windows) + if: matrix.platform == 'windows-latest' + run: Rename-Item C:\Users\runneradmin\.rustup\toolchains\stable-x86_64-pc-windows-msvc C:\Users\runneradmin\.rustup\toolchains\stable-x86_64-pc-windows-msvc.old + + - name: Setup Node + uses: actions/setup-node@v1 + with: + node-version: 16 + + - name: Install Rust stable + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + profile: minimal + override: true + components: rustfmt, rust-src + + - name: Cache Rust Dependencies + uses: Swatinem/rust-cache@cb2cf0cc7c5198d3364b9630e2c3d457f160790c + with: + sharedKey: ${{ env.RUST_CACHE_VERSION }} + + - name: Cache pnpm dependencies + uses: actions/cache@v2 + with: + path: ~/.pnpm-store + key: ${{ runner.os }}-${{ hashFiles('**/pnpm-lock.yaml') }} + restore-keys: | + ${{ runner.os }}- + + - name: Install pnpm + uses: pnpm/action-setup@v2.2.1 + with: + version: 6.32.6 + + - name: Install dependencies (Ubuntu) + if: matrix.platform == 'ubuntu-latest' + run: | + sudo apt-get update + sudo apt-get install -y --no-install-recommends \ + libgtk-3-dev \ + webkit2gtk-4.0 \ + libappindicator3-dev \ + librsvg2-dev \ + patchelf \ + libssl-dev \ + libavcodec-dev \ + libavdevice-dev \ + libavfilter-dev \ + libavformat-dev \ + libavresample-dev \ + libavutil-dev \ + libswscale-dev \ + libswresample-dev \ + pkg-config \ + ffmpeg + + - name: Install dependencies (macOS) + if: matrix.platform == 'macos-latest' + run: | + brew install ffmpeg + + - name: Install pnpm dependencies + run: pnpm i + + - name: Build codegen + run: pnpm prep:ci + + - name: Build frontend + run: pnpm desktop build:vite + + - name: Build Tauri app + uses: tauri-apps/tauri-action@v0 + with: + projectPath: apps/desktop + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and publish server + if: matrix.platform == 'ubuntu-latest' + uses: ./.github/actions/build-and-publish-server + with: + gh_token: ${{ secrets.GITHUB_TOKEN }} + + - name: Deploy Spacedrive Server to Kubernetes + if: matrix.platform == 'ubuntu-latest' + env: + K8S_KUBECONFIG: ${{ secrets.K8S_KUBECONFIG }} + run: | + mkdir -p ~/.kube + echo "$K8S_KUBECONFIG" > ~/.kube/config 2>&1 + kubectl rollout restart deployment/sdserver-deployment diff --git a/.github/workflows/org-readme.yml b/.github/workflows/org-readme.yml index b12ca9b0a..4ff9ff360 100644 --- a/.github/workflows/org-readme.yml +++ b/.github/workflows/org-readme.yml @@ -3,9 +3,9 @@ name: Update Org README on: push: branches: - - main + - main paths: - - README.md + - README.md workflow_dispatch: jobs: @@ -25,4 +25,4 @@ jobs: destination_folder: 'profile' user_email: 'actions@spacedrive.app' user_name: 'GH Actions' - commit_message: 'Update README' \ No newline at end of file + commit_message: 'Update README' diff --git a/README.md b/README.md index bd9f665af..09be79b15 100644 --- a/README.md +++ b/README.md @@ -57,37 +57,42 @@ For independent creatives, hoarders and those that want to own their digital foo

-> NOTE: Spacedrive is under active development, most of the listed features are still experimental and subject to change. Additionally, most of the links on this page are broken but will be working once the repository is made public. -> +> NOTE: Spacedrive is under active development, most of the listed features are still experimental and subject to change. Additionally, most of the links on this page are broken but will be working once the repository is made public. # What is a VDFS? -A VDFS (virtual distributed filesystem) is a filesystem designed to work atop a variety of storage layers. It is not restricted to a single machine, with a uniform API to manipulate and access content across many devices. It achieves this by maintaining a virtual index of all storage locations, synchronizing the database between clients in realtime. This implementation also uses [CAS](https://en.wikipedia.org/wiki/Content-addressable_storage) (Content-addressable storage) to uniquely identify files, while keeping record of logical file paths relative to the storage locations. -The first implementation of a VDFS can be found in this UC Berkeley [paper](https://www2.eecs.berkeley.edu/Pubs/TechRpts/2018/EECS-2018-29.pdf) by Haoyuan Li. This paper describes its use for cloud computing, however the underlying concepts can be translated to open consumer software. +A VDFS (virtual distributed filesystem) is a filesystem designed to work atop a variety of storage layers. It is not restricted to a single machine, with a uniform API to manipulate and access content across many devices. It achieves this by maintaining a virtual index of all storage locations, synchronizing the database between clients in realtime. This implementation also uses [CAS](https://en.wikipedia.org/wiki/Content-addressable_storage) (Content-addressable storage) to uniquely identify files, while keeping record of logical file paths relative to the storage locations. + +The first implementation of a VDFS can be found in this UC Berkeley [paper](https://www2.eecs.berkeley.edu/Pubs/TechRpts/2018/EECS-2018-29.pdf) by Haoyuan Li. This paper describes its use for cloud computing, however the underlying concepts can be translated to open consumer software. # Motivation + Many of us have multiple cloud accounts, drives that aren’t backed up and data at risk of loss. We depend on cloud services like Google Photos and iCloud, but are locked in with limited capacity and almost zero interoperability between services and operating systems. Photo albums shouldn’t be suck in a device ecosystem, or harvested for advertising data. They should be OS agnostic, permanent and personally owned. Data we create is our legacy, that will long outlive us—open source technology is the only way to ensure we retain absolute control over the data that defines our lives, at unlimited scale. - # Features + _Note: Links are for highlight purposes only until feature specific documentation is complete._ -**Complete:** *(in testing)* +**Complete:** _(in testing)_ + - **[File discovery](#features)** - Scan devices, drives and cloud accounts to build a directory of all files with metadata. - **[Preview generation](#features)** - Auto generate lower resolution stand-ins for image and video. - **[Statistics](#features)** - Total capacity, index size, preview media size, free space etc. - + **In progress:** -- **[File Explorer](#features)** - Browse online/offline storage locations, view files with metadata, perform basic CRUD. + +- **[File Explorer](#features)** - Browse online/offline storage locations, view files with metadata, perform basic CRUD. - **[Realtime synchronization](#features)** - Data index synchronized in realtime between devices, prioritizing peer-to-peer LAN connections (WiFi sync). - + **To be developed (MVP):** + - **[Photos](#features)** - Photo and video albums similar to Apple/Google photos. - **[Search](#features)** - Deep search into your filesystem with a keybind, including offline locations. - **[Tags](#features)** - Define routines on custom tags to automate workflows, easily tag files individually, in bulk and automatically via rules. - **[Extensions](#features)** - Build tools on top of Spacedrive, extend functionality and integrate third party services. Extension directory on [spacedrive.app/extensions](#features). - -**To be developed (Post-MVP):** + +**To be developed (Post-MVP):** + - **[Cloud integration](#features)** - Index & backup to Apple Photos, Google Drive, Dropbox, OneDrive & Mega + easy API for the community to add more. - **[Encrypted vault(s)](#features)** - Effortlessly manage & encrypt sensitive files, built on top of VeraCrypt. Encrypt individual files or create flexible-size vaults. - **[Key manager](#features)** - View, mount, dismount and hide keys. Mounted keys automatically unlock respective areas of your filesystem. @@ -99,48 +104,57 @@ _Note: Links are for highlight purposes only until feature specific documentatio - **[Self hosted](#features)** - Spacedrive can be deployed as a service, behaving as just another device powering your personal cloud. # Developer Installation Instructions + This environment uses [Cargo](https://doc.rust-lang.org/cargo/getting-started/installation.html) and [pnpm](https://pnpm.io/installation). Ensure you have them installed before continuing. - `$ git clone https://github.com/spacedriveapp/spacedrive` -- IMPORTANT: *Install [FFMPEG](https://www.ffmpeg.org/download.html) if you don't have it already* +- IMPORTANT: _Install [FFMPEG](https://www.ffmpeg.org/download.html) if you don't have it already_ - `$ cd spacedrive` - `$ pnpm i` - `$ pnpm prep` - Runs all necessary codegen & builds required dependencies. To quickly run only the desktop app after `prep` you can use: + - `$ pnpm desktop dev` To run the landing page + - `$ pnpm web dev` - runs the web app for the embed - `$ pnpm landing dev` If you are having issues ensure you are using the following versions of Rust and Node: + - Rust version: **1.58.1** - Node version: **17** # Architecture -This project is using what I'm calling the **"PRRTT"** stack (Prisma, Rust, React, TypeScript, Tauri). + +This project is using what I'm calling the **"PRRTT"** stack (Prisma, Rust, React, TypeScript, Tauri). + - Prisma on the front-end? 🤯 Made possible thanks to [prisma-client-rust](https://github.com/brendonovich/prisma-client-rust), developed by [Brendonovich](https://github.com/brendonovich). Gives us access to the powerful migration CLI in development, along with the Prisma syntax for our schema. The application bundles with the Prisma query engine and codegen for a beautiful Rust API. Our lightweight migration runner is custom built for a desktop app context. -- Tauri allows us to create a pure Rust native OS webview, without the overhead of your average Electron app. This brings the bundle size and average memory usage down dramatically. It also contributes to a more native feel, especially on macOS due to Safari's close integration with the OS. +- Tauri allows us to create a pure Rust native OS webview, without the overhead of your average Electron app. This brings the bundle size and average memory usage down dramatically. It also contributes to a more native feel, especially on macOS due to Safari's close integration with the OS. - The core (`sdcore`) is written in pure Rust. -## Monorepo structure: +## Monorepo structure: ### Apps: + - `desktop`: A [Tauri](https://tauri.studio) app. - `mobile`: A [React Native](https://reactnative.dev/) app. - `web`: A [React](https://reactjs.org) webapp. - `landing`: A [React](https://reactjs.org) app using Vite SSR & Vite pages. ### Core: + - `core`: The [Rust](#) core, referred to internally as `sdcore`. Contains filesystem, database and networking logic. Can be deployed in a variety of host applications. ### Packages: + - `client`: A [TypeScript](#) client library to handle dataflow via RPC between UI and the Rust core. -- `ui`: A [React]([#](https://reactjs.org)) Shared component library. +- `ui`: A [React](<[#](https://reactjs.org)>) Shared component library. - `interface`: The complete user interface in React (used by apps `desktop`, `web` and `landing`) - `config`: `eslint` configurations (includes `eslint-config-next`, `eslint-config-prettier` and all `tsconfig.json` configs used throughout the monorepo. - `macos`: A [Swift](#) Native binary for MacOS system extensions. - `ios`: A [Swift](#) Native binary (planned). - `windows`: A [C#](#) Native binary (planned). -- `android`: A [Kotlin](#) Native binary (planned). \ No newline at end of file +- `android`: A [Kotlin](#) Native binary (planned). diff --git a/apps/desktop/tsconfig.json b/apps/desktop/tsconfig.json index 0d4845c9c..df04b3c8c 100644 --- a/apps/desktop/tsconfig.json +++ b/apps/desktop/tsconfig.json @@ -19,8 +19,8 @@ "paths": { "@sd/interface": ["../../packages/interface/src/index.ts"], "@sd/ui": ["../../packages/ui/src/index.ts"], - "@sd/client": ["../../packages/client/src/index.ts"], + "@sd/client": ["../../packages/client/src/index.ts"] } }, - "include": ["src"], + "include": ["src"] } diff --git a/apps/landing/index.html b/apps/landing/index.html index 9c08edc92..651e6c629 100644 --- a/apps/landing/index.html +++ b/apps/landing/index.html @@ -5,8 +5,14 @@ Spacedrive — A file manager from the future. - - + + @@ -14,4 +20,4 @@
- \ No newline at end of file + diff --git a/apps/landing/src/style.scss b/apps/landing/src/style.scss index 6a4e65b4b..849ff7cc8 100644 --- a/apps/landing/src/style.scss +++ b/apps/landing/src/style.scss @@ -1,9 +1,8 @@ html { - background-color: black; - -ms-overflow-style: none; /* IE and Edge */ - scrollbar-width: none; /* Firefox */ - &::-webkit-scrollbar { - display: none; - } + background-color: black; + -ms-overflow-style: none; /* IE and Edge */ + scrollbar-width: none; /* Firefox */ + &::-webkit-scrollbar { + display: none; } - \ No newline at end of file +} diff --git a/apps/landing/tsconfig.json b/apps/landing/tsconfig.json index 8a12d6272..8be0a3e91 100644 --- a/apps/landing/tsconfig.json +++ b/apps/landing/tsconfig.json @@ -18,7 +18,7 @@ "paths": { "@sd/interface": ["../../packages/interface/src/index.ts"], "@sd/ui": ["../../packages/ui/src/index.ts"], - "@sd/client": ["../../packages/client/src/index.ts"], + "@sd/client": ["../../packages/client/src/index.ts"] } }, "ts-node": { @@ -27,6 +27,14 @@ "module": "CommonJS" } }, - "include": ["src", "env.d.ts", "src/vite-env.d.ts", "src/components", "src/pages", "renderer", "server"], + "include": [ + "src", + "env.d.ts", + "src/vite-env.d.ts", + "src/components", + "src/pages", + "renderer", + "server" + ], "references": [{ "path": "./tsconfig.node.json" }] } diff --git a/apps/mobile/package.json b/apps/mobile/package.json index 531d5bee4..84d6df566 100644 --- a/apps/mobile/package.json +++ b/apps/mobile/package.json @@ -1,7 +1,6 @@ { - "name": "mobile", - "version": "0.0.0", - "main": "index.js", - "license": "MIT" - } - \ No newline at end of file + "name": "mobile", + "version": "0.0.0", + "main": "index.js", + "license": "MIT" +} diff --git a/apps/server/k8s/infrastructure.yaml b/apps/server/k8s/infrastructure.yaml index f5a100ab1..a5e44b4ee 100644 --- a/apps/server/k8s/infrastructure.yaml +++ b/apps/server/k8s/infrastructure.yaml @@ -23,9 +23,9 @@ metadata: name: spacedrive-ns-full namespace: spacedrive rules: -- apiGroups: ["apps"] - resources: ["deployments"] - verbs: ["get", "patch"] + - apiGroups: ['apps'] + resources: ['deployments'] + verbs: ['get', 'patch'] --- apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding @@ -33,10 +33,10 @@ metadata: name: spacedrive-ci-rb namespace: spacedrive subjects: -- kind: ServiceAccount - name: spacedrive-ci - namespace: spacedrive + - kind: ServiceAccount + name: spacedrive-ci + namespace: spacedrive roleRef: apiGroup: rbac.authorization.k8s.io kind: Role - name: spacedrive-ns-full \ No newline at end of file + name: spacedrive-ns-full diff --git a/apps/server/k8s/sdserver.yaml b/apps/server/k8s/sdserver.yaml index d00d70a18..00f02c1c1 100644 --- a/apps/server/k8s/sdserver.yaml +++ b/apps/server/k8s/sdserver.yaml @@ -13,16 +13,16 @@ metadata: traefik.ingress.kubernetes.io/router.middlewares: kube-system-antiseo@kubernetescrd spec: rules: - - host: spacedrive.otbeaumont.me - http: - paths: - - path: / - pathType: Prefix - backend: - service: - name: sdserver-service - port: - number: 8080 + - host: spacedrive.otbeaumont.me + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: sdserver-service + port: + number: 8080 --- apiVersion: v1 kind: Service diff --git a/apps/web/README.md b/apps/web/README.md index 1865c0378..dca4765c0 100644 --- a/apps/web/README.md +++ b/apps/web/README.md @@ -1 +1 @@ -# Spacedrive Webapp \ No newline at end of file +# Spacedrive Webapp diff --git a/apps/web/src/App.tsx b/apps/web/src/App.tsx index 96693c9c9..c4a59ffb9 100644 --- a/apps/web/src/App.tsx +++ b/apps/web/src/App.tsx @@ -4,9 +4,7 @@ import SpacedriveInterface from '@sd/interface'; import { ClientCommand, ClientQuery, CoreEvent } from '@sd/core'; import { BaseTransport } from '@sd/client'; -const websocket = new WebSocket( - import.meta.env.VITE_SDSERVER_BASE_URL || 'ws://localhost:8080/ws' -); +const websocket = new WebSocket(import.meta.env.VITE_SDSERVER_BASE_URL || 'ws://localhost:8080/ws'); const randomId = () => Math.random().toString(36).slice(2); diff --git a/apps/web/src/index.html b/apps/web/src/index.html index 6d7fb0d8a..fd076f840 100644 --- a/apps/web/src/index.html +++ b/apps/web/src/index.html @@ -8,4 +8,4 @@
- \ No newline at end of file + diff --git a/apps/web/tsconfig.json b/apps/web/tsconfig.json index a30446533..303561fda 100644 --- a/apps/web/tsconfig.json +++ b/apps/web/tsconfig.json @@ -18,8 +18,8 @@ "paths": { "@sd/interface": ["../../packages/interface/src/index.ts"], "@sd/ui": ["../../packages/ui/src/index.ts"], - "@sd/client": ["../../packages/client/src/index.ts"], + "@sd/client": ["../../packages/client/src/index.ts"] } }, - "include": ["src"], + "include": ["src"] } diff --git a/docs/architecture/distributed-data-sync.md b/docs/architecture/distributed-data-sync.md index 85af21964..f70bca179 100644 --- a/docs/architecture/distributed-data-sync.md +++ b/docs/architecture/distributed-data-sync.md @@ -9,14 +9,14 @@ mod sync { struct SyncEngine { pending: Vec, // events waiting to be sent } - + struct SyncEvent { client_uuid: String, // client that created change timestamp: uhlc::Timestamp, // unique hybrid logical clock timestamp - resource: SyncResource, // the CRDT resource + resource: SyncResource, // the CRDT resource transport: SyncTransport, // method of data transport (msg or binary) } - + // we can now impl specfic CRDT traits to given resources enum SyncResource { FilePath(dyn Replicate), @@ -28,33 +28,29 @@ mod sync { } ``` - - ## Data Types + Data is divided into several kinds, Shared and Owned. + - **Shared data** - Can be created and modified by any client. Has a `uuid`. - *Sync Method:* `Property operation*` + _Sync Method:_ `Property operation*` - > Shared resources could be,`files`, `tags`, `comments`, `albums` and `labels`. Since these can be created, updated or deleted by any client at any time. + > Shared resources could be,`files`, `tags`, `comments`, `albums` and `labels`. Since these can be created, updated or deleted by any client at any time. - **Owned data** - Can only be modified by the client that created it. Has a `client_id` and `uuid`. - *Sync Method:* `Replicate` + _Sync Method:_ `Replicate` > Owned resources would be `file_paths`, `jobs`, `locations` and `media_data`, since a client is the single source of truth for this data. This means we can perform conflict free synchronization. - - -**Shared data doesn't always use this method, in some cases we can create shared resources in bulk, where conflicts are handled by simply merging. More on that in [Synchronization Strategy]()*. - - - +\*_Shared data doesn't always use this method, in some cases we can create shared resources in bulk, where conflicts are handled by simply merging. More on that in [Synchronization Strategy]()_. ## Client Pool + The client pool maintains record of all clients in your network. -An exact replica of the client pool is synchronized on each client. When a given client has a state change, it will notify every other client in the pool via the `connection` struct. +An exact replica of the client pool is synchronized on each client. When a given client has a state change, it will notify every other client in the pool via the `connection` struct. The `ClientConnection` is maintained in memory and is established on startup. @@ -70,21 +66,18 @@ struct Client { connection: Option } ``` + Clients will ping-pong to ensure their connection stays alive, this logic is contained within the `ClientConnection` instance. **Handling stale clients** If a client has not been seen in X amount of time, other clients will not persist pending operations for them. Clients take care of flushing the pending operation queue once all non-stale clients have received the pending operations. - - - - ## Clock With realtime synchronization it is important to maintain the true order of events, we can timestamp each operation, but have to account for time drift; there is no way to guarantee two machines have synchronized system clocks. -We can solve this with a Unique Hybrid Logical Clock ([UHLC]()): a globally-unique, monotonic timestamp. +We can solve this with a Unique Hybrid Logical Clock ([UHLC]()): a globally-unique, monotonic timestamp. ``` 2022-04-09T06:53:36.397295996Z/89F9DD8514914648989315B2D30D7BE5 @@ -92,20 +85,15 @@ We can solve this with a Unique Hybrid Logical Clock ([UHLC]()): a globally-uniq Each client combines their hybrid time with a unique identifier. When receiving new [Sync Events](), a client will update its own clock with the incoming timestamp. - A client will reject operations with a timestamp drift greater than 100ms (can be adjusted). This allows us to entirely avoid the need to synchronize time between clients, as each client controls its own order of operations, never producing a conflicting timestamp with another system in the network. - - ## Synchronization Strategy Sync happens in the following order: -Owned data → Bulk shared data → Shared data → Relational data - - +Owned data → Bulk shared data → Shared data → Relational data ### Types of CRDT: @@ -115,19 +103,14 @@ trait PropertyOperation; trait Replicate; ``` -- **PropertyOperation** - Update Shared resources at a property level. Operations stored in `pending_operations` table. +- **PropertyOperation** - Update Shared resources at a property level. Operations stored in `pending_operations` table. - **Replicate** - Used exclusively for Owned data, clients will replicate with no questions asked. - ~~**Last Write Win** - The most recent event will always be applied, used for many-to-many datasets.~~ - - - - - - ## Operations -Operations perform a Shared data change, they are cached in the database as `pending_operations`. + +Operations perform a Shared data change, they are cached in the database as `pending_operations`. Operations are removed once all online clients have received the payload. @@ -152,8 +135,6 @@ enum OperationMethod { ``` - - ## Pending operations Here are some examples of how operations are stored to minimize disk usage and data duplication. @@ -162,29 +143,25 @@ Here are some examples of how operations are stored to minimize disk usage and d In the next case we're handling the creation of a Shared resource. The `method` is marked `Create` and the value is `NULL`. This is because we can also use the actual database record in the `tags` table as it was newly created. -| `client_uuid` | `uhlc_timestamp` | `method` | `resource_key` | `resource_uuid` | `resource_property` | `value` | -|----------|-------------|------|----------|----------|----------|----------| -| 2e8f85bf... | 2022-04-09T06:53:36... | Create | tags | 2e8f85bf... | NULL | NULL | +| `client_uuid` | `uhlc_timestamp` | `method` | `resource_key` | `resource_uuid` | `resource_property` | `value` | +| ------------- | ---------------------- | -------- | -------------- | --------------- | ------------------- | ------- | +| 2e8f85bf... | 2022-04-09T06:53:36... | Create | tags | 2e8f85bf... | NULL | NULL | **Update operation for Shared data** Shared data works at a property level -| `client_uuid` | `uhlc_timestamp` | `method` | `resource_key` | `resource_uuid` | `resource_property` | `value` | -|----------|-------------|------|----------|----------|----------|----------| -| 2e8f85bf... | 2022-04-09T06:53:36... | Update | albums | 2e8f85bf... | name | "jeff" | - - +| `client_uuid` | `uhlc_timestamp` | `method` | `resource_key` | `resource_uuid` | `resource_property` | `value` | +| ------------- | ---------------------- | -------- | -------------- | --------------- | ------------------- | ------- | +| 2e8f85bf... | 2022-04-09T06:53:36... | Update | albums | 2e8f85bf... | name | "jeff" | ## Owned Data Synchronization Owned data does not use the Operation system, it is queried dynamically by the `updated_at` column on Owned datasets. -For the sake of compatibility with local relations, some resource properties can be ignored*, such as `file_id` and `parent_id` on the `file_paths` resource, these are re-calculated on bulk ingest. - -*_This will require some form of definition when creating an owned data resource_. - +For the sake of compatibility with local relations, some resource properties can be ignored\*, such as `file_id` and `parent_id` on the `file_paths` resource, these are re-calculated on bulk ingest. +\*_This will require some form of definition when creating an owned data resource_. ## Bulk Shared Data Synchronization @@ -192,21 +169,17 @@ In some cases we are able to create many shared data resources at once and resol This is intended for the `files` resource. It requires Shared data behaviour as most other shared resources are related at a database level and user defined metadata can be assigned, however it is initially derived from `file_paths` which is Owned data. -As `files` are created in abundance (hundreds of thousands at a time), it would be inefficient to record these changes in the `pending_operations` table. But we are also unable to sync in the same way as Owned data due to the possibility of conflicts. +As `files` are created in abundance (hundreds of thousands at a time), it would be inefficient to record these changes in the `pending_operations` table. But we are also unable to sync in the same way as Owned data due to the possibility of conflicts. We handle this by using `SyncMethod::Merge`, simply merging the data where the oldest resource properties are prioritized. - - - - -## Combining CRDTs +## Combining CRDTs Combining CRDT types allow for some tailored functionality for particular resources. Looking at the `jobs` resource let look how `OperationalTransform + Replicate` might work. -Jobs are unique in that they have frequent updates to some properties and +Jobs are unique in that they have frequent updates to some properties and ```rust impl OperationalTransform for Job { @@ -216,16 +189,10 @@ impl OperationalTransform for Job { } impl Replicate for Job { - + } ``` - - - - - - ## Creating Sync Events We have a simple Rust syntax for creating sync events in the core. @@ -233,66 +200,38 @@ We have a simple Rust syntax for creating sync events in the core. ```rust aysnc fn my_core_function(&ctx: CoreContext) -> Result<()> { let mut file = File::get_unique(1).await?; - - ctx.sync.operation(file.id, + + ctx.sync.operation(file.id, SyncResource::File( Operation::Update( FileUpdate::HasThumbnail(true) ) ) ); - + Ok(()) } ``` -Then inside the `sync` function we send the event to the +Then inside the `sync` function we send the event to the ```rust impl SyncEngine { pub fn operation(&self, uuid: &str, sync_resource: SyncResource) { self.perform_operation( - uuid.clone(), + uuid.clone(), SyncTransport::Message(sync_resource) ); - } + } } ``` -Files also impempent `OperationalMerge` would use - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Files also impempent `OperationalMerge` would use # Resources - https://archive.jlongster.com/using-crdts-in-the-wild -- https://cse.buffalo.edu/tech-reports/2014-04.pdf +- https://cse.buffalo.edu/tech-reports/2014-04.pdf - https://sergeiturukin.com/2017/06/26/hybrid-logical-clocks.html - https://github.com/atolab/uhlc-rs - https://github.com/alangibson/awesome-crdt diff --git a/docs/architecture/virtual-filesystem.md b/docs/architecture/virtual-filesystem.md index f3bd8e50d..42e53d0e4 100644 --- a/docs/architecture/virtual-filesystem.md +++ b/docs/architecture/virtual-filesystem.md @@ -2,18 +2,16 @@ Spacedrive maintains a virtual filesystem comprised of storage locations through various clients. It records important metadata about a given file as well as a unique checksum for content based addressing [CAS](). - - ### File — `Shared data` -Represents a unique file across the virtual filesystem, all Spacedrive metadata is tied to this resource through local data relations. +Represents a unique file across the virtual filesystem, all Spacedrive metadata is tied to this resource through local data relations. ```rust struct File { id: i32, partial_checksum: str, checksum: Option, - + kind: FileKind, hidden: bool, @@ -23,20 +21,20 @@ struct File { has_video_preview: bool, encryption: EncryptionAlgorithm, ipfs_id: Option, - + file_paths: Vec, tags: Vec, labels: Vec