[ENG-1628] Write new indexer with the task system (#2161)

* Moving file-path-helper to a sub-crate on core

* Parallel walker tested and working (see the parallel-walk sketch below)

* Change inner core crate names to sd-core-* naming scheme

* Moving stuff around

* Save and Update tasks for the new indexer job

* Some initial drafts on the new job system

* More drafts on job system

* Changing walker task to a more explicit state machine (see the state-machine sketch below)
Also drafting more of job system

* More drafting on job resume

* Completed the draft on job system inner workings

* New job context abstraction to decouple library stuff from job system

* Properly use composition on task dispatcher (see the dispatcher sketch below)

* First draft on indexer job

* Job serialization (see the serialization sketch below)

* Handling ancestors in the distributed walker for indexer

* Saving computed directories sizes on a location to db

* Enable a way to check if jobs are running in a location

* Progress reporting on indexer job

* Reorganizing modules

* Shallow indexer

* Rust fmt

* Attempting windows CI fix

* Attempting windows CI fix again

* Attempting windows CI fix again
Ericson "Fogo" Soares 2024-04-08 11:32:44 -03:00 committed by GitHub
parent 11555c583a
commit 6b760b0b2a
106 changed files with 7557 additions and 850 deletions
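
The "Parallel walker" item above refers to walking a location's directory tree concurrently instead of one entry at a time. Below is a minimal, hypothetical sketch of that general pattern (illustrative names only; the actual WalkDirTask added in this diff runs on sd-task-system and carries far more state): every discovered sub-directory becomes its own tokio task, and a `FuturesUnordered` drains the handles as they finish.

```rust
use std::path::PathBuf;

use futures::{stream::FuturesUnordered, StreamExt};
use tokio::fs;

// List the files and sub-directories directly under `dir`.
async fn walk_one(dir: PathBuf) -> std::io::Result<(Vec<PathBuf>, Vec<PathBuf>)> {
    let (mut files, mut dirs) = (Vec::new(), Vec::new());
    let mut read_dir = fs::read_dir(&dir).await?;
    while let Some(entry) = read_dir.next_entry().await? {
        if entry.metadata().await?.is_dir() {
            dirs.push(entry.path());
        } else {
            files.push(entry.path());
        }
    }
    Ok((files, dirs))
}

// Walk the whole tree, spawning one task per directory so sibling
// sub-trees are scanned concurrently.
async fn parallel_walk(root: PathBuf) -> std::io::Result<Vec<PathBuf>> {
    let mut pending = FuturesUnordered::new();
    pending.push(tokio::spawn(walk_one(root)));

    let mut all_files = Vec::new();
    while let Some(joined) = pending.next().await {
        let (files, dirs) = joined.expect("walker task panicked")?;
        all_files.extend(files);
        for dir in dirs {
            pending.push(tokio::spawn(walk_one(dir)));
        }
    }
    Ok(all_files)
}

#[tokio::main]
async fn main() -> std::io::Result<()> {
    let files = parallel_walk(PathBuf::from(".")).await?;
    println!("found {} files", files.len());
    Ok(())
}
```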
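
The "more explicit state machine" item describes restructuring the walker task as named states advanced one step at a time, which is what makes pausing, serializing and resuming it tractable. A rough sketch of that shape, assuming hypothetical names (the real task also tracks accepted entries, ancestors, rule evaluations, etc.):

```rust
use std::path::PathBuf;

use serde::{Deserialize, Serialize};

// Every intermediate state is a plain serializable value, so the task can be
// interrupted between steps, stored, and resumed later.
#[derive(Debug, Serialize, Deserialize)]
enum WalkerState {
    Start { root: PathBuf },
    Walking { queue: Vec<PathBuf>, found: Vec<PathBuf> },
    Done { found: Vec<PathBuf> },
}

impl WalkerState {
    // Advance exactly one state transition per call.
    fn step(self) -> Self {
        match self {
            Self::Start { root } => Self::Walking {
                queue: vec![root],
                found: Vec::new(),
            },
            Self::Walking { mut queue, mut found } => match queue.pop() {
                Some(dir) => {
                    // A real implementation would read `dir` here and push its
                    // children onto `queue`; we just record the path.
                    found.push(dir);
                    Self::Walking { queue, found }
                }
                None => Self::Done { found },
            },
            done @ Self::Done { .. } => done,
        }
    }

    fn is_done(&self) -> bool {
        matches!(self, Self::Done { .. })
    }
}

fn main() {
    let mut state = WalkerState::Start { root: PathBuf::from("/library/photos") };
    while !state.is_done() {
        // Between any two steps the state could be snapshotted to disk.
        state = state.step();
    }
    println!("{state:?}");
}
```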
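
The "composition on task dispatcher" item means the job-scoped dispatcher owns a base dispatcher and delegates to it, layering job bookkeeping on top rather than re-implementing dispatch. An illustrative sketch with made-up types and signatures (the real JobTaskDispatcher and BaseTaskDispatcher in the task system are async and generic over the error type):

```rust
#[derive(Default)]
struct BaseDispatcher;

impl BaseDispatcher {
    // Stand-in for spawning a task on the worker pool and returning a handle.
    fn dispatch(&self, task_name: &str) -> String {
        format!("handle<{task_name}>")
    }
}

#[derive(Default)]
struct JobDispatcher {
    base: BaseDispatcher, // composition: the base dispatcher is a field, not a parent type
    dispatched: u64,      // job-level bookkeeping layered on top
}

impl JobDispatcher {
    fn dispatch(&mut self, task_name: &str) -> String {
        self.dispatched += 1;
        self.base.dispatch(task_name) // delegate the actual dispatching
    }
}

fn main() {
    let mut dispatcher = JobDispatcher::default();
    let handle = dispatcher.dispatch("walker");
    println!("{handle}; tasks dispatched by this job: {}", dispatcher.dispatched);
}
```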
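
The "Job serialization" item is implemented further down with rmp-serde: a job snapshots its state into a `SaveState` struct via `rmp_serde::to_vec_named` and restores it with `rmp_serde::from_slice`. A self-contained round-trip sketch of that mechanism, using a hypothetical, much smaller state struct:

```rust
use std::path::PathBuf;

use serde::{Deserialize, Serialize};

#[derive(Debug, PartialEq, Serialize, Deserialize)]
struct JobState {
    location_id: i32,
    sub_path: Option<PathBuf>,
    completed_tasks: u64,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let state = JobState {
        location_id: 1,
        sub_path: Some(PathBuf::from("photos/2024")),
        completed_tasks: 42,
    };

    // `to_vec_named` keeps field names in the MessagePack payload, which makes
    // resuming a stored job tolerant to field reordering.
    let bytes = rmp_serde::to_vec_named(&state)?;
    let restored: JobState = rmp_serde::from_slice(&bytes)?;
    assert_eq!(state, restored);
    Ok(())
}
```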

Cargo.lock (generated)

@ -6454,7 +6454,9 @@ version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259"
dependencies = [
"phf_macros 0.10.0",
"phf_shared 0.10.0",
"proc-macro-hack",
]
[[package]]
@ -6531,6 +6533,20 @@ dependencies = [
"syn 1.0.109",
]
[[package]]
name = "phf_macros"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58fdf3184dd560f160dd73922bea2d5cd6e8f064bf4b13110abd81b03697b4e0"
dependencies = [
"phf_generator 0.10.0",
"phf_shared 0.10.0",
"proc-macro-hack",
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "phf_macros"
version = "0.11.2"
@ -8115,8 +8131,9 @@ dependencies = [
"reqwest",
"rmp-serde",
"rmpv",
"sd-core-file-path-helper",
"sd-core-prisma-helpers",
"sd-core-sync",
"sd-file-path-helper",
"sd-prisma",
"sd-sync",
"sd-utils",
@ -8220,11 +8237,14 @@ dependencies = [
"sd-ai",
"sd-cache",
"sd-cloud-api",
"sd-core-file-path-helper",
"sd-core-heavy-lifting",
"sd-core-indexer-rules",
"sd-core-prisma-helpers",
"sd-core-sync",
"sd-crypto",
"sd-ffmpeg",
"sd-file-ext",
"sd-file-path-helper",
"sd-images",
"sd-media-metadata",
"sd-p2p",
@ -8260,6 +8280,90 @@ dependencies = [
"webp",
]
[[package]]
name = "sd-core-file-path-helper"
version = "0.1.0"
dependencies = [
"chrono",
"prisma-client-rust",
"regex",
"sd-core-prisma-helpers",
"sd-prisma",
"sd-utils",
"serde",
"thiserror",
"tokio",
"tracing",
"winapi-util",
]
[[package]]
name = "sd-core-heavy-lifting"
version = "0.1.0"
dependencies = [
"async-channel",
"async-trait",
"chrono",
"futures",
"futures-concurrency",
"globset",
"itertools 0.12.0",
"lending-stream",
"prisma-client-rust",
"rmp-serde",
"rmpv",
"rspc",
"sd-core-file-path-helper",
"sd-core-indexer-rules",
"sd-core-prisma-helpers",
"sd-core-sync",
"sd-prisma",
"sd-sync",
"sd-task-system",
"sd-utils",
"serde",
"serde_json",
"specta",
"strum",
"tempfile",
"thiserror",
"tokio",
"tokio-stream",
"tracing",
"tracing-test",
"uuid",
]
[[package]]
name = "sd-core-indexer-rules"
version = "0.1.0"
dependencies = [
"chrono",
"futures-concurrency",
"globset",
"prisma-client-rust",
"rmp-serde",
"rspc",
"sd-prisma",
"sd-utils",
"serde",
"specta",
"tempfile",
"thiserror",
"tokio",
"tracing",
"uuid",
]
[[package]]
name = "sd-core-prisma-helpers"
version = "0.1.0"
dependencies = [
"prisma-client-rust",
"sd-prisma",
"serde",
]
[[package]]
name = "sd-core-sync"
version = "0.0.0"
@ -8414,22 +8518,6 @@ dependencies = [
"tokio",
]
[[package]]
name = "sd-file-path-helper"
version = "0.1.0"
dependencies = [
"chrono",
"prisma-client-rust",
"regex",
"sd-prisma",
"sd-utils",
"serde",
"thiserror",
"tokio",
"tracing",
"winapi-util",
]
[[package]]
name = "sd-images"
version = "0.0.0"
@ -9450,6 +9538,7 @@ version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125"
dependencies = [
"phf 0.10.1",
"strum_macros",
]


@ -59,9 +59,12 @@ chrono = "0.4.31"
clap = "4.4.7"
futures = "0.3.30"
futures-concurrency = "7.4.3"
globset = "^0.4.13"
hex = "0.4.3"
http = "0.2.9"
image = "0.24.7"
itertools = "0.12.0"
lending-stream = "1.0.0"
normpath = "1.1.1"
once_cell = "1.18.0"
pin-project-lite = "0.2.13"
@ -84,6 +87,9 @@ uhlc = "=0.5.2"
uuid = "1.5.0"
webp = "0.2.6"
[workspace.dev-dependencies]
tracing-test = { version = "^0.2.4" }
[patch.crates-io]
# Proper IOS Support
if-watch = { git = "https://github.com/oscartbeaumont/if-watch.git", rev = "a92c17d3f85c1c6fb0afeeaf6c2b24d0b147e8c3" }


@ -6,6 +6,7 @@ repository = { workspace = true }
edition = { workspace = true }
[dependencies]
# Spacedrive Sub-crates
sd-crypto = { path = "../../crates/crypto" }
anyhow = { workspace = true }


@ -9,7 +9,11 @@ repository = { workspace = true }
edition = { workspace = true }
[dependencies]
sd-core = { path = "../../../core", features = ["ffmpeg", "heif"] }
# Spacedrive Sub-crates
sd-core = { path = "../../../core", features = [
"ffmpeg",
"heif",
] }
sd-fda = { path = "../../../crates/fda" }
sd-prisma = { path = "../../../crates/prisma" }


@ -11,6 +11,7 @@ edition = { workspace = true }
crate-type = ["cdylib"]
[dependencies]
# Spacedrive Sub-crates
sd-mobile-core = { path = "../../core" }
# FFI


@ -7,6 +7,7 @@ repository = { workspace = true }
edition = { workspace = true }
[dependencies]
# Spacedrive Sub-crates
sd-core = { path = "../../../../../core", features = [
"mobile",
], default-features = false }


@ -14,4 +14,5 @@ edition = { workspace = true }
crate-type = ["staticlib"]
[dependencies]
# Spacedrive Sub-crates
sd-mobile-core = { path = "../../core" }


@ -11,6 +11,7 @@ assets = []
ai-models = ["sd-core/ai"]
[dependencies]
# Spacedrive Sub-crates
sd-core = { path = "../../core", features = [
"ffmpeg",
"heif",


@ -19,11 +19,18 @@ ai = ["dep:sd-ai"]
crypto = ["dep:sd-crypto"]
[dependencies]
# Sub-crates
sd-cache = { path = "../crates/cache" }
# Inner Core Sub-crates
sd-core-file-path-helper = { path = "./crates/file-path-helper" }
sd-core-heavy-lifting = { path = "./crates/heavy-lifting" }
sd-core-indexer-rules = { path = "./crates/indexer-rules" }
sd-core-prisma-helpers = { path = "./crates/prisma-helpers" }
sd-core-sync = { path = "./crates/sync" }
# sd-cloud-api = { path = "../crates/cloud-api" }
sd-file-path-helper = { path = "../crates/file-path-helper" }
# Spacedrive Sub-crates
sd-actors = { version = "0.1.0", path = "../crates/actors" }
sd-ai = { path = "../crates/ai", optional = true }
sd-cache = { path = "../crates/cache" }
sd-cloud-api = { version = "0.1.0", path = "../crates/cloud-api" }
sd-crypto = { path = "../crates/crypto", features = [
"sys",
"tokio",
@ -41,10 +48,8 @@ sd-p2p-block = { path = "../crates/p2p-block" }
sd-p2p-proto = { path = "../crates/p2p-proto" }
sd-p2p-tunnel = { path = "../crates/p2p-tunnel" }
sd-prisma = { path = "../crates/prisma" }
sd-ai = { path = "../crates/ai", optional = true }
sd-sync = { path = "../crates/sync" }
sd-utils = { path = "../crates/utils" }
sd-cloud-api = { version = "0.1.0", path = "../crates/cloud-api" }
# Workspace dependencies
async-channel = { workspace = true }
@ -56,6 +61,7 @@ chrono = { workspace = true, features = ["serde"] }
futures = { workspace = true }
futures-concurrency = { workspace = true }
image = { workspace = true }
itertools = { workspace = true }
normpath = { workspace = true, features = ["localization"] }
once_cell = { workspace = true }
pin-project-lite = { workspace = true }
@ -63,6 +69,7 @@ prisma-client-rust = { workspace = true, features = ["rspc"] }
regex = { workspace = true }
reqwest = { workspace = true, features = ["json", "native-tls-vendored"] }
rmp-serde = { workspace = true }
rmpv = { workspace = true }
rspc = { workspace = true, features = [
"axum",
"uuid",
@ -98,22 +105,25 @@ webp = { workspace = true }
# Specific Core dependencies
async-recursion = "1.0.5"
async-stream = "0.3.5"
aws-sdk-s3 = { version = "1.5.0", features = ["behavior-version-latest"] }
aws-config = "1.0.3"
aws-credential-types = "1.0.3"
base91 = "0.1.0"
bytes = "1.5.0"
ctor = "0.2.5"
directories = "5.0.1"
flate2 = "1.0.28"
globset = { version = "^0.4.13", features = ["serde1"] }
hostname = "0.3.1"
http-body = "0.4.5"
http-range = "0.1.5"
hyper = { version = "=0.14.28", features = ["http1", "server", "client"] }
int-enum = "0.5.0"
itertools = "0.12.0"
libc = "0.2.153"
mini-moka = "0.10.2"
notify = { git = "https://github.com/notify-rs/notify.git", rev = "c3929ed114fbb0bc7457a9a498260461596b00ca", default-features = false, features = [
"macos_fsevent",
] }
rmpv = { workspace = true }
rmp = "0.8.12"
serde-hashkey = "0.4.5"
serde_repr = "0.1"
serde_with = "3.4.0"
@ -121,14 +131,7 @@ slotmap = "1.0.6"
static_assertions = "1.1.0"
sysinfo = "0.29.10"
tar = "0.4.40"
aws-sdk-s3 = { version = "1.5.0", features = ["behavior-version-latest"] }
aws-config = "1.0.3"
aws-credential-types = "1.0.3"
base91 = "0.1.0"
sd-actors = { version = "0.1.0", path = "../crates/actors" }
tower-service = "0.3.2"
hyper = { version = "=0.14.28", features = ["http1", "server", "client"] }
rmp = "0.8.12"
# Override features of transitive dependencies
[dependencies.openssl]
@ -151,5 +154,6 @@ icrate = { version = "0.1.0", features = [
] }
[dev-dependencies]
tracing-test = "^0.2.4"
tracing-test = { workspace.dev-dependencies = true }
aovec = "1.1.0"
globset = { workspace = true }


@ -1,5 +1,5 @@
[package]
name = "sd-file-path-helper"
name = "sd-core-file-path-helper"
version = "0.1.0"
authors = ["Ericson Soares <ericson@spacedrive.com>"]
readme = "README.md"
@ -9,8 +9,12 @@ repository = { workspace = true }
edition = { workspace = true }
[dependencies]
sd-prisma = { path = "../prisma" }
sd-utils = { path = "../utils" }
# Inner Core Sub-crates
sd-core-prisma-helpers = { path = "../prisma-helpers" }
# Spacedrive Sub-crates
sd-prisma = { path = "../../../crates/prisma" }
sd-utils = { path = "../../../crates/utils" }
chrono = { workspace = true, features = ["serde"] }
prisma-client-rust = { workspace = true }


@ -1,3 +1,10 @@
use sd_core_prisma_helpers::{
file_path_for_file_identifier, file_path_for_media_processor, file_path_for_object_validator,
file_path_to_full_path, file_path_to_handle_custom_uri, file_path_to_handle_p2p_serve_file,
file_path_to_isolate, file_path_to_isolate_with_id, file_path_to_isolate_with_pub_id,
file_path_walker, file_path_with_object,
};
use sd_prisma::prisma::{file_path, location};
use sd_utils::error::NonUtf8PathError;
@ -11,12 +18,7 @@ use std::{
use regex::RegexSet;
use serde::{Deserialize, Serialize};
use super::{
file_path_for_file_identifier, file_path_for_media_processor, file_path_for_object_validator,
file_path_to_full_path, file_path_to_handle_custom_uri, file_path_to_handle_p2p_serve_file,
file_path_to_isolate, file_path_to_isolate_with_id, file_path_walker, file_path_with_object,
FilePathError,
};
use super::FilePathError;
static FORBIDDEN_FILE_NAMES: OnceLock<RegexSet> = OnceLock::new();
@ -30,7 +32,7 @@ pub struct IsolatedFilePathDataParts<'a> {
relative_path: &'a str,
}
#[derive(Serialize, Deserialize, Debug, Hash, Eq, PartialEq)]
#[derive(Serialize, Deserialize, Debug, Hash, Eq, PartialEq, Clone, Default)]
#[non_exhaustive]
pub struct IsolatedFilePathData<'a> {
// WARN! These fields MUST NOT be changed outside the location module, that's why they have this visibility
@ -88,14 +90,22 @@ impl IsolatedFilePathData<'static> {
}
impl<'a> IsolatedFilePathData<'a> {
pub fn location_id(&self) -> location::id::Type {
#[must_use]
pub const fn location_id(&self) -> location::id::Type {
self.location_id
}
#[must_use]
pub fn extension(&self) -> &str {
self.extension.as_ref()
}
#[must_use]
pub const fn is_dir(&self) -> bool {
self.is_dir
}
#[must_use]
pub fn is_root(&self) -> bool {
self.is_dir
&& self.materialized_path == "/"
@ -103,6 +113,7 @@ impl<'a> IsolatedFilePathData<'a> {
&& self.relative_path.is_empty()
}
#[must_use]
pub fn to_parts(&self) -> IsolatedFilePathDataParts<'_> {
IsolatedFilePathDataParts {
location_id: self.location_id,
@ -114,6 +125,12 @@ impl<'a> IsolatedFilePathData<'a> {
}
}
/// Return the `IsolatedFilePath` for the parent of the current file or directory.
///
/// # Panics
/// May panic if the materialized path was malformed, without a slash for the parent directory.
/// Considering that the parent can be just `/` for the root directory.
#[must_use]
pub fn parent(&'a self) -> Self {
let (parent_path_str, name, relative_path) = if self.materialized_path == "/" {
("/", "", "")
@ -124,7 +141,7 @@ impl<'a> IsolatedFilePathData<'a> {
.expect("malformed materialized path at `parent` method");
(
&self.materialized_path[..last_slash_idx + 1],
&self.materialized_path[..=last_slash_idx],
&self.materialized_path[last_slash_idx + 1..trailing_slash_idx],
&self.materialized_path[1..trailing_slash_idx],
)
@ -159,6 +176,7 @@ impl<'a> IsolatedFilePathData<'a> {
}
}
#[must_use]
pub fn full_name(&self) -> String {
if self.extension.is_empty() {
self.name.to_string()
@ -167,6 +185,7 @@ impl<'a> IsolatedFilePathData<'a> {
}
}
#[must_use]
pub fn materialized_path_for_children(&self) -> Option<String> {
if self.materialized_path == "/" && self.name.is_empty() && self.is_dir {
// We're at the root file_path
@ -186,19 +205,21 @@ impl<'a> IsolatedFilePathData<'a> {
));
}
if let Some(last_dot_idx) = source.rfind('.') {
if last_dot_idx == 0 {
// The dot is the first character, so it's a hidden file
Ok((source, ""))
} else {
Ok((&source[..last_dot_idx], &source[last_dot_idx + 1..]))
}
} else {
// It's a file without extension
Ok((source, ""))
}
source.rfind('.').map_or_else(
|| Ok((source, "")), // It's a file without extension
|last_dot_idx| {
if last_dot_idx == 0 {
// The dot is the first character, so it's a hidden file
Ok((source, ""))
} else {
Ok((&source[..last_dot_idx], &source[last_dot_idx + 1..]))
}
},
)
}
#[allow(clippy::missing_panics_doc)] // Don't actually panic as the regexes are hardcoded
#[must_use]
pub fn accept_file_name(name: &str) -> bool {
let reg = {
// Maybe we should enforce windows more restrictive rules on all platforms?
@ -224,6 +245,7 @@ impl<'a> IsolatedFilePathData<'a> {
!reg.is_match(name)
}
#[must_use]
pub fn separate_path_name_and_extension_from_str(
source: &'a str,
is_dir: bool,
@ -253,20 +275,23 @@ impl<'a> IsolatedFilePathData<'a> {
} else {
let first_name_char_idx = source.rfind('/').unwrap_or(0) + 1;
let end_idx = first_name_char_idx - 1;
if let Some(last_dot_relative_idx) = source[first_name_char_idx..].rfind('.') {
let last_dot_idx = first_name_char_idx + last_dot_relative_idx;
(
&source[..end_idx],
Some(&source[first_name_char_idx..last_dot_idx]),
Some(&source[last_dot_idx + 1..]),
)
} else {
(
&source[..end_idx],
Some(&source[first_name_char_idx..]),
None,
)
}
source[first_name_char_idx..].rfind('.').map_or_else(
|| {
(
&source[..end_idx],
Some(&source[first_name_char_idx..]),
None,
)
},
|last_dot_relative_idx| {
let last_dot_idx = first_name_char_idx + last_dot_relative_idx;
(
&source[..end_idx],
Some(&source[first_name_char_idx..last_dot_idx]),
Some(&source[last_dot_idx + 1..]),
)
},
)
}
}
@ -282,6 +307,7 @@ impl<'a> IsolatedFilePathData<'a> {
.unwrap_or_default()
}
#[must_use]
pub fn from_db_data(
location_id: location::id::Type,
is_dir: bool,
@ -465,6 +491,7 @@ mod macros {
impl_from_db!(
file_path,
file_path_to_isolate,
file_path_to_isolate_with_pub_id,
file_path_walker,
file_path_to_isolate_with_id,
file_path_with_object
@ -514,19 +541,21 @@ pub fn extract_normalized_materialized_path_str(
path: path.into(),
})?
.parent()
.map(|materialized_path| {
materialized_path
.to_str()
.map(|materialized_path_str| {
if !materialized_path_str.is_empty() {
format!("/{}/", materialized_path_str.replace('\\', "/"))
} else {
"/".to_string()
}
})
.ok_or_else(|| NonUtf8PathError(path.into()))
})
.unwrap_or_else(|| Ok("/".to_string()))
.map_or_else(
|| Ok("/".to_string()),
|materialized_path| {
materialized_path
.to_str()
.map(|materialized_path_str| {
if materialized_path_str.is_empty() {
"/".to_string()
} else {
format!("/{}/", materialized_path_str.replace('\\', "/"))
}
})
.ok_or_else(|| NonUtf8PathError(path.into()))
},
)
.map_err(Into::into)
}
@ -544,6 +573,7 @@ fn assemble_relative_path(
}
}
#[allow(clippy::missing_panics_doc)] // Don't actually panic as we check before `expect`
pub fn join_location_relative_path(
location_path: impl AsRef<Path>,
relative_path: impl AsRef<Path>,
@ -561,6 +591,7 @@ pub fn join_location_relative_path(
})
}
#[allow(clippy::missing_panics_doc)] // Don't actually panic as we check before `expect`
pub fn push_location_relative_path(
mut location_path: PathBuf,
relative_path: impl AsRef<Path>,


@ -1,3 +1,32 @@
#![warn(
clippy::all,
clippy::pedantic,
clippy::correctness,
clippy::perf,
clippy::style,
clippy::suspicious,
clippy::complexity,
clippy::nursery,
clippy::unwrap_used,
unused_qualifications,
rust_2018_idioms,
trivial_casts,
trivial_numeric_casts,
unused_allocation,
clippy::unnecessary_cast,
clippy::cast_lossless,
clippy::cast_possible_truncation,
clippy::cast_possible_wrap,
clippy::cast_precision_loss,
clippy::cast_sign_loss,
clippy::dbg_macro,
clippy::deprecated_cfg_attr,
clippy::separated_literal_suffix,
deprecated
)]
#![forbid(deprecated_in_future)]
#![allow(clippy::missing_errors_doc, clippy::module_name_repetitions)]
use sd_prisma::prisma::{file_path, location, PrismaClient};
use sd_utils::error::{FileIOError, NonUtf8PathError};
@ -21,107 +50,6 @@ pub use isolated_file_path_data::{
IsolatedFilePathDataParts,
};
// File Path selectables!
file_path::select!(file_path_pub_and_cas_ids { id pub_id cas_id });
file_path::select!(file_path_just_pub_id_materialized_path {
pub_id
materialized_path
});
file_path::select!(file_path_for_file_identifier {
id
pub_id
materialized_path
date_created
is_dir
name
extension
object_id
});
file_path::select!(file_path_for_object_validator {
pub_id
materialized_path
is_dir
name
extension
integrity_checksum
});
file_path::select!(file_path_for_media_processor {
id
materialized_path
is_dir
name
extension
cas_id
object_id
});
file_path::select!(file_path_to_isolate {
location_id
materialized_path
is_dir
name
extension
});
file_path::select!(file_path_to_isolate_with_id {
id
location_id
materialized_path
is_dir
name
extension
});
file_path::select!(file_path_walker {
pub_id
location_id
object_id
materialized_path
is_dir
name
extension
date_modified
inode
size_in_bytes_bytes
hidden
});
file_path::select!(file_path_to_handle_custom_uri {
pub_id
materialized_path
is_dir
name
extension
location: select {
id
path
instance: select {
identity
remote_identity
}
}
});
file_path::select!(file_path_to_handle_p2p_serve_file {
materialized_path
name
extension
is_dir // For isolated file path
location: select {
id
path
}
});
file_path::select!(file_path_to_full_path {
id
materialized_path
is_dir
name
extension
location: select {
id
path
}
});
// File Path includes!
file_path::include!(file_path_with_object { object });
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
pub struct FilePathMetadata {
pub inode: u64,
@ -140,8 +68,7 @@ pub fn path_is_hidden(path: impl AsRef<Path>, metadata: &Metadata) -> bool {
.as_ref()
.file_name()
.and_then(OsStr::to_str)
.map(|s| s.starts_with('.'))
.unwrap_or_default()
.is_some_and(|s| s.starts_with('.'))
{
return true;
}
@ -176,10 +103,8 @@ pub fn path_is_hidden(path: impl AsRef<Path>, metadata: &Metadata) -> bool {
}
impl FilePathMetadata {
pub async fn from_path(
path: impl AsRef<Path>,
metadata: &Metadata,
) -> Result<Self, FilePathError> {
pub fn from_path(path: impl AsRef<Path>, metadata: &Metadata) -> Result<Self, FilePathError> {
let path = path.as_ref();
let inode = {
#[cfg(target_family = "unix")]
{
@ -188,13 +113,21 @@ impl FilePathMetadata {
#[cfg(target_family = "windows")]
{
get_inode_from_path(path.as_ref()).await?
use winapi_util::{file::information, Handle};
let info = tokio::task::block_in_place(|| {
Handle::from_path_any(path)
.and_then(|ref handle| information(handle))
.map_err(|e| FileIOError::from((path, e)))
})?;
info.file_index()
}
};
Ok(Self {
inode,
hidden: path_is_hidden(path.as_ref(), metadata),
hidden: path_is_hidden(path, metadata),
size_in_bytes: metadata.len(),
created_at: metadata.created_or_now().into(),
modified_at: metadata.modified_or_now().into(),
@ -242,6 +175,7 @@ pub enum FilePathError {
InvalidFilenameAndExtension(String),
}
#[must_use]
pub fn filter_existing_file_path_params(
IsolatedFilePathData {
materialized_path,
@ -250,7 +184,7 @@ pub fn filter_existing_file_path_params(
name,
extension,
..
}: &IsolatedFilePathData,
}: &IsolatedFilePathData<'_>,
) -> Vec<file_path::WhereParam> {
vec![
file_path::location_id::equals(Some(*location_id)),
@ -294,9 +228,10 @@ pub fn loose_find_existing_file_path_params(
])
}
#[allow(clippy::missing_panics_doc)] // Don't actually panic
pub async fn ensure_sub_path_is_in_location(
location_path: impl AsRef<Path>,
sub_path: impl AsRef<Path>,
location_path: impl AsRef<Path> + Send,
sub_path: impl AsRef<Path> + Send,
) -> Result<PathBuf, FilePathError> {
let mut sub_path = sub_path.as_ref();
let location_path = location_path.as_ref();
@ -311,7 +246,9 @@ pub async fn ensure_sub_path_is_in_location(
.expect("we just checked that it starts with the separator");
}
if !sub_path.starts_with(location_path) {
if sub_path.starts_with(location_path) {
Ok(sub_path.to_path_buf())
} else {
// If the sub_path doesn't start with the location_path, we have to check if it's a
// materialized path received from the frontend, then we check if the full path exists
let full_path = location_path.join(sub_path);
@ -324,24 +261,22 @@ pub async fn ensure_sub_path_is_in_location(
}),
Err(e) => Err(FileIOError::from((full_path, e)).into()),
}
} else {
Ok(sub_path.to_path_buf())
}
}
pub async fn ensure_file_path_exists<E>(
sub_path: impl AsRef<Path>,
sub_path: impl AsRef<Path> + Send,
iso_file_path: &IsolatedFilePathData<'_>,
db: &PrismaClient,
error_fn: impl FnOnce(Box<Path>) -> E,
error_fn: impl FnOnce(Box<Path>) -> E + Send,
) -> Result<(), E>
where
E: From<QueryError>,
{
if !check_file_path_exists(iso_file_path, db).await? {
Err(error_fn(sub_path.as_ref().into()))
} else {
if check_file_path_exists(iso_file_path, db).await? {
Ok(())
} else {
Err(error_fn(sub_path.as_ref().into()))
}
}
@ -360,9 +295,10 @@ where
.await? > 0)
}
#[allow(clippy::missing_panics_doc)] // Don't actually panic
pub async fn ensure_sub_path_is_directory(
location_path: impl AsRef<Path>,
sub_path: impl AsRef<Path>,
location_path: impl AsRef<Path> + Send,
sub_path: impl AsRef<Path> + Send,
) -> Result<(), FilePathError> {
let mut sub_path = sub_path.as_ref();
@ -410,7 +346,7 @@ pub async fn ensure_sub_path_is_directory(
}
}
#[allow(unused)] // TODO remove this annotation when we can use it on windows
#[must_use]
pub fn get_inode(metadata: &Metadata) -> u64 {
#[cfg(target_family = "unix")]
{
@ -435,8 +371,7 @@ pub fn get_inode(metadata: &Metadata) -> u64 {
}
}
#[allow(unused)]
pub async fn get_inode_from_path(path: impl AsRef<Path>) -> Result<u64, FilePathError> {
pub async fn get_inode_from_path(path: impl AsRef<Path> + Send) -> Result<u64, FilePathError> {
#[cfg(target_family = "unix")]
{
// TODO use this when it's stable and remove winapi-utils dependency
@ -451,9 +386,11 @@ pub async fn get_inode_from_path(path: impl AsRef<Path>) -> Result<u64, FilePath
{
use winapi_util::{file::information, Handle};
let info = Handle::from_path_any(path.as_ref())
.and_then(|ref handle| information(handle))
.map_err(|e| FileIOError::from((path, e)))?;
let info = tokio::task::block_in_place(|| {
Handle::from_path_any(path.as_ref())
.and_then(|ref handle| information(handle))
.map_err(|e| FileIOError::from((path, e)))
})?;
Ok(info.file_index())
}


@ -0,0 +1,50 @@
[package]
name = "sd-core-heavy-lifting"
version = "0.1.0"
authors = ["Ericson Soares <ericson@spacedrive.com>"]
license = { workspace = true }
repository = { workspace = true }
edition = { workspace = true }
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
# Inner Core Sub-crates
sd-core-file-path-helper = { path = "../file-path-helper" }
sd-core-indexer-rules = { path = "../indexer-rules" }
sd-core-prisma-helpers = { path = "../prisma-helpers" }
sd-core-sync = { path = "../sync" }
# Sub-crates
sd-prisma = { path = "../../../crates/prisma" }
sd-sync = { path = "../../../crates/sync" }
sd-task-system = { path = "../../../crates/task-system" }
sd-utils = { path = "../../../crates/utils" }
async-channel = { workspace = true }
async-trait = { workspace = true }
chrono = { workspace = true, features = ["serde"] }
futures = { workspace = true }
futures-concurrency = { workspace = true }
globset = { workspace = true }
itertools = { workspace = true }
lending-stream = { workspace = true }
prisma-client-rust = { workspace = true }
rmp-serde = { workspace = true }
rmpv = { workspace = true }
rspc = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
specta = { workspace = true }
strum = { workspace = true, features = ["derive", "phf"] }
thiserror = { workspace = true }
tokio = { workspace = true, features = ["fs", "sync", "parking_lot"] }
tokio-stream = { workspace = true, features = ["fs"] }
tracing = { workspace = true }
uuid = { workspace = true, features = ["v4", "serde"] }
[dev-dependencies]
tempfile = { workspace = true }
tracing-test = { workspace.dev-dependencies = true }


@ -0,0 +1,738 @@
use crate::{
indexer::BATCH_SIZE,
job_system::{
job::{
Job, JobContext, JobName, JobReturn, JobTaskDispatcher, ProgressUpdate, ReturnStatus,
},
report::ReportOutputMetadata,
utils::cancel_pending_tasks,
SerializableJob, SerializedTasks,
},
Error, NonCriticalJobError,
};
use sd_core_file_path_helper::IsolatedFilePathData;
use sd_core_indexer_rules::{IndexerRule, IndexerRuler};
use sd_core_prisma_helpers::location_with_indexer_rules;
use sd_task_system::{
AnyTaskOutput, IntoTask, SerializableTask, Task, TaskDispatcher, TaskHandle, TaskId,
TaskOutput, TaskStatus,
};
use sd_utils::db::maybe_missing;
use std::{
collections::{HashMap, HashSet},
hash::{Hash, Hasher},
mem,
path::PathBuf,
sync::Arc,
time::Duration,
};
use futures::{stream::FuturesUnordered, StreamExt};
use futures_concurrency::future::TryJoin;
use itertools::Itertools;
use serde::{Deserialize, Serialize};
use serde_json::json;
use tokio::time::Instant;
use tracing::warn;
use super::{
determine_initial_walk_path, remove_non_existing_file_paths, reverse_update_directories_sizes,
tasks::{
saver::{SaveTask, SaveTaskOutput},
updater::{UpdateTask, UpdateTaskOutput},
walker::{WalkDirTask, WalkTaskOutput, WalkedEntry},
},
update_directory_sizes, update_location_size, IndexerError, IsoFilePathFactory, WalkerDBProxy,
};
#[derive(Debug)]
pub struct IndexerJob {
location: location_with_indexer_rules::Data,
sub_path: Option<PathBuf>,
metadata: Metadata,
iso_file_path_factory: IsoFilePathFactory,
indexer_ruler: IndexerRuler,
walker_root_path: Option<Arc<PathBuf>>,
ancestors_needing_indexing: HashSet<WalkedEntry>,
ancestors_already_indexed: HashSet<IsolatedFilePathData<'static>>,
iso_paths_and_sizes: HashMap<IsolatedFilePathData<'static>, u64>,
errors: Vec<NonCriticalJobError>,
pending_tasks_on_resume: Vec<TaskHandle<Error>>,
tasks_for_shutdown: Vec<Box<dyn Task<Error>>>,
}
impl Job for IndexerJob {
const NAME: JobName = JobName::Indexer;
async fn run(
mut self,
dispatcher: JobTaskDispatcher,
ctx: impl JobContext,
) -> Result<ReturnStatus, Error> {
let mut pending_running_tasks = FuturesUnordered::new();
self.init_or_resume(&mut pending_running_tasks, &ctx, &dispatcher)
.await?;
if let Some(res) = self
.process_handles(&mut pending_running_tasks, &ctx, &dispatcher)
.await
{
return res;
}
if !self.tasks_for_shutdown.is_empty() {
return Ok(ReturnStatus::Shutdown(self.serialize().await));
}
if !self.ancestors_needing_indexing.is_empty() {
let save_tasks = self
.ancestors_needing_indexing
.drain()
.chunks(BATCH_SIZE)
.into_iter()
.map(|chunk| {
let chunked_saves = chunk.collect::<Vec<_>>();
self.metadata.total_paths += chunked_saves.len() as u64;
self.metadata.total_save_steps += 1;
SaveTask::new(
self.location.id,
self.location.pub_id.clone(),
chunked_saves,
Arc::clone(ctx.db()),
Arc::clone(ctx.sync()),
)
})
.collect::<Vec<_>>();
pending_running_tasks.extend(dispatcher.dispatch_many(save_tasks).await);
if let Some(res) = self
.process_handles(&mut pending_running_tasks, &ctx, &dispatcher)
.await
{
return res;
}
if !self.tasks_for_shutdown.is_empty() {
return Ok(ReturnStatus::Shutdown(self.serialize().await));
}
}
// From here onward, job will not be interrupted anymore
let Self {
location,
mut metadata,
iso_file_path_factory,
walker_root_path,
iso_paths_and_sizes,
mut errors,
tasks_for_shutdown,
..
} = self;
if metadata.indexed_count > 0 || metadata.removed_count > 0 || metadata.updated_count > 0 {
let start_size_update_time = Instant::now();
update_directory_sizes(iso_paths_and_sizes, ctx.db(), ctx.sync()).await?;
let root_path = walker_root_path.expect("must be set");
if root_path != iso_file_path_factory.location_path {
reverse_update_directories_sizes(
&*root_path,
location.id,
&*iso_file_path_factory.location_path,
ctx.db(),
ctx.sync(),
&mut errors,
)
.await?;
}
update_location_size(location.id, ctx.db(), &ctx.query_invalidator()).await?;
metadata.db_write_time += start_size_update_time.elapsed();
}
if metadata.indexed_count > 0 || metadata.removed_count > 0 {
ctx.invalidate_query("search.paths");
}
assert!(
tasks_for_shutdown.is_empty(),
"all tasks must be completed here"
);
Ok(ReturnStatus::Completed(
JobReturn::builder()
.with_metadata(metadata)
.with_non_critical_errors(errors)
.build(),
))
}
async fn resume_tasks(
&mut self,
dispatcher: &JobTaskDispatcher,
ctx: &impl JobContext,
SerializedTasks(serialized_tasks): SerializedTasks,
) -> Result<(), Error> {
let location_id = self.location.id;
self.pending_tasks_on_resume = dispatcher
.dispatch_many_boxed(
rmp_serde::from_slice::<Vec<(TaskKind, Vec<u8>)>>(&serialized_tasks)
.map_err(IndexerError::from)?
.into_iter()
.map(|(task_kind, task_bytes)| {
let indexer_ruler = self.indexer_ruler.clone();
let iso_file_path_factory = self.iso_file_path_factory.clone();
async move {
match task_kind {
TaskKind::Walk => WalkDirTask::deserialize(
&task_bytes,
(
indexer_ruler.clone(),
WalkerDBProxy {
location_id,
db: Arc::clone(ctx.db()),
},
iso_file_path_factory.clone(),
dispatcher.clone(),
),
)
.await
.map(IntoTask::into_task),
TaskKind::Save => SaveTask::deserialize(
&task_bytes,
(Arc::clone(ctx.db()), Arc::clone(ctx.sync())),
)
.await
.map(IntoTask::into_task),
TaskKind::Update => UpdateTask::deserialize(
&task_bytes,
(Arc::clone(ctx.db()), Arc::clone(ctx.sync())),
)
.await
.map(IntoTask::into_task),
}
}
})
.collect::<Vec<_>>()
.try_join()
.await
.map_err(IndexerError::from)?,
)
.await;
Ok(())
}
}
impl IndexerJob {
pub fn new(
location: location_with_indexer_rules::Data,
sub_path: Option<PathBuf>,
) -> Result<Self, IndexerError> {
Ok(Self {
indexer_ruler: location
.indexer_rules
.iter()
.map(|rule| IndexerRule::try_from(&rule.indexer_rule))
.collect::<Result<Vec<_>, _>>()
.map(IndexerRuler::new)?,
iso_file_path_factory: IsoFilePathFactory {
location_id: location.id,
location_path: maybe_missing(&location.path, "location.path")
.map(PathBuf::from)
.map(Arc::new)?,
},
walker_root_path: None,
ancestors_needing_indexing: HashSet::new(),
ancestors_already_indexed: HashSet::new(),
iso_paths_and_sizes: HashMap::new(),
location,
sub_path,
metadata: Metadata::default(),
errors: Vec::new(),
pending_tasks_on_resume: Vec::new(),
tasks_for_shutdown: Vec::new(),
})
}
/// Process output of tasks, according to the downcasted output type
///
/// # Panics
/// Will panic if another task type is added in the job, but this function wasn't updated to handle it
///
async fn process_task_output(
&mut self,
task_id: TaskId,
any_task_output: Box<dyn AnyTaskOutput>,
job_ctx: &impl JobContext,
dispatcher: &JobTaskDispatcher,
) -> Result<Vec<TaskHandle<Error>>, IndexerError> {
if any_task_output.is::<WalkTaskOutput>() {
return self
.process_walk_output(
*any_task_output
.downcast::<WalkTaskOutput>()
.expect("just checked"),
job_ctx,
dispatcher,
)
.await;
} else if any_task_output.is::<SaveTaskOutput>() {
self.process_save_output(
*any_task_output
.downcast::<SaveTaskOutput>()
.expect("just checked"),
job_ctx,
);
} else if any_task_output.is::<UpdateTaskOutput>() {
self.process_update_output(
*any_task_output
.downcast::<UpdateTaskOutput>()
.expect("just checked"),
job_ctx,
);
} else {
unreachable!("Unexpected task output type: <id='{task_id}'>");
}
self.metadata.completed_tasks += 1;
job_ctx.progress(vec![ProgressUpdate::CompletedTaskCount(
self.metadata.completed_tasks,
)]);
Ok(Vec::new())
}
async fn process_walk_output(
&mut self,
WalkTaskOutput {
to_create,
to_update,
to_remove,
accepted_ancestors,
errors,
directory_iso_file_path,
total_size,
mut handles,
scan_time,
}: WalkTaskOutput,
job_ctx: &impl JobContext,
dispatcher: &JobTaskDispatcher,
) -> Result<Vec<TaskHandle<Error>>, IndexerError> {
self.metadata.scan_read_time += scan_time;
let (to_create_count, to_update_count) = (to_create.len(), to_update.len());
*self
.iso_paths_and_sizes
.entry(directory_iso_file_path)
.or_default() += total_size;
for ancestor_iso_file_path in accepted_ancestors
.iter()
.map(|ancestor_entry| &ancestor_entry.iso_file_path)
{
if self
.iso_paths_and_sizes
.contains_key(ancestor_iso_file_path)
{
*self
.iso_paths_and_sizes
.get_mut(ancestor_iso_file_path)
.expect("we just checked") += total_size;
} else {
self.iso_paths_and_sizes
.insert(ancestor_iso_file_path.clone(), total_size);
}
}
// First we add ancestors, filtering out ancestors already indexed in previous iterations
self.ancestors_needing_indexing
.extend(accepted_ancestors.into_iter().filter(|ancestor_entry| {
!self
.ancestors_already_indexed
.contains(&ancestor_entry.iso_file_path)
}));
// Then we add new directories to be indexed as they can be received as ancestors in coming iterations
self.ancestors_already_indexed.extend(
to_create
.iter()
.filter(|&WalkedEntry { iso_file_path, .. }| iso_file_path.is_dir())
.map(|WalkedEntry { iso_file_path, .. }| iso_file_path.clone()),
);
self.errors.extend(errors);
let db_delete_time = Instant::now();
self.metadata.removed_count +=
remove_non_existing_file_paths(to_remove, job_ctx.db(), job_ctx.sync()).await?;
self.metadata.db_write_time += db_delete_time.elapsed();
let save_tasks = to_create
.into_iter()
.chunks(BATCH_SIZE)
.into_iter()
.map(|chunk| {
let chunked_saves = chunk.collect::<Vec<_>>();
self.metadata.total_paths += chunked_saves.len() as u64;
self.metadata.total_save_steps += 1;
SaveTask::new(
self.location.id,
self.location.pub_id.clone(),
chunked_saves,
Arc::clone(job_ctx.db()),
Arc::clone(job_ctx.sync()),
)
})
.collect::<Vec<_>>();
let update_tasks = to_update
.into_iter()
.chunks(BATCH_SIZE)
.into_iter()
.map(|chunk| {
let chunked_updates = chunk.collect::<Vec<_>>();
self.metadata.total_updated_paths += chunked_updates.len() as u64;
self.metadata.total_update_steps += 1;
UpdateTask::new(
chunked_updates,
Arc::clone(job_ctx.db()),
Arc::clone(job_ctx.sync()),
)
})
.collect::<Vec<_>>();
handles.extend(dispatcher.dispatch_many(save_tasks).await);
handles.extend(dispatcher.dispatch_many(update_tasks).await);
self.metadata.total_tasks += handles.len() as u64;
job_ctx.progress(vec![
ProgressUpdate::TaskCount(handles.len() as u64),
ProgressUpdate::message(format!(
"Found {to_create_count} new files and {to_update_count} to update"
)),
]);
Ok(handles)
}
fn process_save_output(
&mut self,
SaveTaskOutput {
saved_count,
save_duration,
}: SaveTaskOutput,
job_ctx: &impl JobContext,
) {
self.metadata.indexed_count += saved_count;
self.metadata.db_write_time += save_duration;
job_ctx.progress_msg(format!("Saved {saved_count} files"));
}
fn process_update_output(
&mut self,
UpdateTaskOutput {
updated_count,
update_duration,
}: UpdateTaskOutput,
job_ctx: &impl JobContext,
) {
self.metadata.updated_count += updated_count;
self.metadata.db_write_time += update_duration;
job_ctx.progress_msg(format!("Updated {updated_count} files"));
}
async fn process_handles(
&mut self,
pending_running_tasks: &mut FuturesUnordered<TaskHandle<Error>>,
job_ctx: &impl JobContext,
dispatcher: &JobTaskDispatcher,
) -> Option<Result<ReturnStatus, Error>> {
while let Some(task) = pending_running_tasks.next().await {
match task {
Ok(TaskStatus::Done((task_id, TaskOutput::Out(out)))) => {
let more_handles = match self
.process_task_output(task_id, out, job_ctx, dispatcher)
.await
{
Ok(more_handles) => more_handles,
Err(e) => {
cancel_pending_tasks(&*pending_running_tasks).await;
return Some(Err(e.into()));
}
};
pending_running_tasks.extend(more_handles);
}
Ok(TaskStatus::Done((task_id, TaskOutput::Empty))) => {
warn!("Task <id='{task_id}'> returned an empty output");
}
Ok(TaskStatus::Shutdown(task)) => {
self.tasks_for_shutdown.push(task);
}
Ok(TaskStatus::Error(e)) => {
cancel_pending_tasks(&*pending_running_tasks).await;
return Some(Err(e));
}
Ok(TaskStatus::Canceled | TaskStatus::ForcedAbortion) => {
cancel_pending_tasks(&*pending_running_tasks).await;
return Some(Ok(ReturnStatus::Canceled));
}
Err(e) => {
cancel_pending_tasks(&*pending_running_tasks).await;
return Some(Err(e.into()));
}
}
}
None
}
async fn init_or_resume(
&mut self,
pending_running_tasks: &mut FuturesUnordered<TaskHandle<Error>>,
job_ctx: &impl JobContext,
dispatcher: &JobTaskDispatcher,
) -> Result<(), IndexerError> {
// if we don't have any pending task, then this is a fresh job
if self.pending_tasks_on_resume.is_empty() {
let walker_root_path = Arc::new(
determine_initial_walk_path(
self.location.id,
&self.sub_path,
&*self.iso_file_path_factory.location_path,
job_ctx.db(),
)
.await?,
);
pending_running_tasks.push(
dispatcher
.dispatch(WalkDirTask::new(
walker_root_path.as_ref(),
Arc::clone(&walker_root_path),
self.indexer_ruler.clone(),
self.iso_file_path_factory.clone(),
WalkerDBProxy {
location_id: self.location.id,
db: Arc::clone(job_ctx.db()),
},
Some(dispatcher.clone()),
)?)
.await,
);
self.walker_root_path = Some(walker_root_path);
} else {
pending_running_tasks.extend(mem::take(&mut self.pending_tasks_on_resume));
}
Ok(())
}
}
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct Metadata {
db_write_time: Duration,
scan_read_time: Duration,
total_tasks: u64,
completed_tasks: u64,
total_paths: u64,
total_updated_paths: u64,
total_save_steps: u64,
total_update_steps: u64,
indexed_count: u64,
updated_count: u64,
removed_count: u64,
}
impl From<Metadata> for ReportOutputMetadata {
fn from(value: Metadata) -> Self {
Self::Metrics(HashMap::from([
("db_write_time".into(), json!(value.db_write_time)),
("scan_read_time".into(), json!(value.scan_read_time)),
("total_tasks".into(), json!(value.total_tasks)),
("total_paths".into(), json!(value.total_paths)),
(
"total_updated_paths".into(),
json!(value.total_updated_paths),
),
("total_save_steps".into(), json!(value.total_save_steps)),
("total_update_steps".into(), json!(value.total_update_steps)),
("indexed_count".into(), json!(value.indexed_count)),
("updated_count".into(), json!(value.updated_count)),
("removed_count".into(), json!(value.removed_count)),
]))
}
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
enum TaskKind {
Walk,
Save,
Update,
}
#[derive(Serialize, Deserialize)]
struct SaveState {
location: location_with_indexer_rules::Data,
sub_path: Option<PathBuf>,
metadata: Metadata,
iso_file_path_factory: IsoFilePathFactory,
indexer_ruler_bytes: Vec<u8>,
walker_root_path: Option<Arc<PathBuf>>,
ancestors_needing_indexing: HashSet<WalkedEntry>,
ancestors_already_indexed: HashSet<IsolatedFilePathData<'static>>,
paths_and_sizes: HashMap<IsolatedFilePathData<'static>, u64>,
errors: Vec<NonCriticalJobError>,
tasks_for_shutdown_bytes: Option<SerializedTasks>,
}
impl SerializableJob for IndexerJob {
async fn serialize(self) -> Result<Option<Vec<u8>>, rmp_serde::encode::Error> {
let Self {
location,
sub_path,
metadata,
iso_file_path_factory,
indexer_ruler,
walker_root_path,
ancestors_needing_indexing,
ancestors_already_indexed,
iso_paths_and_sizes: paths_and_sizes,
errors,
tasks_for_shutdown,
..
} = self;
rmp_serde::to_vec_named(&SaveState {
location,
sub_path,
metadata,
iso_file_path_factory,
indexer_ruler_bytes: indexer_ruler.serialize().await?,
walker_root_path,
ancestors_needing_indexing,
ancestors_already_indexed,
paths_and_sizes,
tasks_for_shutdown_bytes: Some(SerializedTasks(rmp_serde::to_vec_named(
&tasks_for_shutdown
.into_iter()
.map(|task| async move {
if task
.is::<WalkDirTask<WalkerDBProxy, IsoFilePathFactory, JobTaskDispatcher>>(
) {
task
.downcast::<WalkDirTask<WalkerDBProxy, IsoFilePathFactory, JobTaskDispatcher>>(
)
.expect("just checked")
.serialize()
.await
.map(|bytes| (TaskKind::Walk, bytes))
} else if task.is::<SaveTask>() {
task.downcast::<SaveTask>()
.expect("just checked")
.serialize()
.await
.map(|bytes| (TaskKind::Save, bytes))
} else if task.is::<UpdateTask>() {
task.downcast::<UpdateTask>()
.expect("just checked")
.serialize()
.await
.map(|bytes| (TaskKind::Update, bytes))
} else {
unreachable!("Unexpected task type")
}
})
.collect::<Vec<_>>()
.try_join()
.await?,
)?)),
errors,
})
.map(Some)
}
async fn deserialize(
serialized_job: &[u8],
_: &impl JobContext,
) -> Result<Option<(Self, Option<SerializedTasks>)>, rmp_serde::decode::Error> {
let SaveState {
location,
sub_path,
metadata,
iso_file_path_factory,
indexer_ruler_bytes,
walker_root_path,
ancestors_needing_indexing,
ancestors_already_indexed,
paths_and_sizes,
errors,
tasks_for_shutdown_bytes,
} = rmp_serde::from_slice::<SaveState>(serialized_job)?;
let indexer_ruler = IndexerRuler::deserialize(&indexer_ruler_bytes)?;
Ok(Some((
Self {
location,
sub_path,
metadata,
iso_file_path_factory,
indexer_ruler,
walker_root_path,
ancestors_needing_indexing,
ancestors_already_indexed,
iso_paths_and_sizes: paths_and_sizes,
errors,
pending_tasks_on_resume: Vec::new(),
tasks_for_shutdown: Vec::new(),
},
tasks_for_shutdown_bytes,
)))
}
}
impl Hash for IndexerJob {
fn hash<H: Hasher>(&self, state: &mut H) {
self.location.id.hash(state);
if let Some(ref sub_path) = self.sub_path {
sub_path.hash(state);
}
}
}


@ -0,0 +1,542 @@
use crate::NonCriticalJobError;
use sd_core_file_path_helper::{
ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location,
FilePathError, IsolatedFilePathData,
};
use sd_core_indexer_rules::IndexerRuleError;
use sd_core_prisma_helpers::{
file_path_pub_and_cas_ids, file_path_to_isolate_with_pub_id, file_path_walker,
};
use sd_core_sync::Manager as SyncManager;
use sd_prisma::{
prisma::{file_path, location, PrismaClient, SortOrder},
prisma_sync,
};
use sd_sync::OperationFactory;
use sd_utils::{
db::{size_in_bytes_from_db, size_in_bytes_to_db, MissingFieldError},
error::{FileIOError, NonUtf8PathError},
from_bytes_to_uuid, msgpack,
};
use std::{
collections::{HashMap, HashSet},
hash::BuildHasher,
mem,
path::{Path, PathBuf},
sync::Arc,
};
use itertools::Itertools;
use prisma_client_rust::{operator::or, Select};
use rspc::ErrorCode;
use serde::{Deserialize, Serialize};
use specta::Type;
use tracing::warn;
mod job;
mod shallow;
mod tasks;
pub use job::IndexerJob;
pub use shallow::shallow;
use tasks::walker;
/// `BATCH_SIZE` is the number of files to index at each task, writing the chunk of files metadata in the database.
const BATCH_SIZE: usize = 1000;
#[derive(thiserror::Error, Debug)]
pub enum IndexerError {
// Not Found errors
#[error("indexer rule not found: <id='{0}'>")]
IndexerRuleNotFound(i32),
#[error("received sub path not in database: <path='{}'>", .0.display())]
SubPathNotFound(Box<Path>),
// Internal Errors
#[error("database Error: {0}")]
Database(#[from] prisma_client_rust::QueryError),
#[error(transparent)]
FileIO(#[from] FileIOError),
#[error(transparent)]
NonUtf8Path(#[from] NonUtf8PathError),
#[error(transparent)]
IsoFilePath(#[from] FilePathError),
#[error("missing field on database: {0}")]
MissingField(#[from] MissingFieldError),
#[error("failed to deserialized stored tasks for job resume: {0}")]
DeserializeTasks(#[from] rmp_serde::decode::Error),
// Mixed errors
#[error(transparent)]
Rules(#[from] IndexerRuleError),
}
impl From<IndexerError> for rspc::Error {
fn from(err: IndexerError) -> Self {
match err {
IndexerError::IndexerRuleNotFound(_) | IndexerError::SubPathNotFound(_) => {
Self::with_cause(ErrorCode::NotFound, err.to_string(), err)
}
IndexerError::Rules(rule_err) => rule_err.into(),
_ => Self::with_cause(ErrorCode::InternalServerError, err.to_string(), err),
}
}
}
#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type)]
pub enum NonCriticalIndexerError {
#[error("failed to read directory entry: {0}")]
FailedDirectoryEntry(String),
#[error("failed to fetch metadata: {0}")]
Metadata(String),
#[error("error applying indexer rule: {0}")]
IndexerRule(String),
#[error("error trying to extract file path metadata from a file: {0}")]
FilePathMetadata(String),
#[error("failed to fetch file paths ids from existing files on database: {0}")]
FetchAlreadyExistingFilePathIds(String),
#[error("failed to fetch file paths to be removed from database: {0}")]
FetchFilePathsToRemove(String),
#[error("error constructing isolated file path: {0}")]
IsoFilePath(String),
#[error("failed to dispatch new task to keep walking a directory: {0}")]
DispatchKeepWalking(String),
#[error("missing file_path data on database: {0}")]
MissingFilePathData(String),
}
async fn determine_initial_walk_path(
location_id: location::id::Type,
sub_path: &Option<impl AsRef<Path> + Send + Sync>,
location_path: impl AsRef<Path> + Send,
db: &PrismaClient,
) -> Result<PathBuf, IndexerError> {
let location_path = location_path.as_ref();
match sub_path {
Some(sub_path) if sub_path.as_ref() != Path::new("") => {
let sub_path = sub_path.as_ref();
let full_path = ensure_sub_path_is_in_location(location_path, sub_path).await?;
ensure_sub_path_is_directory(location_path, sub_path).await?;
ensure_file_path_exists(
sub_path,
&IsolatedFilePathData::new(location_id, location_path, &full_path, true)
.map_err(IndexerError::from)?,
db,
IndexerError::SubPathNotFound,
)
.await?;
Ok(full_path)
}
_ => Ok(location_path.to_path_buf()),
}
}
fn chunk_db_queries<'db, 'iso>(
iso_file_paths: impl IntoIterator<Item = &'iso IsolatedFilePathData<'iso>>,
db: &'db PrismaClient,
) -> Vec<Select<'db, Vec<file_path_to_isolate_with_pub_id::Data>>> {
iso_file_paths
.into_iter()
.chunks(200)
.into_iter()
.map(|paths_chunk| {
db.file_path()
.find_many(vec![or(paths_chunk
.into_iter()
.map(file_path::WhereParam::from)
.collect())])
.select(file_path_to_isolate_with_pub_id::select())
})
.collect::<Vec<_>>()
}
#[allow(clippy::missing_panics_doc)] // Can't actually panic as we use the hashmap to fetch entries from db
async fn update_directory_sizes(
iso_paths_and_sizes: HashMap<IsolatedFilePathData<'_>, u64, impl BuildHasher + Send>,
db: &PrismaClient,
sync: &SyncManager,
) -> Result<(), IndexerError> {
let to_sync_and_update = db
._batch(chunk_db_queries(iso_paths_and_sizes.keys(), db))
.await?
.into_iter()
.flatten()
.map(|file_path| {
let size_bytes = iso_paths_and_sizes
.get(&IsolatedFilePathData::try_from(&file_path)?)
.map(|size| size.to_be_bytes().to_vec())
.expect("must be here");
Ok((
sync.shared_update(
prisma_sync::file_path::SyncId {
pub_id: file_path.pub_id.clone(),
},
file_path::size_in_bytes_bytes::NAME,
msgpack!(size_bytes),
),
db.file_path().update(
file_path::pub_id::equals(file_path.pub_id),
vec![file_path::size_in_bytes_bytes::set(Some(size_bytes))],
),
))
})
.collect::<Result<Vec<_>, IndexerError>>()?
.into_iter()
.unzip::<_, _, Vec<_>, Vec<_>>();
sync.write_ops(db, to_sync_and_update).await?;
Ok(())
}
async fn update_location_size<InvalidateQuery: Fn(&'static str) + Send + Sync>(
location_id: location::id::Type,
db: &PrismaClient,
invalidate_query: &InvalidateQuery,
) -> Result<(), IndexerError> {
let total_size = db
.file_path()
.find_many(vec![
file_path::location_id::equals(Some(location_id)),
file_path::materialized_path::equals(Some("/".to_string())),
])
.select(file_path::select!({ size_in_bytes_bytes }))
.exec()
.await?
.into_iter()
.filter_map(|file_path| {
file_path
.size_in_bytes_bytes
.map(|size_in_bytes_bytes| size_in_bytes_from_db(&size_in_bytes_bytes))
})
.sum::<u64>();
db.location()
.update(
location::id::equals(location_id),
vec![location::size_in_bytes::set(Some(
total_size.to_be_bytes().to_vec(),
))],
)
.exec()
.await?;
invalidate_query("locations.list");
invalidate_query("locations.get");
Ok(())
}
async fn remove_non_existing_file_paths(
to_remove: Vec<file_path_pub_and_cas_ids::Data>,
db: &PrismaClient,
sync: &sd_core_sync::Manager,
) -> Result<u64, IndexerError> {
#[allow(clippy::cast_sign_loss)]
let (sync_params, db_params): (Vec<_>, Vec<_>) = to_remove
.into_iter()
.map(|file_path| {
(
sync.shared_delete(prisma_sync::file_path::SyncId {
pub_id: file_path.pub_id,
}),
file_path.id,
)
})
.unzip();
sync.write_ops(
db,
(
sync_params,
db.file_path()
.delete_many(vec![file_path::id::in_vec(db_params)]),
),
)
.await
.map(
#[allow(clippy::cast_sign_loss)]
|count| count as u64,
)
.map_err(Into::into)
}
#[allow(clippy::missing_panics_doc)] // Can't actually panic as we only deal with directories
async fn reverse_update_directories_sizes(
base_path: impl AsRef<Path> + Send,
location_id: location::id::Type,
location_path: impl AsRef<Path> + Send,
db: &PrismaClient,
sync: &SyncManager,
errors: &mut Vec<NonCriticalJobError>,
) -> Result<(), IndexerError> {
let location_path = location_path.as_ref();
let ancestors = base_path
.as_ref()
.ancestors()
.take_while(|&ancestor| ancestor != location_path)
.map(|ancestor| {
IsolatedFilePathData::new(location_id, location_path, ancestor, true).map(
|iso_file_path| {
let materialized_path = iso_file_path
.materialized_path_for_children()
.expect("each ancestor is a directory");
(iso_file_path, materialized_path)
},
)
})
.collect::<Result<HashMap<_, _>, _>>()?;
let mut pub_id_by_ancestor_materialized_path = db
._batch(chunk_db_queries(ancestors.keys(), db))
.await?
.into_iter()
.flatten()
.filter_map(|mut file_path| {
let pub_id = mem::take(&mut file_path.pub_id);
IsolatedFilePathData::try_from(file_path)
.map_err(|e| {
errors.push(
NonCriticalIndexerError::MissingFilePathData(format!(
"Found a file_path missing data: <pub_id='{:#?}'>, error: {e:#?}",
from_bytes_to_uuid(&pub_id)
))
.into(),
);
})
.map(|iso_file_path| {
(
iso_file_path
.materialized_path_for_children()
.expect("we know it's a directory"),
(pub_id, 0),
)
})
.ok()
})
.collect::<HashMap<_, _>>();
compute_sizes(
location_id,
ancestors.values().cloned().collect(),
&mut pub_id_by_ancestor_materialized_path,
db,
errors,
)
.await?;
let to_sync_and_update = ancestors
.into_values()
.filter_map(|materialized_path| {
if let Some((pub_id, size)) =
pub_id_by_ancestor_materialized_path.remove(&materialized_path)
{
let size_bytes = size_in_bytes_to_db(size);
Some((
sync.shared_update(
prisma_sync::file_path::SyncId {
pub_id: pub_id.clone(),
},
file_path::size_in_bytes_bytes::NAME,
msgpack!(size_bytes),
),
db.file_path().update(
file_path::pub_id::equals(pub_id),
vec![file_path::size_in_bytes_bytes::set(Some(size_bytes))],
),
))
} else {
warn!("Got a missing ancestor for a file_path in the database, maybe we have a corruption");
None
}
})
.unzip::<_, _, Vec<_>, Vec<_>>();
sync.write_ops(db, to_sync_and_update).await?;
Ok(())
}
async fn compute_sizes(
location_id: location::id::Type,
materialized_paths: Vec<String>,
pub_id_by_ancestor_materialized_path: &mut HashMap<String, (file_path::pub_id::Type, u64)>,
db: &PrismaClient,
errors: &mut Vec<NonCriticalJobError>,
) -> Result<(), IndexerError> {
db.file_path()
.find_many(vec![
file_path::location_id::equals(Some(location_id)),
file_path::materialized_path::in_vec(materialized_paths),
])
.select(file_path::select!({ pub_id materialized_path size_in_bytes_bytes }))
.exec()
.await?
.into_iter()
.for_each(|file_path| {
if let Some(materialized_path) = file_path.materialized_path {
if let Some((_, size)) =
pub_id_by_ancestor_materialized_path.get_mut(&materialized_path)
{
*size += file_path.size_in_bytes_bytes.map_or_else(
|| {
warn!("Got a directory missing its size in bytes");
0
},
|size_in_bytes_bytes| size_in_bytes_from_db(&size_in_bytes_bytes),
);
}
} else {
errors.push(
NonCriticalIndexerError::MissingFilePathData(format!(
"Corrupt database possessing a file_path entry without materialized_path: <pub_id='{:#?}'>",
from_bytes_to_uuid(&file_path.pub_id)
))
.into(),
);
}
});
Ok(())
}
#[derive(Debug, Clone, Serialize, Deserialize)]
struct IsoFilePathFactory {
pub location_id: location::id::Type,
pub location_path: Arc<PathBuf>,
}
impl walker::IsoFilePathFactory for IsoFilePathFactory {
fn build(
&self,
path: impl AsRef<Path>,
is_dir: bool,
) -> Result<IsolatedFilePathData<'static>, FilePathError> {
IsolatedFilePathData::new(self.location_id, self.location_path.as_ref(), path, is_dir)
}
}
#[derive(Debug, Clone)]
struct WalkerDBProxy {
location_id: location::id::Type,
db: Arc<PrismaClient>,
}
impl walker::WalkerDBProxy for WalkerDBProxy {
async fn fetch_file_paths(
&self,
found_paths: Vec<file_path::WhereParam>,
) -> Result<Vec<file_path_walker::Data>, IndexerError> {
// Each found path is a AND with 4 terms, and SQLite has a expression tree limit of 1000 terms
// so we will use chunks of 200 just to be safe
self.db
._batch(
found_paths
.into_iter()
.chunks(200)
.into_iter()
.map(|founds| {
self.db
.file_path()
.find_many(vec![or(founds.collect::<Vec<_>>())])
.select(file_path_walker::select())
})
.collect::<Vec<_>>(),
)
.await
.map(|fetched| fetched.into_iter().flatten().collect::<Vec<_>>())
.map_err(Into::into)
}
async fn fetch_file_paths_to_remove(
&self,
parent_iso_file_path: &IsolatedFilePathData<'_>,
unique_location_id_materialized_path_name_extension_params: Vec<file_path::WhereParam>,
) -> Result<Vec<file_path_pub_and_cas_ids::Data>, NonCriticalIndexerError> {
// NOTE: This batch size can be increased if we wish to trade memory for more performance
const BATCH_SIZE: i64 = 1000;
let founds_ids = self
.db
._batch(
unique_location_id_materialized_path_name_extension_params
.into_iter()
.chunks(200)
.into_iter()
.map(|unique_params| {
self.db
.file_path()
.find_many(vec![or(unique_params.collect())])
.select(file_path::select!({ id }))
})
.collect::<Vec<_>>(),
)
.await
.map(|founds_chunk| {
founds_chunk
.into_iter()
.flat_map(|file_paths| file_paths.into_iter().map(|file_path| file_path.id))
.collect::<HashSet<_>>()
})
.map_err(|e| NonCriticalIndexerError::FetchAlreadyExistingFilePathIds(e.to_string()))?;
let mut to_remove = vec![];
let mut cursor = 1;
loop {
let found = self
.db
.file_path()
.find_many(vec![
file_path::location_id::equals(Some(self.location_id)),
file_path::materialized_path::equals(Some(
parent_iso_file_path
.materialized_path_for_children()
.expect("the received isolated file path must be from a directory"),
)),
])
.order_by(file_path::id::order(SortOrder::Asc))
.take(BATCH_SIZE)
.cursor(file_path::id::equals(cursor))
.select(file_path_pub_and_cas_ids::select())
.exec()
.await
.map_err(|e| NonCriticalIndexerError::FetchFilePathsToRemove(e.to_string()))?;
#[allow(clippy::cast_possible_truncation)] // Safe because we are using a constant
let should_stop = found.len() < BATCH_SIZE as usize;
if let Some(last) = found.last() {
cursor = last.id;
} else {
break;
}
to_remove.extend(
found
.into_iter()
.filter(|file_path| !founds_ids.contains(&file_path.id)),
);
if should_stop {
break;
}
}
Ok(to_remove)
}
}


@ -0,0 +1,261 @@
use crate::{Error, NonCriticalJobError};
use sd_core_indexer_rules::{IndexerRule, IndexerRuler};
use sd_core_prisma_helpers::location_with_indexer_rules;
use sd_core_sync::Manager as SyncManager;
use sd_prisma::prisma::PrismaClient;
use sd_task_system::{BaseTaskDispatcher, CancelTaskOnDrop, IntoTask, TaskDispatcher, TaskOutput};
use sd_utils::db::maybe_missing;
use std::{
collections::HashMap,
path::{Path, PathBuf},
sync::Arc,
};
use futures_concurrency::future::TryJoin;
use itertools::Itertools;
use tracing::{debug, warn};
use super::{
determine_initial_walk_path, remove_non_existing_file_paths, reverse_update_directories_sizes,
tasks::{
saver::{SaveTask, SaveTaskOutput},
updater::{UpdateTask, UpdateTaskOutput},
walker::{ToWalkEntry, WalkDirTask, WalkTaskOutput, WalkedEntry},
},
update_directory_sizes, update_location_size, IndexerError, IsoFilePathFactory, WalkerDBProxy,
BATCH_SIZE,
};
pub async fn shallow(
location: location_with_indexer_rules::Data,
sub_path: impl AsRef<Path> + Send,
dispatcher: BaseTaskDispatcher<Error>,
db: Arc<PrismaClient>,
sync: Arc<SyncManager>,
invalidate_query: impl Fn(&'static str) + Send + Sync,
) -> Result<Vec<NonCriticalJobError>, Error> {
let sub_path = sub_path.as_ref();
let location_path = maybe_missing(&location.path, "location.path")
.map(PathBuf::from)
.map(Arc::new)
.map_err(IndexerError::from)?;
let to_walk_path = Arc::new(
determine_initial_walk_path(location.id, &Some(sub_path), &*location_path, &db).await?,
);
let Some(WalkTaskOutput {
to_create,
to_update,
to_remove,
mut errors,
directory_iso_file_path,
total_size,
..
}) = walk(
&location,
Arc::clone(&location_path),
Arc::clone(&to_walk_path),
Arc::clone(&db),
&dispatcher,
)
.await?
else {
return Ok(vec![]);
};
let removed_count = remove_non_existing_file_paths(to_remove, &db, &sync).await?;
let Some(Metadata {
indexed_count,
updated_count,
}) = save_and_update(
&location,
to_create,
to_update,
Arc::clone(&db),
Arc::clone(&sync),
&dispatcher,
)
.await?
else {
return Ok(errors);
};
if indexed_count > 0 || removed_count > 0 || updated_count > 0 {
update_directory_sizes(
HashMap::from([(directory_iso_file_path, total_size)]),
&db,
&sync,
)
.await?;
if to_walk_path != location_path {
reverse_update_directories_sizes(
&*to_walk_path,
location.id,
&*location_path,
&db,
&sync,
&mut errors,
)
.await?;
}
update_location_size(location.id, &db, &invalidate_query).await?;
}
if indexed_count > 0 || removed_count > 0 {
invalidate_query("search.paths");
}
Ok(errors)
}
async fn walk(
location: &location_with_indexer_rules::Data,
location_path: Arc<PathBuf>,
to_walk_path: Arc<PathBuf>,
db: Arc<PrismaClient>,
dispatcher: &BaseTaskDispatcher<Error>,
) -> Result<Option<WalkTaskOutput>, Error> {
match dispatcher
.dispatch(WalkDirTask::new(
ToWalkEntry::from(&*to_walk_path),
to_walk_path,
location
.indexer_rules
.iter()
.map(|rule| IndexerRule::try_from(&rule.indexer_rule))
.collect::<Result<Vec<_>, _>>()
.map(IndexerRuler::new)
.map_err(IndexerError::from)?,
IsoFilePathFactory {
location_id: location.id,
location_path,
},
WalkerDBProxy {
location_id: location.id,
db,
},
None::<BaseTaskDispatcher<Error>>,
)?)
.await
.await?
{
sd_task_system::TaskStatus::Done((_, TaskOutput::Out(data))) => Ok(Some(
*data
.downcast::<WalkTaskOutput>()
.expect("we just dispatched this task"),
)),
sd_task_system::TaskStatus::Done((_, TaskOutput::Empty)) => {
warn!("Shallow indexer's walker task finished without any output");
Ok(None)
}
sd_task_system::TaskStatus::Error(e) => Err(e),
sd_task_system::TaskStatus::Shutdown(_) => {
debug!("Spacedrive is shuting down while a shallow indexer was in progress");
Ok(None)
}
sd_task_system::TaskStatus::Canceled | sd_task_system::TaskStatus::ForcedAbortion => {
unreachable!("WalkDirTask on shallow indexer can never be canceled or aborted")
}
}
}
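// Minimal, standard-library sketch of the downcasting pattern in `walk` above: a task returns
// its output type-erased, and the dispatching side, which knows the concrete type it asked for,
// recovers it with `downcast`. `ExampleWalkOutput` is a hypothetical stand-in for
// `WalkTaskOutput`.
#[derive(Debug, PartialEq)]
struct ExampleWalkOutput {
    total_size: u64,
}

fn erased_output() -> Box<dyn std::any::Any + Send> {
    Box::new(ExampleWalkOutput { total_size: 42 })
}

fn downcast_sketch() {
    let output = erased_output()
        .downcast::<ExampleWalkOutput>()
        .expect("we just produced this type");
    assert_eq!(*output, ExampleWalkOutput { total_size: 42 });
}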
struct Metadata {
indexed_count: u64,
updated_count: u64,
}
async fn save_and_update(
location: &location_with_indexer_rules::Data,
to_create: Vec<WalkedEntry>,
to_update: Vec<WalkedEntry>,
db: Arc<PrismaClient>,
sync: Arc<SyncManager>,
dispatcher: &BaseTaskDispatcher<Error>,
) -> Result<Option<Metadata>, Error> {
let save_and_update_tasks = to_create
.into_iter()
.chunks(BATCH_SIZE)
.into_iter()
.map(|chunk| {
SaveTask::new(
location.id,
location.pub_id.clone(),
chunk.collect::<Vec<_>>(),
Arc::clone(&db),
Arc::clone(&sync),
)
})
.map(IntoTask::into_task)
.chain(
to_update
.into_iter()
.chunks(BATCH_SIZE)
.into_iter()
.map(|chunk| {
UpdateTask::new(
chunk.collect::<Vec<_>>(),
Arc::clone(&db),
Arc::clone(&sync),
)
})
.map(IntoTask::into_task),
)
.collect::<Vec<_>>();
let mut metadata = Metadata {
indexed_count: 0,
updated_count: 0,
};
for task_status in dispatcher
.dispatch_many_boxed(save_and_update_tasks)
.await
.into_iter()
.map(CancelTaskOnDrop)
.collect::<Vec<_>>()
.try_join()
.await?
{
match task_status {
sd_task_system::TaskStatus::Done((_, TaskOutput::Out(data))) => {
if data.is::<SaveTaskOutput>() {
metadata.indexed_count += data
.downcast::<SaveTaskOutput>()
.expect("just checked")
.saved_count;
} else {
metadata.updated_count += data
.downcast::<UpdateTaskOutput>()
.expect("just checked")
.updated_count;
}
}
sd_task_system::TaskStatus::Done((_, TaskOutput::Empty)) => {
warn!("Shallow indexer's saver or updater task finished without any output");
return Ok(None);
}
sd_task_system::TaskStatus::Error(e) => return Err(e),
sd_task_system::TaskStatus::Shutdown(_) => {
debug!("Spacedrive is shuting down while a shallow indexer was in progress");
return Ok(None);
}
sd_task_system::TaskStatus::Canceled | sd_task_system::TaskStatus::ForcedAbortion => {
unreachable!(
"Save or Updater tasks on shallow indexer can never be canceled or aborted"
);
}
}
}
Ok(Some(metadata))
}


@ -0,0 +1,3 @@
pub mod saver;
pub mod updater;
pub mod walker;


@ -0,0 +1,218 @@
use crate::{indexer::IndexerError, Error};
use sd_core_file_path_helper::IsolatedFilePathDataParts;
use sd_core_sync::Manager as SyncManager;
use sd_prisma::{
prisma::{file_path, location, PrismaClient},
prisma_sync,
};
use sd_sync::{sync_db_entry, OperationFactory};
use sd_task_system::{ExecStatus, Interrupter, IntoAnyTaskOutput, SerializableTask, Task, TaskId};
use sd_utils::{db::inode_to_db, msgpack};
use std::{sync::Arc, time::Duration};
use chrono::Utc;
use serde::{Deserialize, Serialize};
use tokio::time::Instant;
use tracing::trace;
use super::walker::WalkedEntry;
#[derive(Debug)]
pub struct SaveTask {
id: TaskId,
location_id: location::id::Type,
location_pub_id: location::pub_id::Type,
walked_entries: Vec<WalkedEntry>,
db: Arc<PrismaClient>,
sync: Arc<SyncManager>,
}
impl SaveTask {
#[must_use]
pub fn new(
location_id: location::id::Type,
location_pub_id: location::pub_id::Type,
walked_entries: Vec<WalkedEntry>,
db: Arc<PrismaClient>,
sync: Arc<SyncManager>,
) -> Self {
Self {
id: TaskId::new_v4(),
location_id,
location_pub_id,
walked_entries,
db,
sync,
}
}
}
#[derive(Debug, Serialize, Deserialize)]
struct SaveTaskSaveState {
id: TaskId,
location_id: location::id::Type,
location_pub_id: location::pub_id::Type,
walked_entries: Vec<WalkedEntry>,
}
impl SerializableTask<Error> for SaveTask {
type SerializeError = rmp_serde::encode::Error;
type DeserializeError = rmp_serde::decode::Error;
type DeserializeCtx = (Arc<PrismaClient>, Arc<SyncManager>);
async fn serialize(self) -> Result<Vec<u8>, Self::SerializeError> {
let Self {
id,
location_id,
location_pub_id,
walked_entries,
..
} = self;
rmp_serde::to_vec_named(&SaveTaskSaveState {
id,
location_id,
location_pub_id,
walked_entries,
})
}
async fn deserialize(
data: &[u8],
(db, sync): Self::DeserializeCtx,
) -> Result<Self, Self::DeserializeError> {
rmp_serde::from_slice(data).map(
|SaveTaskSaveState {
id,
location_id,
location_pub_id,
walked_entries,
}| Self {
id,
location_id,
location_pub_id,
walked_entries,
db,
sync,
},
)
}
}
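// Hedged sketch of the save/resume round trip above, using only `serde` and `rmp_serde` (both
// already dependencies): runtime-only handles are dropped when persisting and re-injected from
// the deserialization context on resume. `ExampleTask`, `ExampleSaveState` and the string
// handle are illustrative stand-ins, not real crate types.
#[derive(serde::Serialize, serde::Deserialize)]
struct ExampleSaveState {
    id: u64,
    pending: Vec<String>,
}

struct ExampleTask {
    id: u64,
    pending: Vec<String>,
    runtime_handle: std::sync::Arc<String>, // stands in for the db/sync handles
}

fn round_trip_sketch() -> Result<(), rmp_serde::decode::Error> {
    let task = ExampleTask {
        id: 7,
        pending: vec!["a".into(), "b".into()],
        runtime_handle: std::sync::Arc::new("db".into()),
    };

    // Persist only the resumable state.
    let bytes = rmp_serde::to_vec_named(&ExampleSaveState {
        id: task.id,
        pending: task.pending.clone(),
    })
    .expect("serializing plain data can't fail here");

    // On resume, the context provides fresh runtime handles.
    let state: ExampleSaveState = rmp_serde::from_slice(&bytes)?;
    let resumed = ExampleTask {
        id: state.id,
        pending: state.pending,
        runtime_handle: std::sync::Arc::new("db".into()),
    };
    assert_eq!(resumed.id, 7);

    Ok(())
}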
#[derive(Debug)]
pub struct SaveTaskOutput {
pub saved_count: u64,
pub save_duration: Duration,
}
#[async_trait::async_trait]
impl Task<Error> for SaveTask {
fn id(&self) -> TaskId {
self.id
}
async fn run(&mut self, _: &Interrupter) -> Result<ExecStatus, Error> {
use file_path::{
create_unchecked, date_created, date_indexed, date_modified, extension, hidden, inode,
is_dir, location, location_id, materialized_path, name, size_in_bytes_bytes,
};
let start_time = Instant::now();
let Self {
location_id,
location_pub_id,
walked_entries,
db,
sync,
..
} = self;
let (sync_stuff, paths): (Vec<_>, Vec<_>) = walked_entries
.drain(..)
.map(|entry| {
let IsolatedFilePathDataParts {
materialized_path,
is_dir,
name,
extension,
..
} = entry.iso_file_path.to_parts();
let pub_id = sd_utils::uuid_to_bytes(entry.pub_id);
let (sync_params, db_params): (Vec<_>, Vec<_>) = [
(
(
location::NAME,
msgpack!(prisma_sync::location::SyncId {
pub_id: location_pub_id.clone()
}),
),
location_id::set(Some(*location_id)),
),
sync_db_entry!(materialized_path.to_string(), materialized_path),
sync_db_entry!(name.to_string(), name),
sync_db_entry!(is_dir, is_dir),
sync_db_entry!(extension.to_string(), extension),
sync_db_entry!(
entry.metadata.size_in_bytes.to_be_bytes().to_vec(),
size_in_bytes_bytes
),
sync_db_entry!(inode_to_db(entry.metadata.inode), inode),
{
let v = entry.metadata.created_at.into();
sync_db_entry!(v, date_created)
},
{
let v = entry.metadata.modified_at.into();
sync_db_entry!(v, date_modified)
},
{
let v = Utc::now().into();
sync_db_entry!(v, date_indexed)
},
sync_db_entry!(entry.metadata.hidden, hidden),
]
.into_iter()
.unzip();
(
sync.shared_create(
prisma_sync::file_path::SyncId {
pub_id: sd_utils::uuid_to_bytes(entry.pub_id),
},
sync_params,
),
create_unchecked(pub_id, db_params),
)
})
.unzip();
#[allow(clippy::cast_sign_loss)]
let saved_count = sync
.write_ops(
db,
(
sync_stuff.into_iter().flatten().collect(),
db.file_path().create_many(paths).skip_duplicates(),
),
)
.await
.map_err(IndexerError::from)? as u64;
trace!("Inserted {saved_count} records");
Ok(ExecStatus::Done(
SaveTaskOutput {
saved_count,
save_duration: start_time.elapsed(),
}
.into_output(),
))
}
}
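// Tiny, standard-library sketch of the pair-and-unzip pattern used in `run` above: every field
// contributes a (sync operation, db operation) pair, and a single `unzip` splits them into the
// two parallel collections handed to `write_ops`. Plain strings stand in for the real operation
// types.
fn unzip_sketch() {
    let fields = ["name", "extension", "hidden"];

    let (sync_ops, db_ops): (Vec<_>, Vec<_>) = fields
        .iter()
        .map(|field| (format!("sync:{field}"), format!("db:{field}")))
        .unzip();

    assert_eq!(sync_ops, ["sync:name", "sync:extension", "sync:hidden"]);
    assert_eq!(db_ops, ["db:name", "db:extension", "db:hidden"]);
}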


@ -0,0 +1,236 @@
use crate::{indexer::IndexerError, Error};
use sd_core_file_path_helper::IsolatedFilePathDataParts;
use sd_core_sync::Manager as SyncManager;
use sd_prisma::{
prisma::{file_path, object, PrismaClient},
prisma_sync,
};
use sd_sync::{sync_db_entry, OperationFactory};
use sd_task_system::{
check_interruption, ExecStatus, Interrupter, IntoAnyTaskOutput, SerializableTask, Task, TaskId,
};
use sd_utils::{chain_optional_iter, db::inode_to_db, msgpack};
use std::{collections::HashSet, sync::Arc, time::Duration};
use serde::{Deserialize, Serialize};
use tokio::time::Instant;
use tracing::trace;
use super::walker::WalkedEntry;
#[derive(Debug)]
pub struct UpdateTask {
id: TaskId,
walked_entries: Vec<WalkedEntry>,
object_ids_that_should_be_unlinked: HashSet<object::id::Type>,
db: Arc<PrismaClient>,
sync: Arc<SyncManager>,
}
impl UpdateTask {
#[must_use]
pub fn new(
walked_entries: Vec<WalkedEntry>,
db: Arc<PrismaClient>,
sync: Arc<SyncManager>,
) -> Self {
Self {
id: TaskId::new_v4(),
walked_entries,
db,
sync,
object_ids_that_should_be_unlinked: HashSet::new(),
}
}
}
#[derive(Debug, Serialize, Deserialize)]
struct UpdateTaskSaveState {
id: TaskId,
walked_entries: Vec<WalkedEntry>,
object_ids_that_should_be_unlinked: HashSet<object::id::Type>,
}
impl SerializableTask<Error> for UpdateTask {
type SerializeError = rmp_serde::encode::Error;
type DeserializeError = rmp_serde::decode::Error;
type DeserializeCtx = (Arc<PrismaClient>, Arc<SyncManager>);
async fn serialize(self) -> Result<Vec<u8>, Self::SerializeError> {
rmp_serde::to_vec_named(&UpdateTaskSaveState {
id: self.id,
walked_entries: self.walked_entries,
object_ids_that_should_be_unlinked: self.object_ids_that_should_be_unlinked,
})
}
async fn deserialize(
data: &[u8],
(db, sync): Self::DeserializeCtx,
) -> Result<Self, Self::DeserializeError> {
rmp_serde::from_slice(data).map(
|UpdateTaskSaveState {
id,
walked_entries,
object_ids_that_should_be_unlinked,
}| Self {
id,
walked_entries,
object_ids_that_should_be_unlinked,
db,
sync,
},
)
}
}
#[derive(Debug)]
pub struct UpdateTaskOutput {
pub updated_count: u64,
pub update_duration: Duration,
}
#[async_trait::async_trait]
impl Task<Error> for UpdateTask {
fn id(&self) -> TaskId {
self.id
}
async fn run(&mut self, interrupter: &Interrupter) -> Result<ExecStatus, Error> {
use file_path::{
cas_id, date_created, date_modified, hidden, inode, is_dir, object, object_id,
size_in_bytes_bytes,
};
let start_time = Instant::now();
let Self {
walked_entries,
db,
sync,
object_ids_that_should_be_unlinked,
..
} = self;
fetch_objects_ids_to_unlink(walked_entries, object_ids_that_should_be_unlinked, db).await?;
check_interruption!(interrupter);
let (sync_stuff, paths_to_update) = walked_entries
.drain(..)
.map(|entry| {
let IsolatedFilePathDataParts { is_dir, .. } = &entry.iso_file_path.to_parts();
let pub_id = sd_utils::uuid_to_bytes(entry.pub_id);
let should_unlink_object = entry.maybe_object_id.map_or(false, |object_id| {
object_ids_that_should_be_unlinked.contains(&object_id)
});
let (sync_params, db_params) = chain_optional_iter(
[
((cas_id::NAME, msgpack!(nil)), cas_id::set(None)),
sync_db_entry!(*is_dir, is_dir),
sync_db_entry!(
entry.metadata.size_in_bytes.to_be_bytes().to_vec(),
size_in_bytes_bytes
),
sync_db_entry!(inode_to_db(entry.metadata.inode), inode),
{
let v = entry.metadata.created_at.into();
sync_db_entry!(v, date_created)
},
{
let v = entry.metadata.modified_at.into();
sync_db_entry!(v, date_modified)
},
sync_db_entry!(entry.metadata.hidden, hidden),
],
[
// As this file was updated while Spacedrive was offline, we mark the object_id and cas_id as null,
// so this file_path will be processed again by the file identifier job
should_unlink_object
.then_some(((object_id::NAME, msgpack!(nil)), object::disconnect())),
],
)
.into_iter()
.unzip::<_, _, Vec<_>, Vec<_>>();
(
sync_params
.into_iter()
.map(|(field, value)| {
sync.shared_update(
prisma_sync::file_path::SyncId {
pub_id: pub_id.clone(),
},
field,
value,
)
})
.collect::<Vec<_>>(),
db.file_path()
.update(file_path::pub_id::equals(pub_id), db_params)
.select(file_path::select!({ id })),
)
})
.unzip::<_, _, Vec<_>, Vec<_>>();
let updated = sync
.write_ops(
db,
(sync_stuff.into_iter().flatten().collect(), paths_to_update),
)
.await
.map_err(IndexerError::from)?;
trace!("Updated {updated:?} records");
Ok(ExecStatus::Done(
UpdateTaskOutput {
updated_count: updated.len() as u64,
update_duration: start_time.elapsed(),
}
.into_output(),
))
}
}
async fn fetch_objects_ids_to_unlink(
walked_entries: &[WalkedEntry],
object_ids_that_should_be_unlinked: &mut HashSet<object::id::Type>,
db: &PrismaClient,
) -> Result<(), IndexerError> {
if object_ids_that_should_be_unlinked.is_empty() {
// First we check which objects should be unlinked
let object_ids = walked_entries
.iter()
.filter_map(|entry| entry.maybe_object_id)
.collect::<HashSet<_>>() // Removing possible duplicates
.into_iter()
.collect::<Vec<_>>();
*object_ids_that_should_be_unlinked = db
._batch(
object_ids
.iter()
.map(|object_id| {
db.file_path()
.count(vec![file_path::object_id::equals(Some(*object_id))])
})
.collect::<Vec<_>>(),
)
.await?
.into_iter()
.zip(object_ids)
.filter_map(|(count, object_id)| (count > 1).then_some(object_id))
.collect::<HashSet<_>>();
}
Ok(())
}
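// Standard-library sketch of the unlink decision above: an object still referenced by more than
// one file_path is shared, so the updated file_path is disconnected from it and left for the
// file identifier job to re-link. The counts come from a plain map here instead of the `_batch`
// of count queries; `objects_to_unlink` is a hypothetical helper.
fn objects_to_unlink(
    reference_counts: &std::collections::HashMap<i32, i64>,
) -> std::collections::HashSet<i32> {
    reference_counts
        .iter()
        .filter_map(|(&object_id, &count)| (count > 1).then_some(object_id))
        .collect()
}

fn unlink_sketch() {
    let counts = std::collections::HashMap::from([(1, 1), (2, 3), (3, 2)]);
    let unlink = objects_to_unlink(&counts);
    assert!(unlink.contains(&2) && unlink.contains(&3) && !unlink.contains(&1));
}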

File diff suppressed because it is too large


@ -0,0 +1,61 @@
use crate::Error;
use sd_utils::error::FileIOError;
use prisma_client_rust::QueryError;
use super::{job::JobName, report::ReportError, JobId};
#[derive(thiserror::Error, Debug)]
pub enum JobSystemError {
#[error("job not found: <id='{0}'>")]
NotFound(JobId),
#[error("job already running: <new_id='{new_id}', name='{job_name}', already_running_id='{already_running_id}'>")]
AlreadyRunning {
new_id: JobId,
job_name: JobName,
already_running_id: JobId,
},
#[error("job canceled: <id='{0}'>")]
Canceled(JobId),
#[error("failed to load job reports from database to resume jobs: {0}")]
LoadReportsForResume(#[from] QueryError),
#[error("failed to serialize job to be saved and resumed later: {0}")]
Serialize(#[from] rmp_serde::encode::Error),
#[error("failed to deserialize job to be resumed: {0}")]
Deserialize(#[from] rmp_serde::decode::Error),
#[error("failed to save or load jobs on disk: {0}")]
StoredJobs(FileIOError),
#[error(transparent)]
Report(#[from] ReportError),
#[error(transparent)]
Processing(#[from] Error),
}
impl From<JobSystemError> for rspc::Error {
fn from(e: JobSystemError) -> Self {
match e {
JobSystemError::NotFound(_) => {
Self::with_cause(rspc::ErrorCode::NotFound, e.to_string(), e)
}
JobSystemError::AlreadyRunning { .. } => {
Self::with_cause(rspc::ErrorCode::Conflict, e.to_string(), e)
}
JobSystemError::Canceled(_) => {
Self::with_cause(rspc::ErrorCode::ClientClosedRequest, e.to_string(), e)
}
JobSystemError::Processing(e) => e.into(),
JobSystemError::Report(e) => e.into(),
_ => Self::with_cause(rspc::ErrorCode::InternalServerError, e.to_string(), e),
}
}
}


@ -0,0 +1,784 @@
use crate::{Error, NonCriticalJobError};
use sd_core_sync::Manager as SyncManager;
use sd_prisma::prisma::PrismaClient;
use sd_task_system::{
BaseTaskDispatcher, Task, TaskDispatcher, TaskHandle, TaskRemoteController, TaskSystemError,
};
use std::{
collections::VecDeque,
hash::{DefaultHasher, Hash, Hasher},
marker::PhantomData,
pin::pin,
sync::Arc,
};
use async_channel as chan;
use chrono::{DateTime, Utc};
use futures::{stream, Future, StreamExt};
use futures_concurrency::{
future::{Join, TryJoin},
stream::Merge,
};
use serde::{Deserialize, Serialize};
use specta::Type;
use strum::{Display, EnumString};
use tokio::spawn;
use tracing::{debug, error, info, warn};
use uuid::Uuid;
use super::{
report::{
Report, ReportBuilder, ReportInputMetadata, ReportMetadata, ReportOutputMetadata, Status,
},
Command, JobId, JobSystemError, SerializableJob, SerializedTasks,
};
#[derive(
Debug, Serialize, Deserialize, EnumString, Display, Clone, Copy, Type, Hash, PartialEq, Eq,
)]
#[strum(use_phf, serialize_all = "snake_case")]
pub enum JobName {
Indexer,
// TODO: Add more job names as needed
}
pub enum ReturnStatus {
Completed(JobReturn),
Shutdown(Result<Option<Vec<u8>>, rmp_serde::encode::Error>),
Canceled,
}
pub enum ProgressUpdate {
TaskCount(u64),
CompletedTaskCount(u64),
Message(String),
Phase(String),
}
impl ProgressUpdate {
pub fn message(message: impl Into<String>) -> Self {
Self::Message(message.into())
}
pub fn phase(phase: impl Into<String>) -> Self {
Self::Phase(phase.into())
}
}
pub trait JobContext: Send + Sync + Clone + 'static {
fn id(&self) -> Uuid;
fn db(&self) -> &Arc<PrismaClient>;
fn sync(&self) -> &Arc<SyncManager>;
fn invalidate_query(&self, query: &'static str);
fn query_invalidator(&self) -> impl Fn(&'static str) + Send + Sync;
fn progress(&self, updates: Vec<ProgressUpdate>);
fn progress_msg(&self, msg: impl Into<String>) {
self.progress(vec![ProgressUpdate::Message(msg.into())]);
}
}
pub trait Job: Send + Sync + Hash + 'static {
const NAME: JobName;
#[allow(unused_variables)]
fn resume_tasks(
&mut self,
dispatcher: &JobTaskDispatcher,
ctx: &impl JobContext,
serialized_tasks: SerializedTasks,
) -> impl Future<Output = Result<(), Error>> + Send {
async move { Ok(()) }
}
fn run(
self,
dispatcher: JobTaskDispatcher,
ctx: impl JobContext,
) -> impl Future<Output = Result<ReturnStatus, Error>> + Send;
}
pub trait IntoJob<J, Ctx>
where
J: Job + SerializableJob,
Ctx: JobContext,
{
fn into_job(self) -> Box<dyn DynJob<Ctx>>;
}
impl<J, Ctx> IntoJob<J, Ctx> for J
where
J: Job + SerializableJob,
Ctx: JobContext,
{
fn into_job(self) -> Box<dyn DynJob<Ctx>> {
let id = JobId::new_v4();
Box::new(JobHolder {
id,
job: self,
report: ReportBuilder::new(id, J::NAME).build(),
next_jobs: VecDeque::new(),
_ctx: PhantomData,
})
}
}
impl<J, Ctx> IntoJob<J, Ctx> for JobBuilder<J, Ctx>
where
J: Job + SerializableJob,
Ctx: JobContext,
{
fn into_job(self) -> Box<dyn DynJob<Ctx>> {
self.build()
}
}
#[derive(Debug)]
pub struct JobReturn {
data: JobOutputData,
metadata: Option<ReportOutputMetadata>,
non_critical_errors: Vec<NonCriticalJobError>,
}
impl JobReturn {
#[must_use]
pub fn builder() -> JobReturnBuilder {
JobReturnBuilder {
job_return: Self::default(),
}
}
}
impl Default for JobReturn {
fn default() -> Self {
Self {
data: JobOutputData::Empty,
metadata: None,
non_critical_errors: vec![],
}
}
}
#[derive(Debug, Default)]
pub struct JobReturnBuilder {
job_return: JobReturn,
}
impl JobReturnBuilder {
#[must_use]
pub const fn with_data(mut self, data: JobOutputData) -> Self {
self.job_return.data = data;
self
}
#[must_use]
pub fn with_metadata(mut self, metadata: impl Into<ReportOutputMetadata>) -> Self {
self.job_return.metadata = Some(metadata.into());
self
}
#[must_use]
pub fn with_non_critical_errors(mut self, errors: Vec<NonCriticalJobError>) -> Self {
if self.job_return.non_critical_errors.is_empty() {
self.job_return.non_critical_errors = errors;
} else {
self.job_return.non_critical_errors.extend(errors);
}
self
}
#[must_use]
pub fn build(self) -> JobReturn {
self.job_return
}
}
#[derive(Serialize, Type)]
pub struct JobOutput {
id: JobId,
status: Status,
job_name: JobName,
data: JobOutputData,
metadata: Vec<ReportMetadata>,
non_critical_errors: Vec<NonCriticalJobError>,
}
impl JobOutput {
pub fn prepare_output_and_report(
JobReturn {
data,
metadata,
non_critical_errors,
}: JobReturn,
report: &mut Report,
) -> Self {
if non_critical_errors.is_empty() {
report.status = Status::Completed;
debug!("Job<id='{}', name='{}'> completed", report.id, report.name);
} else {
report.status = Status::CompletedWithErrors;
report.non_critical_errors = non_critical_errors
.iter()
.map(ToString::to_string)
.collect();
warn!(
"Job<id='{}', name='{}'> completed with errors: {non_critical_errors:#?}",
report.id, report.name
);
}
if let Some(metadata) = metadata {
report.metadata.push(ReportMetadata::Output(metadata));
}
report.completed_at = Some(Utc::now());
Self {
id: report.id,
status: report.status,
job_name: report.name,
data,
metadata: report.metadata.clone(),
non_critical_errors,
}
}
}
#[derive(Debug, Serialize, Type)]
pub enum JobOutputData {
Empty,
// TODO: Add more types
}
pub struct JobBuilder<J, Ctx>
where
J: Job + SerializableJob,
Ctx: JobContext,
{
id: JobId,
job: J,
report_builder: ReportBuilder,
next_jobs: VecDeque<Box<dyn DynJob<Ctx>>>,
_ctx: PhantomData<Ctx>,
}
impl<J, Ctx> JobBuilder<J, Ctx>
where
J: Job + SerializableJob,
Ctx: JobContext,
{
pub fn build(self) -> Box<JobHolder<J, Ctx>> {
Box::new(JobHolder {
id: self.id,
job: self.job,
report: self.report_builder.build(),
next_jobs: VecDeque::new(),
_ctx: PhantomData,
})
}
pub fn new(job: J) -> Self {
let id = JobId::new_v4();
Self {
id,
job,
report_builder: ReportBuilder::new(id, J::NAME),
next_jobs: VecDeque::new(),
_ctx: PhantomData,
}
}
#[must_use]
pub fn with_action(mut self, action: impl Into<String>) -> Self {
self.report_builder = self.report_builder.with_action(action);
self
}
#[must_use]
pub fn with_parent_id(mut self, parent_id: JobId) -> Self {
self.report_builder = self.report_builder.with_parent_id(parent_id);
self
}
#[must_use]
pub fn with_metadata(mut self, metadata: ReportInputMetadata) -> Self {
self.report_builder = self.report_builder.with_metadata(metadata);
self
}
#[must_use]
pub fn enqueue_next(mut self, next: impl Job + SerializableJob) -> Self {
let next_job_order = self.next_jobs.len() + 1;
let mut child_job_builder = JobBuilder::new(next).with_parent_id(self.id);
if let Some(parent_action) = &self.report_builder.action {
child_job_builder =
child_job_builder.with_action(format!("{parent_action}-{next_job_order}"));
}
self.next_jobs.push_back(child_job_builder.build());
self
}
}
pub struct JobHolder<J, Ctx>
where
J: Job + SerializableJob,
Ctx: JobContext,
{
pub(super) id: JobId,
pub(super) job: J,
pub(super) report: Report,
pub(super) next_jobs: VecDeque<Box<dyn DynJob<Ctx>>>,
pub(super) _ctx: PhantomData<Ctx>,
}
pub struct JobHandle<Ctx: JobContext> {
pub(crate) next_jobs: VecDeque<Box<dyn DynJob<Ctx>>>,
pub(crate) job_ctx: Ctx,
pub(crate) report: Report,
pub(crate) commands_tx: chan::Sender<Command>,
}
impl<Ctx: JobContext> JobHandle<Ctx> {
pub async fn send_command(&mut self, command: Command) -> Result<(), JobSystemError> {
if self.commands_tx.send(command).await.is_err() {
warn!("Tried to send a {command:?} to a job that was already completed");
Ok(())
} else {
self.command_children(command).await
}
}
pub async fn command_children(&mut self, command: Command) -> Result<(), JobSystemError> {
let (new_status, completed_at) = match command {
Command::Pause => (Status::Paused, None),
Command::Resume => return Ok(()),
Command::Cancel => (Status::Canceled, Some(Utc::now())),
};
self.next_jobs
.iter_mut()
.map(|dyn_job| dyn_job.report_mut())
.map(|next_job_report| async {
next_job_report.status = new_status;
next_job_report.completed_at = completed_at;
next_job_report.update(self.job_ctx.db()).await
})
.collect::<Vec<_>>()
.try_join()
.await
.map(|_| ())
.map_err(Into::into)
}
pub async fn register_start(
&mut self,
start_time: DateTime<Utc>,
) -> Result<(), JobSystemError> {
let Self {
next_jobs,
report,
job_ctx,
..
} = self;
report.status = Status::Running;
if report.started_at.is_none() {
report.started_at = Some(start_time);
}
let db = job_ctx.db();
// If the report doesn't have a created_at date, it's a new report
if report.created_at.is_none() {
report.create(db).await?;
} else {
// Otherwise it can be a job being resumed or a child job that was already created
report.update(db).await?;
}
// Registering child jobs
next_jobs
.iter_mut()
.map(|dyn_job| dyn_job.report_mut())
.map(|next_job_report| async {
if next_job_report.created_at.is_none() {
next_job_report.create(db).await
} else {
Ok(())
}
})
.collect::<Vec<_>>()
.try_join()
.await
.map(|_| ())
.map_err(Into::into)
}
pub async fn complete_job(
&mut self,
job_return: JobReturn,
) -> Result<JobOutput, JobSystemError> {
let Self {
report, job_ctx, ..
} = self;
let output = JobOutput::prepare_output_and_report(job_return, report);
report.update(job_ctx.db()).await?;
Ok(output)
}
pub async fn failed_job(&mut self, e: &Error) -> Result<(), JobSystemError> {
let Self {
report, job_ctx, ..
} = self;
error!(
"Job<id='{}', name='{}'> failed with a critical error: {e:#?};",
report.id, report.name
);
report.status = Status::Failed;
report.critical_error = Some(e.to_string());
report.completed_at = Some(Utc::now());
report.update(job_ctx.db()).await?;
self.command_children(Command::Cancel).await
}
pub async fn shutdown_pause_job(&mut self) -> Result<(), JobSystemError> {
let Self {
report, job_ctx, ..
} = self;
info!(
"Job<id='{}', name='{}'> paused due to system shutdown, we will pause all children jobs",
report.id, report.name
);
report.status = Status::Paused;
report.update(job_ctx.db()).await?;
self.command_children(Command::Pause).await
}
pub async fn cancel_job(&mut self) -> Result<(), JobSystemError> {
let Self {
report, job_ctx, ..
} = self;
info!(
"Job<id='{}', name='{}'> canceled, we will cancel all children jobs",
report.id, report.name
);
report.status = Status::Canceled;
report.completed_at = Some(Utc::now());
report.update(job_ctx.db()).await?;
self.command_children(Command::Cancel).await
}
}
#[async_trait::async_trait]
pub trait DynJob<Ctx: JobContext>: Send + Sync + 'static {
fn id(&self) -> JobId;
fn job_name(&self) -> JobName;
fn hash(&self) -> u64;
fn report_mut(&mut self) -> &mut Report;
fn set_next_jobs(&mut self, next_jobs: VecDeque<Box<dyn DynJob<Ctx>>>);
fn next_jobs(&self) -> &VecDeque<Box<dyn DynJob<Ctx>>>;
async fn serialize(self: Box<Self>) -> Result<Option<Vec<u8>>, rmp_serde::encode::Error>;
fn dispatch(
self: Box<Self>,
base_dispatcher: BaseTaskDispatcher<Error>,
job_ctx: Ctx,
done_tx: chan::Sender<(JobId, Result<ReturnStatus, Error>)>,
) -> JobHandle<Ctx>;
fn resume(
self: Box<Self>,
base_dispatcher: BaseTaskDispatcher<Error>,
job_ctx: Ctx,
serialized_tasks: Option<SerializedTasks>,
done_tx: chan::Sender<(JobId, Result<ReturnStatus, Error>)>,
) -> JobHandle<Ctx>;
}
#[async_trait::async_trait]
impl<J, Ctx> DynJob<Ctx> for JobHolder<J, Ctx>
where
J: Job + SerializableJob,
Ctx: JobContext,
{
fn id(&self) -> JobId {
self.id
}
fn job_name(&self) -> JobName {
J::NAME
}
fn hash(&self) -> u64 {
let mut hasher = DefaultHasher::new();
J::NAME.hash(&mut hasher);
self.job.hash(&mut hasher);
hasher.finish()
}
fn report_mut(&mut self) -> &mut Report {
&mut self.report
}
fn set_next_jobs(&mut self, next_jobs: VecDeque<Box<dyn DynJob<Ctx>>>) {
self.next_jobs = next_jobs;
}
fn next_jobs(&self) -> &VecDeque<Box<dyn DynJob<Ctx>>> {
&self.next_jobs
}
async fn serialize(self: Box<Self>) -> Result<Option<Vec<u8>>, rmp_serde::encode::Error> {
self.job.serialize().await
}
fn dispatch(
self: Box<Self>,
base_dispatcher: BaseTaskDispatcher<Error>,
job_ctx: Ctx,
done_tx: chan::Sender<(JobId, Result<ReturnStatus, Error>)>,
) -> JobHandle<Ctx> {
let (commands_tx, commands_rx) = chan::bounded(8);
spawn(to_spawn_job(
self.id,
self.job,
job_ctx.clone(),
None,
base_dispatcher,
commands_rx,
done_tx,
));
JobHandle {
next_jobs: self.next_jobs,
job_ctx,
report: self.report,
commands_tx,
}
}
fn resume(
self: Box<Self>,
base_dispatcher: BaseTaskDispatcher<Error>,
job_ctx: Ctx,
serialized_tasks: Option<SerializedTasks>,
done_tx: chan::Sender<(JobId, Result<ReturnStatus, Error>)>,
) -> JobHandle<Ctx> {
let (commands_tx, commands_rx) = chan::bounded(8);
spawn(to_spawn_job(
self.id,
self.job,
job_ctx.clone(),
serialized_tasks,
base_dispatcher,
commands_rx,
done_tx,
));
JobHandle {
next_jobs: self.next_jobs,
job_ctx,
report: self.report,
commands_tx,
}
}
}
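// Self-contained sketch of the identity hash above: a job's "already running" identity is the
// hash of its name plus its hashable state, so dispatching the same job twice over the same
// input produces the same u64 and the second dispatch can be rejected. `"indexer"` and the
// integer state are illustrative stand-ins for (JobName, job state).
fn job_identity_hash(name: &str, state: impl std::hash::Hash) -> u64 {
    use std::hash::{Hash, Hasher};

    let mut hasher = std::hash::DefaultHasher::new();
    name.hash(&mut hasher);
    state.hash(&mut hasher);
    hasher.finish()
}

fn job_hash_sketch() {
    assert_eq!(job_identity_hash("indexer", 42), job_identity_hash("indexer", 42));
    assert_ne!(job_identity_hash("indexer", 42), job_identity_hash("indexer", 43));
}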
async fn to_spawn_job<Ctx: JobContext>(
id: JobId,
mut job: impl Job,
job_ctx: Ctx,
existing_tasks: Option<SerializedTasks>,
base_dispatcher: BaseTaskDispatcher<Error>,
commands_rx: chan::Receiver<Command>,
done_tx: chan::Sender<(JobId, Result<ReturnStatus, Error>)>,
) {
enum StreamMessage {
Commands(Command),
NewRemoteController(TaskRemoteController),
Done(Result<ReturnStatus, Error>),
}
let mut remote_controllers = vec![];
let (dispatcher, remote_controllers_rx) = JobTaskDispatcher::new(base_dispatcher);
if let Some(existing_tasks) = existing_tasks {
if let Err(e) = job
.resume_tasks(&dispatcher, &job_ctx, existing_tasks)
.await
{
done_tx
.send((id, Err(e)))
.await
.expect("jobs done tx closed on error at resume_tasks");
return;
}
}
let mut msgs_stream = pin!((
commands_rx.map(StreamMessage::Commands),
remote_controllers_rx.map(StreamMessage::NewRemoteController),
stream::once(job.run(dispatcher, job_ctx)).map(StreamMessage::Done),
)
.merge());
while let Some(msg) = msgs_stream.next().await {
match msg {
StreamMessage::NewRemoteController(remote_controller) => {
remote_controllers.push(remote_controller);
}
StreamMessage::Commands(command) => {
remote_controllers.retain(|controller| !controller.is_done());
match command {
Command::Pause => {
remote_controllers
.iter()
.map(TaskRemoteController::pause)
.collect::<Vec<_>>()
.join()
.await
.into_iter()
.for_each(|res| {
if let Err(e) = res {
assert!(matches!(e, TaskSystemError::TaskNotFound(_)));
warn!("Tried to pause a task that was already completed");
}
});
}
Command::Resume => {
remote_controllers
.iter()
.map(TaskRemoteController::resume)
.collect::<Vec<_>>()
.join()
.await
.into_iter()
.for_each(|res| {
if let Err(e) = res {
assert!(matches!(e, TaskSystemError::TaskNotFound(_)));
warn!("Tried to pause a task that was already completed");
}
});
}
Command::Cancel => {
remote_controllers
.iter()
.map(TaskRemoteController::cancel)
.collect::<Vec<_>>()
.join()
.await;
return done_tx
.send((id, Ok(ReturnStatus::Canceled)))
.await
.expect("jobs done tx closed");
}
}
}
StreamMessage::Done(res) => {
#[cfg(debug_assertions)]
{
// Just a sanity check to make sure we don't have any pending tasks left
remote_controllers.retain(|controller| !controller.is_done());
assert!(remote_controllers.is_empty());
// Using #[cfg(debug_assertions)] so we don't pay this retain cost in release builds
}
return done_tx.send((id, res)).await.expect("jobs done tx closed");
}
}
}
}
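// Hedged sketch of the stream-merge loop in `to_spawn_job` above: command messages and the
// job's completion are wrapped in one enum and consumed from a single merged stream, so a
// running job can be controlled without extra spawned tasks. Uses `async-channel`, `futures`
// and `futures-concurrency`, which are already dependencies; the payloads are simplified
// stand-ins.
fn merge_sketch() {
    use futures::StreamExt;
    use futures_concurrency::stream::Merge;

    enum SketchMessage {
        Command(&'static str),
        Done(u64),
    }

    futures::executor::block_on(async {
        let (commands_tx, commands_rx) = async_channel::bounded(8);
        commands_tx.send("pause").await.expect("receiver is alive");
        drop(commands_tx); // close the channel so the merged stream can finish

        let fake_job = async { 42u64 }; // stands in for `job.run(dispatcher, job_ctx)`

        let mut msgs = std::pin::pin!((
            commands_rx.map(SketchMessage::Command),
            futures::stream::once(fake_job).map(SketchMessage::Done),
        )
            .merge());

        while let Some(msg) = msgs.next().await {
            match msg {
                SketchMessage::Command(command) => assert_eq!(command, "pause"),
                SketchMessage::Done(output) => assert_eq!(output, 42),
            }
        }
    });
}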
#[derive(Debug, Clone)]
pub struct JobTaskDispatcher {
dispatcher: BaseTaskDispatcher<Error>,
remote_controllers_tx: chan::Sender<TaskRemoteController>,
}
impl TaskDispatcher<Error> for JobTaskDispatcher {
async fn dispatch_boxed(&self, boxed_task: Box<dyn Task<Error>>) -> TaskHandle<Error> {
let handle = self.dispatcher.dispatch_boxed(boxed_task).await;
self.remote_controllers_tx
.send(handle.remote_controller())
.await
.expect("remote controllers tx closed");
handle
}
async fn dispatch_many_boxed(
&self,
boxed_tasks: impl IntoIterator<Item = Box<dyn Task<Error>>> + Send,
) -> Vec<TaskHandle<Error>> {
let handles = self.dispatcher.dispatch_many_boxed(boxed_tasks).await;
// Register every handle's remote controller so the running job can later pause, resume or cancel its tasks
handles
.iter()
.map(|handle| self.remote_controllers_tx.send(handle.remote_controller()))
.collect::<Vec<_>>()
.try_join()
.await
.expect("remote controllers tx closed");
handles
}
}
impl JobTaskDispatcher {
fn new(dispatcher: BaseTaskDispatcher<Error>) -> (Self, chan::Receiver<TaskRemoteController>) {
let (remote_controllers_tx, remote_controllers_rx) = chan::unbounded();
(
Self {
dispatcher,
remote_controllers_tx,
},
remote_controllers_rx,
)
}
}


@ -0,0 +1,313 @@
use crate::Error;
use sd_prisma::prisma::location;
use sd_task_system::BaseTaskDispatcher;
use sd_utils::error::FileIOError;
use std::{cell::RefCell, collections::hash_map::HashMap, path::Path, sync::Arc};
use async_channel as chan;
use futures::Stream;
use futures_concurrency::future::{Join, TryJoin};
use tokio::{fs, spawn, sync::oneshot, task::JoinHandle};
use tracing::{error, info, trace, warn};
use uuid::Uuid;
mod error;
pub mod job;
pub mod report;
mod runner;
mod store;
pub mod utils;
use error::JobSystemError;
use job::{IntoJob, Job, JobContext, JobName, JobOutput};
use runner::{run, JobSystemRunner, RunnerMessage};
use store::{load_jobs, StoredJobEntry};
pub use store::{SerializableJob, SerializedTasks};
const PENDING_JOBS_FILE: &str = "pending_jobs.bin";
pub type JobId = Uuid;
#[derive(Debug, Clone, Copy)]
pub enum Command {
Pause,
Resume,
Cancel,
}
pub struct JobSystem<Ctx: JobContext> {
msgs_tx: chan::Sender<RunnerMessage<Ctx>>,
job_outputs_rx: chan::Receiver<(JobId, Result<JobOutput, JobSystemError>)>,
runner_handle: RefCell<Option<JoinHandle<()>>>,
}
impl<Ctx: JobContext> JobSystem<Ctx> {
pub async fn new(
base_dispatcher: BaseTaskDispatcher<Error>,
data_directory: impl AsRef<Path> + Send,
previously_existing_contexts: &HashMap<Uuid, Ctx>,
) -> Result<Self, JobSystemError> {
let (job_outputs_tx, job_outputs_rx) = chan::unbounded();
let (job_return_status_tx, job_return_status_rx) = chan::bounded(16);
let (msgs_tx, msgs_rx) = chan::bounded(8);
let store_jobs_file = Arc::new(data_directory.as_ref().join(PENDING_JOBS_FILE));
let runner_handle = RefCell::new(Some(spawn({
let store_jobs_file = Arc::clone(&store_jobs_file);
async move {
trace!("Job System Runner starting...");
while let Err(e) = spawn({
let store_jobs_file = Arc::clone(&store_jobs_file);
let base_dispatcher = base_dispatcher.clone();
let job_return_status_tx = job_return_status_tx.clone();
let job_return_status_rx = job_return_status_rx.clone();
let job_outputs_tx = job_outputs_tx.clone();
let msgs_rx = msgs_rx.clone();
async move {
run(
JobSystemRunner::new(
base_dispatcher,
job_return_status_tx,
job_outputs_tx,
),
store_jobs_file.as_ref(),
msgs_rx,
job_return_status_rx,
)
.await;
}
})
.await
{
if e.is_panic() {
error!("Job system panicked: {e:#?}");
} else {
trace!("JobSystemRunner received shutdown signal and will exit...");
break;
}
trace!("Restarting JobSystemRunner processing task...");
}
info!("JobSystemRunner gracefully shutdown");
}
})));
load_stored_job_entries(
store_jobs_file.as_ref(),
previously_existing_contexts,
&msgs_tx,
)
.await?;
Ok(Self {
msgs_tx,
job_outputs_rx,
runner_handle,
})
}
/// Checks if *any* of the desired jobs is running for the desired location
/// # Panics
/// Panics only happen if internal channels are unexpectedly closed
pub async fn check_running_jobs(
&self,
job_names: Vec<JobName>,
location_id: location::id::Type,
) -> bool {
let (ack_tx, ack_rx) = oneshot::channel();
self.msgs_tx
.send(RunnerMessage::CheckIfJobAreRunning {
job_names,
location_id,
ack_tx,
})
.await
.expect("runner msgs channel unexpectedly closed on check running job request");
ack_rx
.await
.expect("ack channel closed before receiving check running job response")
}
/// Shutdown the job system
/// # Panics
/// Panics only happen if internal channels are unexpectedly closed
pub async fn shutdown(&self) {
if let Some(handle) = self
.runner_handle
.try_borrow_mut()
.ok()
.and_then(|mut maybe_handle| maybe_handle.take())
{
self.msgs_tx
.send(RunnerMessage::Shutdown)
.await
.expect("runner msgs channel unexpectedly closed on shutdown request");
if let Err(e) = handle.await {
if e.is_panic() {
error!("JobSystem panicked: {e:#?}");
}
}
info!("JobSystem gracefully shutdown");
} else {
warn!("JobSystem already shutdown");
}
}
/// Dispatch a new job to the system
/// # Panics
/// Panics only happen if internal channels are unexpectedly closed
pub async fn dispatch<J: Job + SerializableJob>(
&mut self,
job: impl IntoJob<J, Ctx> + Send,
location_id: location::id::Type,
job_ctx: Ctx,
) -> Result<JobId, JobSystemError> {
let dyn_job = job.into_job();
let id = dyn_job.id();
let (ack_tx, ack_rx) = oneshot::channel();
self.msgs_tx
.send(RunnerMessage::NewJob {
id,
location_id,
dyn_job,
job_ctx,
ack_tx,
})
.await
.expect("runner msgs channel unexpectedly closed on new job request");
ack_rx
.await
.expect("ack channel closed before receiving new job request")
.map(|()| id)
}
pub fn receive_job_outputs(
&self,
) -> impl Stream<Item = (JobId, Result<JobOutput, JobSystemError>)> {
self.job_outputs_rx.clone()
}
async fn send_command(&self, id: JobId, command: Command) -> Result<(), JobSystemError> {
let (ack_tx, ack_rx) = oneshot::channel();
self.msgs_tx
.send(RunnerMessage::Command {
id,
command,
ack_tx,
})
.await
.unwrap_or_else(|_| {
panic!("runner msgs channel unexpectedly closed on {command:?} request")
});
ack_rx
.await
.unwrap_or_else(|_| panic!("ack channel closed before receiving {command:?} response"))
}
pub async fn pause(&self, id: JobId) -> Result<(), JobSystemError> {
self.send_command(id, Command::Pause).await
}
pub async fn resume(&self, id: JobId) -> Result<(), JobSystemError> {
self.send_command(id, Command::Resume).await
}
pub async fn cancel(&self, id: JobId) -> Result<(), JobSystemError> {
self.send_command(id, Command::Cancel).await
}
}
/// SAFETY: Due to the use of `RefCell` we lose the automatic `Sync` impl, but we only use it so the
/// shutdown method can take `&self`; it is called once and uses `try_borrow_mut`, so we never panic
unsafe impl<Ctx: JobContext> Sync for JobSystem<Ctx> {}
async fn load_stored_job_entries<Ctx: JobContext>(
store_jobs_file: impl AsRef<Path> + Send,
previously_existing_job_contexts: &HashMap<Uuid, Ctx>,
msgs_tx: &chan::Sender<RunnerMessage<Ctx>>,
) -> Result<(), JobSystemError> {
let store_jobs_file = store_jobs_file.as_ref();
let stores_jobs_by_db = rmp_serde::from_slice::<HashMap<Uuid, Vec<StoredJobEntry>>>(
&fs::read(store_jobs_file).await.map_err(|e| {
JobSystemError::StoredJobs(FileIOError::from((
store_jobs_file,
e,
"Failed to load jobs from disk",
)))
})?,
)?;
stores_jobs_by_db
.into_iter()
.filter_map(|(ctx_id, entries)| {
previously_existing_job_contexts.get(&ctx_id).map_or_else(
|| {
warn!("Found stored jobs for a database that doesn't exist anymore: <ctx_id='{ctx_id}'>");
None
},
|ctx| Some((entries, ctx.clone())),
)
})
.map(|(entries, ctx)| async move {
load_jobs(entries, &ctx)
.await
.map(|stored_jobs| (stored_jobs, ctx))
})
.collect::<Vec<_>>()
.join()
.await
.into_iter()
.filter_map(|res| {
res.map_err(|e| error!("Failed to load stored jobs: {e:#?}"))
.ok()
})
.flat_map(|(stored_jobs, job_ctx)| {
stored_jobs
.into_iter()
.map(move |(location_id, dyn_job, serialized_tasks)| {
let job_ctx = job_ctx.clone();
async move {
let (ack_tx, ack_rx) = oneshot::channel();
msgs_tx
.send(RunnerMessage::ResumeStoredJob {
id: dyn_job.id(),
location_id,
dyn_job,
job_ctx,
serialized_tasks,
ack_tx,
})
.await
.expect("runner msgs channel unexpectedly closed on stored job resume");
ack_rx.await.expect(
"ack channel closed before receiving stored job resume response",
)
}
})
})
.collect::<Vec<_>>()
.try_join()
.await?;
fs::remove_file(store_jobs_file).await.map_err(|e| {
JobSystemError::StoredJobs(FileIOError::from((
store_jobs_file,
e,
"Failed to clean stored jobs file",
)))
})
}


@ -0,0 +1,359 @@
use sd_prisma::prisma::{job, PrismaClient};
use sd_utils::db::{maybe_missing, MissingFieldError};
use std::{collections::HashMap, fmt, str::FromStr};
use chrono::{DateTime, Utc};
use prisma_client_rust::QueryError;
use serde::{Deserialize, Serialize};
use specta::Type;
use strum::ParseError;
use tracing::error;
use super::{job::JobName, JobId};
#[derive(thiserror::Error, Debug)]
pub enum ReportError {
#[error("failed to create job report in database: {0}")]
Create(QueryError),
#[error("failed to update job report in database: {0}")]
Update(QueryError),
#[error("invalid job status integer: {0}")]
InvalidJobStatusInt(i32),
#[error("job not found in database: <id='{0}'>")]
MissingReport(JobId),
#[error("serialization error: {0}")]
Serialization(#[from] rmp_serde::encode::Error),
#[error("deserialization error: {0}")]
Deserialization(#[from] rmp_serde::decode::Error),
#[error(transparent)]
MissingField(#[from] MissingFieldError),
#[error("failed to parse job name from database: {0}")]
JobNameParse(#[from] ParseError),
}
impl From<ReportError> for rspc::Error {
fn from(e: ReportError) -> Self {
match e {
ReportError::Create(_)
| ReportError::Update(_)
| ReportError::InvalidJobStatusInt(_) => {
Self::with_cause(rspc::ErrorCode::BadRequest, e.to_string(), e)
}
ReportError::MissingReport(_) => {
Self::with_cause(rspc::ErrorCode::NotFound, e.to_string(), e)
}
ReportError::Serialization(_)
| ReportError::Deserialization(_)
| ReportError::MissingField(_)
| ReportError::JobNameParse(_) => {
Self::with_cause(rspc::ErrorCode::InternalServerError, e.to_string(), e)
}
}
}
}
#[derive(Debug, Serialize, Deserialize, Type, Clone)]
pub enum ReportMetadata {
Input(ReportInputMetadata),
Output(ReportOutputMetadata),
}
#[derive(Debug, Serialize, Deserialize, Type, Clone)]
pub enum ReportInputMetadata {
Placeholder,
// TODO: Add more types
}
#[derive(Debug, Serialize, Deserialize, Type, Clone)]
pub enum ReportOutputMetadata {
Metrics(HashMap<String, serde_json::Value>),
// TODO: Add more types
}
#[derive(Debug, Serialize, Type, Clone)]
pub struct Report {
pub id: JobId,
pub name: JobName,
pub action: Option<String>,
pub metadata: Vec<ReportMetadata>,
pub critical_error: Option<String>,
pub non_critical_errors: Vec<String>,
pub created_at: Option<DateTime<Utc>>,
pub started_at: Option<DateTime<Utc>>,
pub completed_at: Option<DateTime<Utc>>,
pub parent_id: Option<JobId>,
pub status: Status,
pub task_count: i32,
pub completed_task_count: i32,
pub phase: String,
pub message: String,
pub estimated_completion: DateTime<Utc>,
}
impl fmt::Display for Report {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"Job <name='{}', uuid='{}'> {:#?}",
self.name, self.id, self.status
)
}
}
// convert database struct into a resource struct
impl TryFrom<job::Data> for Report {
type Error = ReportError;
fn try_from(data: job::Data) -> Result<Self, Self::Error> {
Ok(Self {
id: JobId::from_slice(&data.id).expect("corrupted database"),
name: JobName::from_str(&maybe_missing(data.name, "job.name")?)?,
action: data.action,
metadata: data
.metadata
.map(|m| {
rmp_serde::from_slice(&m).unwrap_or_else(|e| {
error!("Failed to deserialize job metadata: {e:#?}");
vec![]
})
})
.unwrap_or_default(),
critical_error: data.critical_error,
non_critical_errors: data.non_critical_errors.map_or_else(
Default::default,
|non_critical_errors| {
serde_json::from_slice(&non_critical_errors).unwrap_or_else(|e| {
error!("Failed to deserialize job non-critical errors: {e:#?}");
vec![]
})
},
),
created_at: data.date_created.map(DateTime::into),
started_at: data.date_started.map(DateTime::into),
completed_at: data.date_completed.map(DateTime::into),
parent_id: data
.parent_id
.map(|id| JobId::from_slice(&id).expect("corrupted database")),
status: Status::try_from(maybe_missing(data.status, "job.status")?)
.expect("corrupted database"),
task_count: data.task_count.unwrap_or(0),
completed_task_count: data.completed_task_count.unwrap_or(0),
phase: String::new(),
message: String::new(),
estimated_completion: data
.date_estimated_completion
.map_or_else(Utc::now, DateTime::into),
})
}
}
impl Report {
#[must_use]
pub fn new(uuid: JobId, name: JobName) -> Self {
Self {
id: uuid,
name,
action: None,
created_at: None,
started_at: None,
completed_at: None,
status: Status::Queued,
critical_error: None,
non_critical_errors: vec![],
task_count: 0,
metadata: vec![],
parent_id: None,
completed_task_count: 0,
phase: String::new(),
message: String::new(),
estimated_completion: Utc::now(),
}
}
#[must_use]
pub fn get_action_name_and_group_key(&self) -> (String, Option<String>) {
// actions are formatted like "added_location" or "added_location-1"
let Some(action_name) = self
.action
.as_ref()
.and_then(|action| action.split('-').next().map(str::to_string))
else {
return (self.id.to_string(), None);
};
// create a unique group_key, e.g. "added_location-<parent_job_id>" (or the job's own id when there is no parent)
let group_key = self.parent_id.map_or_else(
|| format!("{action_name}-{}", self.id),
|parent_id| format!("{action_name}-{parent_id}"),
);
(action_name, Some(group_key))
}
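// Self-contained sketch of the action/group-key convention above: actions look like
// "added_location" or "added_location-1", the part before the first '-' is the action name,
// and the group key is "<action_name>-<parent id, or the job's own id>". Plain strings stand
// in for the real `Report` fields; `action_name_and_group_key` is a hypothetical helper.
fn action_name_and_group_key(
    action: Option<&str>,
    own_id: &str,
    parent_id: Option<&str>,
) -> (String, Option<String>) {
    let Some(action_name) = action
        .and_then(|action| action.split('-').next())
        .map(str::to_string)
    else {
        return (own_id.to_string(), None);
    };

    let group_key = format!("{action_name}-{}", parent_id.unwrap_or(own_id));

    (action_name, Some(group_key))
}

fn group_key_sketch() {
    assert_eq!(
        action_name_and_group_key(Some("added_location-1"), "job-2", Some("job-1")),
        (
            "added_location".to_string(),
            Some("added_location-job-1".to_string())
        )
    );
    assert_eq!(
        action_name_and_group_key(None, "job-3", None),
        ("job-3".to_string(), None)
    );
}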
pub async fn create(&mut self, db: &PrismaClient) -> Result<(), ReportError> {
let now = Utc::now();
db.job()
.create(
self.id.as_bytes().to_vec(),
sd_utils::chain_optional_iter(
[
job::name::set(Some(self.name.to_string())),
job::action::set(self.action.clone()),
job::date_created::set(Some(now.into())),
job::metadata::set(Some(rmp_serde::to_vec(&self.metadata)?)),
job::status::set(Some(self.status as i32)),
job::date_started::set(self.started_at.map(Into::into)),
job::task_count::set(Some(1)),
job::completed_task_count::set(Some(0)),
],
[self
.parent_id
.map(|id| job::parent::connect(job::id::equals(id.as_bytes().to_vec())))],
),
)
.exec()
.await
.map_err(ReportError::Create)?;
// Only setting created_at after we successfully created the job in DB
self.created_at = Some(now);
Ok(())
}
pub async fn update(&mut self, db: &PrismaClient) -> Result<(), ReportError> {
db.job()
.update(
job::id::equals(self.id.as_bytes().to_vec()),
vec![
job::status::set(Some(self.status as i32)),
job::critical_error::set(self.critical_error.clone()),
job::non_critical_errors::set(Some(rmp_serde::to_vec(
&self.non_critical_errors,
)?)),
job::metadata::set(Some(rmp_serde::to_vec(&self.metadata)?)),
job::task_count::set(Some(self.task_count)),
job::completed_task_count::set(Some(self.completed_task_count)),
job::date_started::set(self.started_at.map(Into::into)),
job::date_completed::set(self.completed_at.map(Into::into)),
],
)
.exec()
.await
.map_err(ReportError::Update)?;
Ok(())
}
}
#[repr(i32)]
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Type, Eq, PartialEq)]
pub enum Status {
Queued = 0,
Running = 1,
Completed = 2,
Canceled = 3,
Failed = 4,
Paused = 5,
CompletedWithErrors = 6,
}
impl Status {
#[must_use]
pub const fn is_finished(self) -> bool {
matches!(
self,
Self::Completed
| Self::Canceled | Self::Paused
| Self::Failed | Self::CompletedWithErrors
)
}
}
impl TryFrom<i32> for Status {
type Error = ReportError;
fn try_from(value: i32) -> Result<Self, Self::Error> {
let s = match value {
0 => Self::Queued,
1 => Self::Running,
2 => Self::Completed,
3 => Self::Canceled,
4 => Self::Failed,
5 => Self::Paused,
6 => Self::CompletedWithErrors,
_ => return Err(Self::Error::InvalidJobStatusInt(value)),
};
Ok(s)
}
}
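// Minimal sketch of the status round trip above: the enum is persisted as its i32 discriminant
// in the `job` table and converted back with `TryFrom`, rejecting unknown values. `MiniStatus`
// is an illustrative stand-in for `Status`, not a crate type.
#[repr(i32)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MiniStatus {
    Queued = 0,
    Running = 1,
    Completed = 2,
}

impl TryFrom<i32> for MiniStatus {
    type Error = i32;

    fn try_from(value: i32) -> Result<Self, Self::Error> {
        Ok(match value {
            0 => Self::Queued,
            1 => Self::Running,
            2 => Self::Completed,
            unknown => return Err(unknown),
        })
    }
}

fn status_round_trip_sketch() {
    assert_eq!(MiniStatus::Completed as i32, 2);
    assert_eq!(MiniStatus::try_from(2), Ok(MiniStatus::Completed));
    assert!(MiniStatus::try_from(99).is_err());
}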
pub struct ReportBuilder {
pub id: JobId,
pub name: JobName,
pub action: Option<String>,
pub metadata: Vec<ReportMetadata>,
pub parent_id: Option<JobId>,
}
impl ReportBuilder {
#[must_use]
pub fn build(self) -> Report {
Report {
id: self.id,
name: self.name,
action: self.action,
created_at: None,
started_at: None,
completed_at: None,
status: Status::Queued,
critical_error: None,
task_count: 0,
non_critical_errors: vec![],
metadata: self.metadata,
parent_id: self.parent_id,
completed_task_count: 0,
phase: String::new(),
message: String::new(),
estimated_completion: Utc::now(),
}
}
#[must_use]
pub fn new(id: JobId, name: JobName) -> Self {
Self {
id,
name,
action: None,
metadata: vec![],
parent_id: None,
}
}
#[must_use]
pub fn with_action(mut self, action: impl Into<String>) -> Self {
self.action = Some(action.into());
self
}
#[must_use]
pub fn with_metadata(mut self, metadata: ReportInputMetadata) -> Self {
self.metadata.push(ReportMetadata::Input(metadata));
self
}
#[must_use]
pub const fn with_parent_id(mut self, parent_id: JobId) -> Self {
self.parent_id = Some(parent_id);
self
}
}


@ -0,0 +1,535 @@
use crate::Error;
use sd_prisma::prisma::location;
use sd_task_system::BaseTaskDispatcher;
use sd_utils::error::FileIOError;
use std::{
collections::{hash_map::Entry, HashMap, HashSet},
mem,
path::Path,
pin::pin,
time::Duration,
};
use async_channel as chan;
use chrono::Utc;
use futures::StreamExt;
use futures_concurrency::{future::TryJoin, stream::Merge};
use tokio::{
fs,
sync::oneshot,
time::{interval_at, Instant},
};
use tokio_stream::wrappers::IntervalStream;
use tracing::{debug, error, info, warn};
use uuid::Uuid;
use super::{
job::{DynJob, JobContext, JobHandle, JobName, JobOutput, ReturnStatus},
report,
store::{StoredJob, StoredJobEntry},
Command, JobId, JobSystemError, SerializedTasks,
};
const JOBS_INITIAL_CAPACITY: usize = 32;
const FIVE_MINUTES: Duration = Duration::from_secs(5 * 60);
pub(super) enum RunnerMessage<Ctx: JobContext> {
NewJob {
id: JobId,
location_id: location::id::Type,
dyn_job: Box<dyn DynJob<Ctx>>,
job_ctx: Ctx,
ack_tx: oneshot::Sender<Result<(), JobSystemError>>,
},
ResumeStoredJob {
id: JobId,
location_id: location::id::Type,
dyn_job: Box<dyn DynJob<Ctx>>,
job_ctx: Ctx,
serialized_tasks: Option<SerializedTasks>,
ack_tx: oneshot::Sender<Result<(), JobSystemError>>,
},
Command {
id: JobId,
command: Command,
ack_tx: oneshot::Sender<Result<(), JobSystemError>>,
},
CheckIfJobAreRunning {
job_names: Vec<JobName>,
location_id: location::id::Type,
ack_tx: oneshot::Sender<bool>,
},
Shutdown,
}
pub(super) struct JobSystemRunner<Ctx: JobContext> {
base_dispatcher: BaseTaskDispatcher<Error>,
handles: HashMap<JobId, JobHandle<Ctx>>,
job_hashes: HashMap<u64, JobId>,
job_hashes_by_id: HashMap<JobId, u64>,
running_jobs_by_job_id: HashMap<JobId, (JobName, location::id::Type)>,
running_jobs_set: HashSet<(JobName, location::id::Type)>,
jobs_to_store_by_ctx_id: HashMap<Uuid, Vec<StoredJobEntry>>,
job_return_status_tx: chan::Sender<(JobId, Result<ReturnStatus, Error>)>,
job_outputs_tx: chan::Sender<(JobId, Result<JobOutput, JobSystemError>)>,
}
impl<Ctx: JobContext> JobSystemRunner<Ctx> {
pub(super) fn new(
base_dispatcher: BaseTaskDispatcher<Error>,
job_return_status_tx: chan::Sender<(JobId, Result<ReturnStatus, Error>)>,
job_outputs_tx: chan::Sender<(JobId, Result<JobOutput, JobSystemError>)>,
) -> Self {
Self {
base_dispatcher,
handles: HashMap::with_capacity(JOBS_INITIAL_CAPACITY),
job_hashes: HashMap::with_capacity(JOBS_INITIAL_CAPACITY),
job_hashes_by_id: HashMap::with_capacity(JOBS_INITIAL_CAPACITY),
running_jobs_by_job_id: HashMap::with_capacity(JOBS_INITIAL_CAPACITY),
running_jobs_set: HashSet::with_capacity(JOBS_INITIAL_CAPACITY),
jobs_to_store_by_ctx_id: HashMap::new(),
job_return_status_tx,
job_outputs_tx,
}
}
async fn new_job(
&mut self,
id: JobId,
location_id: location::id::Type,
dyn_job: Box<dyn DynJob<Ctx>>,
job_ctx: Ctx,
maybe_existing_tasks: Option<SerializedTasks>,
) -> Result<(), JobSystemError> {
let Self {
base_dispatcher,
handles,
job_hashes,
job_hashes_by_id,
job_return_status_tx,
running_jobs_by_job_id,
running_jobs_set,
..
} = self;
let db = job_ctx.db();
let job_name = dyn_job.job_name();
let job_hash = dyn_job.hash();
if let Some(&already_running_id) = job_hashes.get(&job_hash) {
return Err(JobSystemError::AlreadyRunning {
new_id: id,
already_running_id,
job_name,
});
}
running_jobs_by_job_id.insert(id, (job_name, location_id));
running_jobs_set.insert((job_name, location_id));
job_hashes.insert(job_hash, id);
job_hashes_by_id.insert(id, job_hash);
let start_time = Utc::now();
let mut handle = if maybe_existing_tasks.is_some() {
dyn_job.resume(
base_dispatcher.clone(),
job_ctx.clone(),
maybe_existing_tasks,
job_return_status_tx.clone(),
)
} else {
dyn_job.dispatch(
base_dispatcher.clone(),
job_ctx.clone(),
job_return_status_tx.clone(),
)
};
handle.report.status = report::Status::Running;
if handle.report.started_at.is_none() {
handle.report.started_at = Some(start_time);
}
// If the report doesn't have a created_at date, it's a new report
if handle.report.created_at.is_none() {
handle.report.create(db).await?;
} else {
// Otherwise it can be a job being resumed or a child job that was already created
handle.report.update(db).await?;
}
// Registering child jobs
handle
.next_jobs
.iter_mut()
.map(|dyn_job| dyn_job.report_mut())
.map(|next_job_report| async {
if next_job_report.created_at.is_none() {
next_job_report.create(job_ctx.db()).await
} else {
Ok(())
}
})
.collect::<Vec<_>>()
.try_join()
.await?;
handles.insert(id, handle);
Ok(())
}
async fn process_command(&mut self, id: JobId, command: Command) -> Result<(), JobSystemError> {
if let Some(handle) = self.handles.get_mut(&id) {
handle.send_command(command).await?;
Ok(())
} else {
Err(JobSystemError::NotFound(id))
}
}
fn is_empty(&self) -> bool {
self.handles.is_empty() && self.job_hashes.is_empty() && self.job_hashes_by_id.is_empty()
}
fn check_if_job_are_running(
&self,
job_names: Vec<JobName>,
location_id: location::id::Type,
) -> bool {
job_names
.into_iter()
.any(|job_name| self.running_jobs_set.contains(&(job_name, location_id)))
}
async fn process_return_status(&mut self, job_id: JobId, status: Result<ReturnStatus, Error>) {
let Self {
handles,
job_hashes,
job_hashes_by_id,
job_outputs_tx,
job_return_status_tx,
base_dispatcher,
jobs_to_store_by_ctx_id,
running_jobs_by_job_id,
running_jobs_set,
..
} = self;
let job_hash = job_hashes_by_id.remove(&job_id).expect("it must be here");
let (job_name, location_id) = running_jobs_by_job_id
.remove(&job_id)
.expect("a JobName and location_id must've been inserted in the map with the job id");
assert!(running_jobs_set.remove(&(job_name, location_id)));
assert!(job_hashes.remove(&job_hash).is_some());
let mut handle = handles.remove(&job_id).expect("it must be here");
let res = match status {
Ok(ReturnStatus::Completed(job_return)) => {
try_dispatch_next_job(
&mut handle,
base_dispatcher.clone(),
(job_hashes, job_hashes_by_id),
handles,
job_return_status_tx.clone(),
);
handle.complete_job(job_return).await
}
Ok(ReturnStatus::Shutdown(Ok(Some(serialized_job)))) => {
let name = handle.report.name;
let Ok(next_jobs) = handle
.next_jobs
.into_iter()
.map(|next_job| async move {
let next_id = next_job.id();
let next_name = next_job.job_name();
next_job
.serialize()
.await
.map(|maybe_serialized_job| {
maybe_serialized_job.map(|serialized_job| StoredJob {
id: next_id,
name: next_name,
serialized_job,
})
})
.map_err(|e| {
error!(
"Failed to serialize next job: \
<parent_id='{job_id}', parent_name='{name}', \
next_id='{next_id}', next_name='{next_name}'>: {e:#?}"
);
})
})
.collect::<Vec<_>>()
.try_join()
.await
else {
return;
};
jobs_to_store_by_ctx_id
.entry(handle.job_ctx.id())
.or_default()
.push(StoredJobEntry {
location_id,
root_job: StoredJob {
id: job_id,
name,
serialized_job,
},
next_jobs: next_jobs.into_iter().flatten().collect(),
});
return;
}
Ok(ReturnStatus::Shutdown(Ok(None))) => {
debug!(
"Job was shutdown but didn't returned any serialized data, \
probably it isn't resumable job: <id='{job_id}'>"
);
return;
}
Ok(ReturnStatus::Shutdown(Err(e))) => {
error!("Failed to serialize job: {e:#?}");
return;
}
Ok(ReturnStatus::Canceled) => handle
.cancel_job()
.await
.and_then(|()| Err(JobSystemError::Canceled(job_id))),
Err(e) => handle.failed_job(&e).await.and_then(|()| Err(e.into())),
};
job_outputs_tx
.send((job_id, res))
.await
.expect("job outputs channel unexpectedly closed on job completion");
}
fn clean_memory(&mut self) {
if self.handles.capacity() > JOBS_INITIAL_CAPACITY
&& self.handles.len() < JOBS_INITIAL_CAPACITY
{
self.handles.shrink_to(JOBS_INITIAL_CAPACITY);
}
if self.job_hashes.capacity() > JOBS_INITIAL_CAPACITY
&& self.job_hashes.len() < JOBS_INITIAL_CAPACITY
{
self.job_hashes.shrink_to(JOBS_INITIAL_CAPACITY);
}
if self.job_hashes_by_id.capacity() > JOBS_INITIAL_CAPACITY
&& self.job_hashes_by_id.len() < JOBS_INITIAL_CAPACITY
{
self.job_hashes_by_id.shrink_to(JOBS_INITIAL_CAPACITY);
}
if self.running_jobs_by_job_id.capacity() > JOBS_INITIAL_CAPACITY
&& self.running_jobs_by_job_id.len() < JOBS_INITIAL_CAPACITY
{
self.running_jobs_by_job_id.shrink_to(JOBS_INITIAL_CAPACITY);
}
if self.running_jobs_set.capacity() > JOBS_INITIAL_CAPACITY
&& self.running_jobs_set.len() < JOBS_INITIAL_CAPACITY
{
self.running_jobs_set.shrink_to(JOBS_INITIAL_CAPACITY);
}
}
async fn save_jobs(
self,
store_jobs_file: impl AsRef<Path> + Send,
) -> Result<(), JobSystemError> {
let store_jobs_file = store_jobs_file.as_ref();
let Self {
handles,
job_hashes,
job_hashes_by_id,
jobs_to_store_by_ctx_id,
..
} = self;
assert!(
handles.is_empty() && job_hashes.is_empty() && job_hashes_by_id.is_empty(),
"All jobs must be completed before saving"
);
if jobs_to_store_by_ctx_id.is_empty() {
info!("No jobs to store in disk for job system shutdown!");
return Ok(());
}
fs::write(
store_jobs_file,
rmp_serde::to_vec_named(&jobs_to_store_by_ctx_id)?,
)
.await
.map_err(|e| JobSystemError::StoredJobs(FileIOError::from((store_jobs_file, e))))
}
}
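
A minimal, self-contained sketch of the persistence round-trip that `save_jobs` performs: encode a map with `rmp_serde::to_vec_named`, write it with `tokio::fs`, and decode it back on the next startup. The file name and the map's types here are illustrative stand-ins, not Spacedrive's.

use std::collections::HashMap;

use tokio::fs;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Stand-in for `jobs_to_store_by_ctx_id`: any serde-serializable map works.
    let jobs: HashMap<String, Vec<u8>> = HashMap::from([("ctx-1".into(), vec![1, 2, 3])]);

    // Write the MessagePack bytes to disk, as `save_jobs` does with `store_jobs_file`.
    let path = std::env::temp_dir().join("jobs_to_resume.bin");
    fs::write(&path, rmp_serde::to_vec_named(&jobs)?).await?;

    // On the next startup the bytes decode back into the same map.
    let restored: HashMap<String, Vec<u8>> = rmp_serde::from_slice(&fs::read(&path).await?)?;
    assert_eq!(jobs, restored);
    Ok(())
}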
fn try_dispatch_next_job<Ctx: JobContext>(
handle: &mut JobHandle<Ctx>,
base_dispatcher: BaseTaskDispatcher<Error>,
(job_hashes, job_hashes_by_id): (&mut HashMap<u64, JobId>, &mut HashMap<JobId, u64>),
handles: &mut HashMap<JobId, JobHandle<Ctx>>,
job_return_status_tx: chan::Sender<(JobId, Result<ReturnStatus, Error>)>,
) {
if let Some(next) = handle.next_jobs.pop_front() {
let next_id = next.id();
let next_hash = next.hash();
if let Entry::Vacant(e) = job_hashes.entry(next_hash) {
e.insert(next_id);
job_hashes_by_id.insert(next_id, next_hash);
let mut next_handle = next.dispatch(
base_dispatcher,
handle.job_ctx.clone(),
job_return_status_tx,
);
assert!(
next_handle.next_jobs.is_empty(),
"Only the root job will have next jobs, the rest will be empty and \
we will swap with remaining ones from the previous job"
);
next_handle.next_jobs = mem::take(&mut handle.next_jobs);
handles.insert(next_id, next_handle);
} else {
warn!("Unexpectedly found a job with the same hash as the next job: <id='{next_id}', name='{}'>", next.job_name());
}
}
}
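
The dedup logic above hinges on the map's `Entry` API: a next job is only dispatched when its hash slot is vacant. A tiny standalone illustration of that pattern, with made-up hashes and job names:

use std::collections::{hash_map::Entry, HashMap};

fn main() {
    let mut job_hashes: HashMap<u64, &str> = HashMap::new();
    for (hash, job) in [(42, "indexer"), (42, "another indexer"), (7, "media processor")] {
        match job_hashes.entry(hash) {
            // First time this hash shows up: register it and "dispatch" the job.
            Entry::Vacant(slot) => {
                slot.insert(job);
                println!("dispatching {job}");
            }
            // Same hash again: skip it, mirroring the `warn!` branch above.
            Entry::Occupied(existing) => {
                println!("skipping {job}; {} is already tracked", existing.get());
            }
        }
    }
}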
pub(super) async fn run<Ctx: JobContext>(
mut runner: JobSystemRunner<Ctx>,
store_jobs_file: impl AsRef<Path> + Send,
msgs_rx: chan::Receiver<RunnerMessage<Ctx>>,
job_return_status_rx: chan::Receiver<(JobId, Result<ReturnStatus, Error>)>,
) {
enum StreamMessage<Ctx: JobContext> {
ReturnStatus((JobId, Result<ReturnStatus, Error>)),
RunnerMessage(RunnerMessage<Ctx>),
CleanMemoryTick,
}
let memory_cleanup_interval = interval_at(Instant::now() + FIVE_MINUTES, FIVE_MINUTES);
let job_return_status_rx_to_shutdown = job_return_status_rx.clone();
let mut msg_stream = pin!((
msgs_rx.map(StreamMessage::RunnerMessage),
job_return_status_rx.map(StreamMessage::ReturnStatus),
IntervalStream::new(memory_cleanup_interval).map(|_| StreamMessage::CleanMemoryTick),
)
.merge());
while let Some(msg) = msg_stream.next().await {
match msg {
// Job return status messages
StreamMessage::ReturnStatus((job_id, status)) => {
runner.process_return_status(job_id, status).await;
}
// Runner messages
StreamMessage::RunnerMessage(RunnerMessage::NewJob {
id,
location_id,
dyn_job,
job_ctx,
ack_tx,
}) => {
ack_tx
.send(
runner
.new_job(id, location_id, dyn_job, job_ctx, None)
.await,
)
.expect("ack channel closed before sending new job response");
}
StreamMessage::RunnerMessage(RunnerMessage::ResumeStoredJob {
id,
location_id,
dyn_job,
job_ctx,
serialized_tasks,
ack_tx,
}) => {
ack_tx
.send(
runner
.new_job(id, location_id, dyn_job, job_ctx, serialized_tasks)
.await,
)
.expect("ack channel closed before sending resume job response");
}
StreamMessage::RunnerMessage(RunnerMessage::Command {
id,
command,
ack_tx,
}) => {
ack_tx
.send(runner.process_command(id, command).await)
.unwrap_or_else(|_| {
panic!("ack channel closed before sending {command:?} response")
});
}
StreamMessage::RunnerMessage(RunnerMessage::Shutdown) => {
// Consuming all pending return status messages
loop {
while let Ok((job_id, status)) = job_return_status_rx_to_shutdown.try_recv() {
runner.process_return_status(job_id, status).await;
}
if runner.is_empty() {
break;
}
debug!("Waiting for all jobs to complete before shutting down...");
}
// Now the runner can shutdown
if let Err(e) = runner.save_jobs(store_jobs_file).await {
error!("Failed to save jobs before shutting down: {e:#?}");
}
return;
}
StreamMessage::RunnerMessage(RunnerMessage::CheckIfJobAreRunning {
job_names,
location_id,
ack_tx,
}) => {
ack_tx
.send(runner.check_if_job_are_running(job_names, location_id))
.expect("ack channel closed before sending resume job response");
}
// Memory cleanup tick
StreamMessage::CleanMemoryTick => {
runner.clean_memory();
}
}
}
}
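
For readers unfamiliar with the merged-stream loop in `run`, here is a minimal, self-contained sketch of the same shape — one task multiplexing command messages with a periodic housekeeping tick — written with plain tokio primitives; the `Msg` type, channel size, and five-minute period are illustrative only.

use std::time::Duration;

use tokio::{sync::mpsc, time};

enum Msg {
    DoWork(u32),
    Shutdown,
}

async fn event_loop(mut rx: mpsc::Receiver<Msg>) {
    // Counterpart of the `CleanMemoryTick` interval (`run` delays the first
    // tick with `interval_at`; a plain `interval` fires immediately).
    let mut housekeeping = time::interval(Duration::from_secs(300));
    loop {
        tokio::select! {
            maybe_msg = rx.recv() => match maybe_msg {
                Some(Msg::DoWork(n)) => println!("processing item {n}"),
                // A shutdown message (or every sender dropping) ends the loop;
                // this is the point where `run` drains pending return statuses
                // and persists resumable jobs.
                Some(Msg::Shutdown) | None => break,
            },
            _ = housekeeping.tick() => {
                // Periodic upkeep, analogous to `clean_memory`.
            }
        }
    }
}

#[tokio::main]
async fn main() {
    let (tx, rx) = mpsc::channel(8);
    tx.send(Msg::DoWork(1)).await.expect("receiver alive");
    tx.send(Msg::Shutdown).await.expect("receiver alive");
    event_loop(rx).await;
}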

View file

@ -0,0 +1,219 @@
use crate::indexer::IndexerJob;
use sd_prisma::prisma::{job, location};
use sd_utils::uuid_to_bytes;
use std::{
collections::{HashMap, VecDeque},
future::Future,
iter,
marker::PhantomData,
};
use futures_concurrency::future::TryJoin;
use serde::{Deserialize, Serialize};
use super::{
job::{DynJob, Job, JobContext, JobHolder, JobName},
report::{Report, ReportError},
JobId, JobSystemError,
};
#[derive(Debug, Serialize, Deserialize)]
pub struct SerializedTasks(pub Vec<u8>);
pub trait SerializableJob: 'static
where
Self: Sized,
{
fn serialize(
self,
) -> impl Future<Output = Result<Option<Vec<u8>>, rmp_serde::encode::Error>> + Send {
async move { Ok(None) }
}
#[allow(unused_variables)]
fn deserialize(
serialized_job: &[u8],
ctx: &impl JobContext,
) -> impl Future<
Output = Result<Option<(Self, Option<SerializedTasks>)>, rmp_serde::decode::Error>,
> + Send {
async move { Ok(None) }
}
}
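
A hedged sketch of how a concrete job type might opt into resumability by overriding `serialize` while keeping the default `deserialize`; `MyJob` and its field are hypothetical and not part of this PR — the trait it implements is the `SerializableJob` defined just above.

use std::future::Future;

use serde::{Deserialize, Serialize};

// Hypothetical resumable job; any serde-friendly state works here.
#[derive(Debug, Serialize, Deserialize)]
struct MyJob {
    remaining_paths: Vec<String>,
}

impl SerializableJob for MyJob {
    fn serialize(
        self,
    ) -> impl Future<Output = Result<Option<Vec<u8>>, rmp_serde::encode::Error>> + Send {
        // Returning `Some(bytes)` marks the job as resumable on shutdown;
        // the trait's default body returns `Ok(None)`, i.e. "nothing to resume".
        async move { rmp_serde::to_vec_named(&self).map(Some) }
    }
}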
#[derive(Debug, Serialize, Deserialize)]
pub struct StoredJob {
pub(super) id: JobId,
pub(super) name: JobName,
pub(super) serialized_job: Vec<u8>,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct StoredJobEntry {
pub(super) location_id: location::id::Type,
pub(super) root_job: StoredJob,
pub(super) next_jobs: Vec<StoredJob>,
}
pub async fn load_jobs<Ctx: JobContext>(
entries: Vec<StoredJobEntry>,
job_ctx: &Ctx,
) -> Result<
Vec<(
location::id::Type,
Box<dyn DynJob<Ctx>>,
Option<SerializedTasks>,
)>,
JobSystemError,
> {
let mut reports = job_ctx
.db()
.job()
.find_many(vec![job::id::in_vec(
entries
.iter()
.flat_map(
|StoredJobEntry {
root_job: StoredJob { id, .. },
next_jobs,
..
}| { iter::once(*id).chain(next_jobs.iter().map(|StoredJob { id, .. }| *id)) },
)
.map(uuid_to_bytes)
.collect::<Vec<_>>(),
)])
.exec()
.await
.map_err(JobSystemError::LoadReportsForResume)?
.into_iter()
.map(Report::try_from)
.map(|report_res| report_res.map(|report| (report.id, report)))
.collect::<Result<HashMap<_, _>, _>>()?;
entries
.into_iter()
.map(
|StoredJobEntry {
location_id,
root_job,
next_jobs,
}| {
let report = reports
.remove(&root_job.id)
.ok_or(ReportError::MissingReport(root_job.id))?;
Ok(async move {
load_job(root_job, report, job_ctx)
.await
.map(|maybe_loaded_job| {
maybe_loaded_job
.map(|(dyn_job, tasks)| (location_id, dyn_job, tasks, next_jobs))
})
})
},
)
.collect::<Result<Vec<_>, JobSystemError>>()?
.try_join()
.await?
.into_iter()
.flatten()
.map(|(location_id, mut dyn_job, tasks, next_jobs)| {
let next_jobs_and_reports = next_jobs
.into_iter()
.map(|next_job| {
let next_job_id = next_job.id;
reports
.remove(&next_job.id)
.map(|report| (next_job, report))
.ok_or(ReportError::MissingReport(next_job_id))
})
.collect::<Result<Vec<_>, _>>()?;
Ok(async move {
next_jobs_and_reports
.into_iter()
.map(|(next_job, report)| async move {
load_job(next_job, report, job_ctx)
.await
.map(|maybe_loaded_next_job| {
maybe_loaded_next_job.map(|(next_dyn_job, next_tasks)| {
assert!(
next_tasks.is_none(),
"Next jobs must not have tasks as they haven't run yet"
);
assert!(
next_dyn_job.next_jobs().is_empty(),
"Next jobs must not have next jobs"
);
next_dyn_job
})
})
})
.collect::<Vec<_>>()
.try_join()
.await
.map(|maybe_next_dyn_jobs| {
dyn_job.set_next_jobs(maybe_next_dyn_jobs.into_iter().flatten().collect());
(location_id, dyn_job, tasks)
})
})
})
.collect::<Result<Vec<_>, JobSystemError>>()?
.try_join()
.await
}
macro_rules! match_deserialize_job {
($stored_job:ident, $report:ident, $job_ctx:ident, $ctx_type:ty, [$($job_type:ty),+ $(,)?]) => {{
let StoredJob {
id,
name,
serialized_job,
} = $stored_job;
match name {
$(<$job_type as Job>::NAME => <$job_type as SerializableJob>::deserialize(
&serialized_job,
$job_ctx,
).await
.map(|maybe_job| maybe_job.map(|(job, tasks)| -> (
Box<dyn DynJob<$ctx_type>>,
Option<SerializedTasks>
) {
(
Box::new(JobHolder {
id,
job,
report: $report,
next_jobs: VecDeque::new(),
_ctx: PhantomData,
}),
tasks,
)
}
))
.map_err(Into::into),)+
}
}};
}
async fn load_job<Ctx: JobContext>(
stored_job: StoredJob,
report: Report,
job_ctx: &Ctx,
) -> Result<Option<(Box<dyn DynJob<Ctx>>, Option<SerializedTasks>)>, JobSystemError> {
match_deserialize_job!(
stored_job,
report,
job_ctx,
Ctx,
[
IndexerJob,
// TODO: Add more jobs here
// e.g.: FileIdentifierJob, MediaProcessorJob, etc.,
]
)
}

View file

@ -0,0 +1,16 @@
use crate::Error;
use sd_task_system::TaskHandle;
use futures_concurrency::future::Join;
pub async fn cancel_pending_tasks(
pending_tasks: impl IntoIterator<Item = &TaskHandle<Error>> + Send,
) {
pending_tasks
.into_iter()
.map(TaskHandle::cancel)
.collect::<Vec<_>>()
.join()
.await;
}
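
The collect-into-`Vec`-then-`join` combinator comes from `futures_concurrency` and awaits every future in the collection concurrently. A tiny standalone example of the same pattern with arbitrary values:

use futures_concurrency::future::Join;

#[tokio::main]
async fn main() {
    // Build a Vec of futures and await them all at once; outputs keep the
    // original order.
    let doubled = (1..=3)
        .map(|n| async move { n * 2 })
        .collect::<Vec<_>>()
        .join()
        .await;
    assert_eq!(doubled, vec![2, 4, 6]);
}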

View file

@ -0,0 +1,71 @@
#![warn(
clippy::all,
clippy::pedantic,
clippy::correctness,
clippy::perf,
clippy::style,
clippy::suspicious,
clippy::complexity,
clippy::nursery,
clippy::unwrap_used,
unused_qualifications,
rust_2018_idioms,
trivial_casts,
trivial_numeric_casts,
unused_allocation,
clippy::unnecessary_cast,
clippy::cast_lossless,
clippy::cast_possible_truncation,
clippy::cast_possible_wrap,
clippy::cast_precision_loss,
clippy::cast_sign_loss,
clippy::dbg_macro,
clippy::deprecated_cfg_attr,
clippy::separated_literal_suffix,
deprecated
)]
#![forbid(deprecated_in_future)]
#![allow(clippy::missing_errors_doc, clippy::module_name_repetitions)]
use sd_task_system::TaskSystemError;
use serde::{Deserialize, Serialize};
use specta::Type;
use thiserror::Error;
pub mod indexer;
pub mod job_system;
use indexer::{IndexerError, NonCriticalIndexerError};
pub use job_system::{
job::{IntoJob, JobBuilder, JobContext, JobName, JobOutput, JobOutputData, ProgressUpdate},
JobId, JobSystem,
};
#[derive(Error, Debug)]
pub enum Error {
#[error(transparent)]
Indexer(#[from] IndexerError),
#[error(transparent)]
TaskSystem(#[from] TaskSystemError),
}
impl From<Error> for rspc::Error {
fn from(e: Error) -> Self {
match e {
Error::Indexer(e) => e.into(),
Error::TaskSystem(e) => {
Self::with_cause(rspc::ErrorCode::InternalServerError, e.to_string(), e)
}
}
}
}
#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type)]
pub enum NonCriticalJobError {
// TODO: Add variants as needed
#[error(transparent)]
Indexer(#[from] NonCriticalIndexerError),
}
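
The crate keeps one critical `Error` enum (converted into `rspc::Error` for the API layer) and a separate serializable `NonCriticalJobError`. The `#[error(transparent)]` plus `#[from]` layering used for both can be seen in this minimal sketch; the error names below are invented for illustration.

use thiserror::Error;

#[derive(Error, Debug)]
enum WalkerError {
    #[error("failed to read directory {0}")]
    ReadDir(String),
}

#[derive(Error, Debug)]
enum TopLevelError {
    // `transparent` forwards `Display` and `source` to the inner error,
    // while `#[from]` lets `?` convert it automatically.
    #[error(transparent)]
    Walker(#[from] WalkerError),
}

fn walk() -> Result<(), WalkerError> {
    Err(WalkerError::ReadDir("/tmp/missing".into()))
}

fn run() -> Result<(), TopLevelError> {
    walk()?; // WalkerError -> TopLevelError via the generated `From` impl
    Ok(())
}

fn main() {
    // Prints the inner error's message thanks to `transparent`.
    println!("{}", run().unwrap_err());
}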

View file

@ -0,0 +1,30 @@
[package]
name = "sd-core-indexer-rules"
version = "0.1.0"
authors = ["Ericson Soares <ericson@spacedrive.com>"]
license = { workspace = true }
repository = { workspace = true }
edition = { workspace = true }
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
# Spacedrive Sub-crates
sd-prisma = { path = "../../../crates/prisma" }
sd-utils = { path = "../../../crates/utils" }
chrono = { workspace = true }
futures-concurrency = { workspace = true }
globset = { workspace = true, features = ["serde1"] }
prisma-client-rust = { workspace = true }
rmp-serde = { workspace = true }
rspc = { workspace = true }
serde = { workspace = true, features = ["derive"] }
specta = { workspace = true }
thiserror = { workspace = true }
tokio = { workspace = true, features = ["fs"] }
tracing = { workspace = true }
uuid = { workspace = true, features = ["v4", "serde"] }
[dev-dependencies]
tempfile = { workspace = true }

View file

@ -1,30 +1,60 @@
use crate::library::Library;
#![warn(
clippy::all,
clippy::pedantic,
clippy::correctness,
clippy::perf,
clippy::style,
clippy::suspicious,
clippy::complexity,
clippy::nursery,
clippy::unwrap_used,
unused_qualifications,
rust_2018_idioms,
trivial_casts,
trivial_numeric_casts,
unused_allocation,
clippy::unnecessary_cast,
clippy::cast_lossless,
clippy::cast_possible_truncation,
clippy::cast_possible_wrap,
clippy::cast_precision_loss,
clippy::cast_sign_loss,
clippy::dbg_macro,
clippy::deprecated_cfg_attr,
clippy::separated_literal_suffix,
deprecated
)]
#![forbid(deprecated_in_future)]
#![allow(clippy::missing_errors_doc)]
use sd_prisma::prisma::indexer_rule;
use sd_prisma::prisma::{indexer_rule, PrismaClient};
use sd_utils::{
db::{maybe_missing, MissingFieldError},
error::{FileIOError, NonUtf8PathError},
};
use serde::{Deserialize, Serialize};
use std::{
collections::{HashMap, HashSet},
marker::PhantomData,
fs::Metadata,
path::Path,
sync::Arc,
};
use chrono::{DateTime, Utc};
use futures::future::try_join_all;
use futures_concurrency::future::TryJoin;
use globset::{Glob, GlobSet, GlobSetBuilder};
use rmp_serde::{decode, encode};
use rspc::ErrorCode;
use serde::{de, ser, Deserialize, Serialize};
use specta::Type;
use thiserror::Error;
use tokio::fs;
use tokio::{fs, sync::RwLock};
use tracing::debug;
use uuid::Uuid;
pub mod seed;
mod serde_impl;
#[derive(Error, Debug)]
pub enum IndexerRuleError {
@ -57,10 +87,10 @@ impl From<IndexerRuleError> for rspc::Error {
IndexerRuleError::InvalidRuleKindInt(_)
| IndexerRuleError::Glob(_)
| IndexerRuleError::NonUtf8Path(_) => {
rspc::Error::with_cause(ErrorCode::BadRequest, err.to_string(), err)
Self::with_cause(ErrorCode::BadRequest, err.to_string(), err)
}
_ => rspc::Error::with_cause(ErrorCode::InternalServerError, err.to_string(), err),
_ => Self::with_cause(ErrorCode::InternalServerError, err.to_string(), err),
}
}
}
@ -83,8 +113,10 @@ pub struct IndexerRuleCreateArgs {
impl IndexerRuleCreateArgs {
pub async fn create(
self,
library: &Library,
db: &PrismaClient,
) -> Result<Option<indexer_rule::Data>, IndexerRuleError> {
use indexer_rule::{date_created, date_modified, name, rules_per_kind};
debug!(
"{} a new indexer rule (name = {}, params = {:?})",
if self.dry_run {
@ -127,12 +159,8 @@ impl IndexerRuleCreateArgs {
let date_created = Utc::now();
use indexer_rule::*;
Ok(Some(
library
.db
.indexer_rule()
db.indexer_rule()
.create(
sd_utils::uuid_to_bytes(generate_pub_id()),
vec![
@ -159,6 +187,7 @@ pub enum RuleKind {
}
impl RuleKind {
#[must_use]
pub const fn variant_count() -> usize {
// TODO: Use https://doc.rust-lang.org/std/mem/fn.variant_count.html if it ever gets stabilized
4
@ -168,9 +197,10 @@ impl RuleKind {
/// `ParametersPerKind` is a mapping from `RuleKind` to the parameters required for each kind of rule.
/// In case of doubt about globs, consult <https://docs.rs/globset/latest/globset/#syntax>
///
/// We store directly globs in the database, serialized using rmp_serde.
/// We store the globs directly in the database, serialized using [rmp_serde](https://docs.rs/rmp-serde).
///
/// In case of `ParametersPerKind::AcceptIfChildrenDirectoriesArePresent` or `ParametersPerKind::RejectIfChildrenDirectoriesArePresent`
/// In case of `ParametersPerKind::AcceptIfChildrenDirectoriesArePresent` or
/// `ParametersPerKind::RejectIfChildrenDirectoriesArePresent`
/// first we change the data structure to a vector, then we serialize it.
#[derive(Debug)]
pub enum RulePerKind {
@ -219,232 +249,67 @@ impl RulePerKind {
}
}
/// We're implementing `Serialize` by hand as `GlobSet`s aren't serializable, so we ignore them on
/// serialization
impl Serialize for RulePerKind {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: ser::Serializer,
{
match *self {
RulePerKind::AcceptFilesByGlob(ref globs, ref _glob_set) => serializer
.serialize_newtype_variant("ParametersPerKind", 0, "AcceptFilesByGlob", globs),
RulePerKind::RejectFilesByGlob(ref globs, ref _glob_set) => serializer
.serialize_newtype_variant("ParametersPerKind", 1, "RejectFilesByGlob", globs),
RulePerKind::AcceptIfChildrenDirectoriesArePresent(ref children) => serializer
.serialize_newtype_variant(
"ParametersPerKind",
2,
"AcceptIfChildrenDirectoriesArePresent",
children,
),
RulePerKind::RejectIfChildrenDirectoriesArePresent(ref children) => serializer
.serialize_newtype_variant(
"ParametersPerKind",
3,
"RejectIfChildrenDirectoriesArePresent",
children,
),
}
}
pub trait MetadataForIndexerRules: Send + Sync + 'static {
fn is_dir(&self) -> bool;
}
impl<'de> Deserialize<'de> for RulePerKind {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: de::Deserializer<'de>,
{
const VARIANTS: &[&str] = &[
"AcceptFilesByGlob",
"RejectFilesByGlob",
"AcceptIfChildrenDirectoriesArePresent",
"RejectIfChildrenDirectoriesArePresent",
];
enum Fields {
AcceptFilesByGlob,
RejectFilesByGlob,
AcceptIfChildrenDirectoriesArePresent,
RejectIfChildrenDirectoriesArePresent,
}
struct FieldsVisitor;
impl<'de> de::Visitor<'de> for FieldsVisitor {
type Value = Fields;
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
formatter.write_str(
"`AcceptFilesByGlob` \
or `RejectFilesByGlob` \
or `AcceptIfChildrenDirectoriesArePresent` \
or `RejectIfChildrenDirectoriesArePresent`",
)
}
fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
where
E: de::Error,
{
match value {
0 => Ok(Fields::AcceptFilesByGlob),
1 => Ok(Fields::RejectFilesByGlob),
2 => Ok(Fields::AcceptIfChildrenDirectoriesArePresent),
3 => Ok(Fields::RejectIfChildrenDirectoriesArePresent),
_ => Err(de::Error::invalid_value(
de::Unexpected::Unsigned(value),
&"variant index 0 <= i < 3",
)),
}
}
fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
where
E: de::Error,
{
match value {
"AcceptFilesByGlob" => Ok(Fields::AcceptFilesByGlob),
"RejectFilesByGlob" => Ok(Fields::RejectFilesByGlob),
"AcceptIfChildrenDirectoriesArePresent" => {
Ok(Fields::AcceptIfChildrenDirectoriesArePresent)
}
"RejectIfChildrenDirectoriesArePresent" => {
Ok(Fields::RejectIfChildrenDirectoriesArePresent)
}
_ => Err(de::Error::unknown_variant(value, VARIANTS)),
}
}
fn visit_bytes<E>(self, bytes: &[u8]) -> Result<Self::Value, E>
where
E: de::Error,
{
match bytes {
b"AcceptFilesByGlob" => Ok(Fields::AcceptFilesByGlob),
b"RejectFilesByGlob" => Ok(Fields::RejectFilesByGlob),
b"AcceptIfChildrenDirectoriesArePresent" => {
Ok(Fields::AcceptIfChildrenDirectoriesArePresent)
}
b"RejectIfChildrenDirectoriesArePresent" => {
Ok(Fields::RejectIfChildrenDirectoriesArePresent)
}
_ => Err(de::Error::unknown_variant(
&String::from_utf8_lossy(bytes),
VARIANTS,
)),
}
}
}
impl<'de> Deserialize<'de> for Fields {
#[inline]
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: de::Deserializer<'de>,
{
deserializer.deserialize_identifier(FieldsVisitor)
}
}
struct ParametersPerKindVisitor<'de> {
marker: PhantomData<RulePerKind>,
lifetime: PhantomData<&'de ()>,
}
impl<'de> de::Visitor<'de> for ParametersPerKindVisitor<'de> {
type Value = RulePerKind;
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
formatter.write_str("enum ParametersPerKind")
}
fn visit_enum<PPK>(self, data: PPK) -> Result<Self::Value, PPK::Error>
where
PPK: de::EnumAccess<'de>,
{
use de::Error;
de::EnumAccess::variant(data).and_then(|value| match value {
(Fields::AcceptFilesByGlob, accept_files_by_glob) => {
de::VariantAccess::newtype_variant::<Vec<Glob>>(accept_files_by_glob)
.and_then(|globs| {
globs
.iter()
.fold(&mut GlobSetBuilder::new(), |builder, glob| {
builder.add(glob.to_owned())
})
.build()
.map_or_else(
|e| Err(PPK::Error::custom(e)),
|glob_set| {
Ok(Self::Value::AcceptFilesByGlob(globs, glob_set))
},
)
})
}
(Fields::RejectFilesByGlob, reject_files_by_glob) => {
de::VariantAccess::newtype_variant::<Vec<Glob>>(reject_files_by_glob)
.and_then(|globs| {
globs
.iter()
.fold(&mut GlobSetBuilder::new(), |builder, glob| {
builder.add(glob.to_owned())
})
.build()
.map_or_else(
|e| Err(PPK::Error::custom(e)),
|glob_set| {
Ok(Self::Value::RejectFilesByGlob(globs, glob_set))
},
)
})
}
(
Fields::AcceptIfChildrenDirectoriesArePresent,
accept_if_children_directories_are_present,
) => de::VariantAccess::newtype_variant::<HashSet<String>>(
accept_if_children_directories_are_present,
)
.map(Self::Value::AcceptIfChildrenDirectoriesArePresent),
(
Fields::RejectIfChildrenDirectoriesArePresent,
reject_if_children_directories_are_present,
) => de::VariantAccess::newtype_variant::<HashSet<String>>(
reject_if_children_directories_are_present,
)
.map(Self::Value::RejectIfChildrenDirectoriesArePresent),
})
}
}
deserializer.deserialize_enum(
"ParametersPerKind",
VARIANTS,
ParametersPerKindVisitor {
marker: PhantomData::<RulePerKind>,
lifetime: PhantomData,
},
)
impl MetadataForIndexerRules for Metadata {
fn is_dir(&self) -> bool {
self.is_dir()
}
}
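
The trait exists so rules can run against metadata the walker has already fetched instead of hitting the filesystem again; any `Send + Sync + 'static` type that can answer `is_dir` qualifies. A hypothetical in-memory implementation (not part of this PR):

// Hypothetical walker entry carrying pre-fetched metadata.
struct CachedEntry {
    is_directory: bool,
}

impl MetadataForIndexerRules for CachedEntry {
    fn is_dir(&self) -> bool {
        self.is_directory
    }
}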
impl RulePerKind {
async fn apply(&self, source: impl AsRef<Path>) -> Result<(RuleKind, bool), IndexerRuleError> {
#[deprecated]
async fn apply(
&self,
source: impl AsRef<Path> + Send,
) -> Result<(RuleKind, bool), IndexerRuleError> {
match self {
RulePerKind::AcceptIfChildrenDirectoriesArePresent(children) => {
Self::AcceptIfChildrenDirectoriesArePresent(children) => {
accept_dir_for_its_children(source, children)
.await
.map(|accepted| (RuleKind::AcceptIfChildrenDirectoriesArePresent, accepted))
}
RulePerKind::RejectIfChildrenDirectoriesArePresent(children) => {
Self::RejectIfChildrenDirectoriesArePresent(children) => {
reject_dir_for_its_children(source, children)
.await
.map(|rejected| (RuleKind::RejectIfChildrenDirectoriesArePresent, rejected))
}
RulePerKind::AcceptFilesByGlob(_globs, accept_glob_set) => Ok((
Self::AcceptFilesByGlob(_globs, accept_glob_set) => Ok((
RuleKind::AcceptFilesByGlob,
accept_by_glob(source, accept_glob_set),
)),
RulePerKind::RejectFilesByGlob(_globs, reject_glob_set) => Ok((
Self::RejectFilesByGlob(_globs, reject_glob_set) => Ok((
RuleKind::RejectFilesByGlob,
reject_by_glob(source, reject_glob_set),
)),
}
}
async fn apply_with_metadata(
&self,
source: impl AsRef<Path> + Send,
metadata: &impl MetadataForIndexerRules,
) -> Result<(RuleKind, bool), IndexerRuleError> {
match self {
Self::AcceptIfChildrenDirectoriesArePresent(children) => {
accept_dir_for_its_children_with_metadata(source, metadata, children)
.await
.map(|accepted| (RuleKind::AcceptIfChildrenDirectoriesArePresent, accepted))
}
Self::RejectIfChildrenDirectoriesArePresent(children) => {
reject_dir_for_its_children_with_metadata(source, metadata, children)
.await
.map(|rejected| (RuleKind::RejectIfChildrenDirectoriesArePresent, rejected))
}
Self::AcceptFilesByGlob(_globs, accept_glob_set) => Ok((
RuleKind::AcceptFilesByGlob,
accept_by_glob(source, accept_glob_set),
)),
Self::RejectFilesByGlob(_globs, reject_glob_set) => Ok((
RuleKind::RejectFilesByGlob,
reject_by_glob(source, reject_glob_set),
)),
@ -463,18 +328,50 @@ pub struct IndexerRule {
}
impl IndexerRule {
#[deprecated]
pub async fn apply(
&self,
source: impl AsRef<Path>,
source: impl AsRef<Path> + Send,
) -> Result<Vec<(RuleKind, bool)>, IndexerRuleError> {
try_join_all(self.rules.iter().map(|rule| rule.apply(source.as_ref()))).await
self.rules
.iter()
.map(|rule| rule.apply(source.as_ref()))
.collect::<Vec<_>>()
.try_join()
.await
}
pub async fn apply_with_metadata(
&self,
source: impl AsRef<Path> + Send,
metadata: &impl MetadataForIndexerRules,
) -> Result<Vec<(RuleKind, bool)>, IndexerRuleError> {
async fn inner(
rules: &[RulePerKind],
source: &Path,
metadata: &impl MetadataForIndexerRules,
) -> Result<Vec<(RuleKind, bool)>, IndexerRuleError> {
rules
.iter()
.map(|rule| rule.apply_with_metadata(source, metadata))
.collect::<Vec<_>>()
.try_join()
.await
}
inner(&self.rules, source.as_ref(), metadata).await
}
#[deprecated]
pub async fn apply_all(
rules: &[IndexerRule],
source: impl AsRef<Path>,
rules: &[Self],
source: impl AsRef<Path> + Send,
) -> Result<HashMap<RuleKind, Vec<bool>>, IndexerRuleError> {
try_join_all(rules.iter().map(|rule| rule.apply(source.as_ref())))
rules
.iter()
.map(|rule| rule.apply(source.as_ref()))
.collect::<Vec<_>>()
.try_join()
.await
.map(|results| {
results.into_iter().flatten().fold(
@ -488,6 +385,59 @@ impl IndexerRule {
}
}
#[derive(Debug, Clone, Default)]
pub struct IndexerRuler {
// TODO(fogodev): Use this RwLock later to acquire new rules while applying rules, like from a .gitignore file
rules: Arc<RwLock<Vec<IndexerRule>>>,
}
impl IndexerRuler {
#[must_use]
pub fn new(rules: Vec<IndexerRule>) -> Self {
Self {
rules: Arc::new(RwLock::new(rules)),
}
}
pub async fn serialize(&self) -> Result<Vec<u8>, rmp_serde::encode::Error> {
rmp_serde::to_vec_named(&*self.rules.read().await)
}
pub fn deserialize(data: &[u8]) -> Result<Self, rmp_serde::decode::Error> {
rmp_serde::from_slice(data).map(Self::new)
}
pub async fn apply_all(
&self,
source: impl AsRef<Path> + Send,
metadata: &impl MetadataForIndexerRules,
) -> Result<HashMap<RuleKind, Vec<bool>>, IndexerRuleError> {
async fn inner(
rules: &[IndexerRule],
source: &Path,
metadata: &impl MetadataForIndexerRules,
) -> Result<HashMap<RuleKind, Vec<bool>>, IndexerRuleError> {
rules
.iter()
.map(|rule| rule.apply_with_metadata(source, metadata))
.collect::<Vec<_>>()
.try_join()
.await
.map(|results| {
results.into_iter().flatten().fold(
HashMap::<_, Vec<_>>::with_capacity(RuleKind::variant_count()),
|mut map, (kind, result)| {
map.entry(kind).or_default().push(result);
map
},
)
})
}
inner(&self.rules.read().await, source.as_ref(), metadata).await
}
}
impl TryFrom<&indexer_rule::Data> for IndexerRule {
type Error = IndexerRuleError;
@ -522,8 +472,9 @@ fn reject_by_glob(source: impl AsRef<Path>, reject_glob_set: &GlobSet) -> bool {
!accept_by_glob(source.as_ref(), reject_glob_set)
}
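
Both glob-based rule kinds ultimately delegate to `globset` matching. A small standalone example of building a `GlobSet` and testing paths against it; the patterns here are arbitrary:

use globset::{Glob, GlobSetBuilder};

fn main() -> Result<(), globset::Error> {
    let mut builder = GlobSetBuilder::new();
    builder.add(Glob::new("**/*.{png,jpg}")?);
    builder.add(Glob::new("**/.git")?);
    let set = builder.build()?;

    // `is_match` accepts anything convertible to a path.
    assert!(set.is_match("photos/cat.png"));
    assert!(!set.is_match("notes/todo.txt"));
    Ok(())
}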
#[deprecated]
async fn accept_dir_for_its_children(
source: impl AsRef<Path>,
source: impl AsRef<Path> + Send,
children: &HashSet<String>,
) -> Result<bool, IndexerRuleError> {
let source = source.as_ref();
@ -566,8 +517,50 @@ async fn accept_dir_for_its_children(
Ok(false)
}
async fn accept_dir_for_its_children_with_metadata(
source: impl AsRef<Path> + Send,
metadata: &impl MetadataForIndexerRules,
children: &HashSet<String>,
) -> Result<bool, IndexerRuleError> {
let source = source.as_ref();
// FIXME(fogodev): Just check for io::ErrorKind::NotADirectory error instead (feature = "io_error_more", issue = "86442")
if !metadata.is_dir() {
return Ok(false);
}
let mut read_dir = fs::read_dir(source)
.await // TODO: Check NotADirectory error here when available
.map_err(|e| IndexerRuleError::AcceptByItsChildrenFileIO(FileIOError::from((source, e))))?;
while let Some(entry) = read_dir
.next_entry()
.await
.map_err(|e| IndexerRuleError::AcceptByItsChildrenFileIO(FileIOError::from((source, e))))?
{
let entry_name = entry
.file_name()
.to_str()
.ok_or_else(|| NonUtf8PathError(entry.path().into()))?
.to_string();
if entry
.metadata()
.await
.map_err(|e| {
IndexerRuleError::AcceptByItsChildrenFileIO(FileIOError::from((source, e)))
})?
.is_dir() && children.contains(&entry_name)
{
return Ok(true);
}
}
Ok(false)
}
#[deprecated]
async fn reject_dir_for_its_children(
source: impl AsRef<Path>,
source: impl AsRef<Path> + Send,
children: &HashSet<String>,
) -> Result<bool, IndexerRuleError> {
let source = source.as_ref();
@ -608,6 +601,46 @@ async fn reject_dir_for_its_children(
Ok(true)
}
async fn reject_dir_for_its_children_with_metadata(
source: impl AsRef<Path> + Send,
metadata: &impl MetadataForIndexerRules,
children: &HashSet<String>,
) -> Result<bool, IndexerRuleError> {
let source = source.as_ref();
// FIXME(fogodev): Just check for io::ErrorKind::NotADirectory error instead (feature = "io_error_more", issue = "86442")
if !metadata.is_dir() {
return Ok(true);
}
let mut read_dir = fs::read_dir(source)
.await // TODO: Check NotADirectory error here when available
.map_err(|e| IndexerRuleError::RejectByItsChildrenFileIO(FileIOError::from((source, e))))?;
while let Some(entry) = read_dir
.next_entry()
.await
.map_err(|e| IndexerRuleError::RejectByItsChildrenFileIO(FileIOError::from((source, e))))?
{
if entry
.metadata()
.await
.map_err(|e| {
IndexerRuleError::RejectByItsChildrenFileIO(FileIOError::from((source, e)))
})?
.is_dir() && children.contains(
entry
.file_name()
.to_str()
.ok_or_else(|| NonUtf8PathError(entry.path().into()))?,
) {
return Ok(false);
}
}
Ok(true)
}
#[must_use]
pub fn generate_pub_id() -> Uuid {
loop {
let pub_id = Uuid::new_v4();
@ -624,6 +657,7 @@ mod tests {
use tempfile::tempdir;
impl IndexerRule {
#[must_use]
pub fn new(name: String, default: bool, rules: Vec<RulePerKind>) -> Self {
Self {
id: None,
@ -636,7 +670,7 @@ mod tests {
}
}
async fn check_rule(indexer_rule: &IndexerRule, path: impl AsRef<Path>) -> bool {
async fn check_rule(indexer_rule: &IndexerRule, path: impl AsRef<Path> + Send) -> bool {
indexer_rule
.apply(path)
.await
@ -697,6 +731,7 @@ mod tests {
}
#[tokio::test]
#[allow(clippy::similar_names)]
async fn test_only_photos() {
let text = Path::new("file.txt");
let png = Path::new("photo1.png");
@ -748,7 +783,7 @@ mod tests {
fs::create_dir(project2.join(".git")).await.unwrap();
fs::create_dir(project2.join("books")).await.unwrap();
let childrens = [".git".to_string()].into_iter().collect::<HashSet<_>>();
let childrens = HashSet::from([".git".to_string()]);
let rule = IndexerRule::new(
"git projects".to_string(),
@ -779,7 +814,7 @@ mod tests {
fs::create_dir(project2.join(".git")).await.unwrap();
fs::create_dir(project2.join("books")).await.unwrap();
let childrens = [".git".to_string()].into_iter().collect::<HashSet<_>>();
let childrens = HashSet::from([".git".to_string()]);
let rule = IndexerRule::new(
"git projects".to_string(),
@ -798,21 +833,23 @@ mod tests {
fn eq(&self, other: &Self) -> bool {
match (self, other) {
(
RulePerKind::AcceptFilesByGlob(self_globs, _),
RulePerKind::AcceptFilesByGlob(other_globs, _),
Self::AcceptFilesByGlob(self_globs, _),
Self::AcceptFilesByGlob(other_globs, _),
)
| (
Self::RejectFilesByGlob(self_globs, _),
Self::RejectFilesByGlob(other_globs, _),
) => self_globs == other_globs,
(
RulePerKind::RejectFilesByGlob(self_globs, _),
RulePerKind::RejectFilesByGlob(other_globs, _),
) => self_globs == other_globs,
(
RulePerKind::AcceptIfChildrenDirectoriesArePresent(self_childrens),
RulePerKind::AcceptIfChildrenDirectoriesArePresent(other_childrens),
) => self_childrens == other_childrens,
(
RulePerKind::RejectIfChildrenDirectoriesArePresent(self_childrens),
RulePerKind::RejectIfChildrenDirectoriesArePresent(other_childrens),
Self::AcceptIfChildrenDirectoriesArePresent(self_childrens),
Self::AcceptIfChildrenDirectoriesArePresent(other_childrens),
)
| (
Self::RejectIfChildrenDirectoriesArePresent(self_childrens),
Self::RejectIfChildrenDirectoriesArePresent(other_childrens),
) => self_childrens == other_childrens,
_ => false,
}
}

View file

@ -1,14 +1,11 @@
use crate::{
library::Library,
location::indexer::rules::{IndexerRule, IndexerRuleError, RulePerKind},
};
use sd_prisma::prisma::indexer_rule;
use sd_prisma::prisma::{indexer_rule, PrismaClient};
use chrono::Utc;
use thiserror::Error;
use uuid::Uuid;
use super::{IndexerRule, IndexerRuleError, RulePerKind};
#[derive(Error, Debug)]
pub enum SeederError {
#[error("Failed to run indexer rules seeder: {0}")]
@ -37,7 +34,9 @@ impl From<SystemIndexerRule> for IndexerRule {
}
/// Seeds system indexer rules into a new or existing library.
pub async fn new_or_existing_library(library: &Library) -> Result<(), SeederError> {
pub async fn new_or_existing_library(db: &PrismaClient) -> Result<(), SeederError> {
use indexer_rule::{date_created, date_modified, default, name, rules_per_kind};
// DO NOT REORDER THIS ARRAY!
for (i, rule) in [no_os_protected(), no_hidden(), no_git(), only_images()]
.into_iter()
@ -46,8 +45,6 @@ pub async fn new_or_existing_library(library: &Library) -> Result<(), SeederErro
let pub_id = sd_utils::uuid_to_bytes(Uuid::from_u128(i as u128));
let rules = rmp_serde::to_vec_named(&rule.rules).map_err(IndexerRuleError::from)?;
use indexer_rule::*;
let data = vec![
name::set(Some(rule.name.to_string())),
rules_per_kind::set(Some(rules.clone())),
@ -56,9 +53,7 @@ pub async fn new_or_existing_library(library: &Library) -> Result<(), SeederErro
date_modified::set(Some(Utc::now().into())),
];
library
.db
.indexer_rule()
db.indexer_rule()
.upsert(
indexer_rule::pub_id::equals(pub_id.clone()),
indexer_rule::create(pub_id.clone(), data.clone()),
@ -71,6 +66,8 @@ pub async fn new_or_existing_library(library: &Library) -> Result<(), SeederErro
Ok(())
}
#[must_use]
#[allow(clippy::missing_panics_doc)]
pub fn no_os_protected() -> SystemIndexerRule {
SystemIndexerRule {
// TODO: On Windows, besides the listed files, any file with the FILE_ATTRIBUTE_SYSTEM attribute set should be considered a system file
@ -105,7 +102,7 @@ pub fn no_os_protected() -> SystemIndexerRule {
"C:/Users/*/NTUSER.DAT*",
"C:/Users/*/ntuser.dat*",
"C:/Users/*/{ntuser.ini,ntuser.dat,NTUSER.DAT}",
// User special folders (most of these the user dont even have permission to access)
// User special folders (most of these the user don't even have permission to access)
"C:/Users/*/{Cookies,AppData,NetHood,Recent,PrintHood,SendTo,Templates,Start Menu,Application Data,Local Settings,My Documents}",
// System special folders
"C:/{$Recycle.Bin,$WinREAgent,Documents and Settings,Program Files,Program Files (x86),ProgramData,Recovery,PerfLogs,Windows,Windows.old}",
@ -177,6 +174,8 @@ pub fn no_os_protected() -> SystemIndexerRule {
}
}
#[must_use]
#[allow(clippy::missing_panics_doc)]
pub fn no_hidden() -> SystemIndexerRule {
SystemIndexerRule {
name: "No Hidden",
@ -186,6 +185,8 @@ pub fn no_hidden() -> SystemIndexerRule {
}
}
#[must_use]
#[allow(clippy::missing_panics_doc)]
fn no_git() -> SystemIndexerRule {
SystemIndexerRule {
name: "No Git",
@ -197,6 +198,8 @@ fn no_git() -> SystemIndexerRule {
}
}
#[must_use]
#[allow(clippy::missing_panics_doc)]
fn only_images() -> SystemIndexerRule {
SystemIndexerRule {
name: "Only Images",

View file

@ -0,0 +1,214 @@
use std::{collections::HashSet, marker::PhantomData};
use globset::{Glob, GlobSetBuilder};
use serde::{de, ser, Deserialize, Serialize};
use super::RulePerKind;
/// We're implementing `Serialize` by hand as `GlobSet`s aren't serializable, so we ignore them on
/// serialization
impl Serialize for RulePerKind {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: ser::Serializer,
{
match *self {
Self::AcceptFilesByGlob(ref globs, ref _glob_set) => serializer
.serialize_newtype_variant("ParametersPerKind", 0, "AcceptFilesByGlob", globs),
Self::RejectFilesByGlob(ref globs, ref _glob_set) => serializer
.serialize_newtype_variant("ParametersPerKind", 1, "RejectFilesByGlob", globs),
Self::AcceptIfChildrenDirectoriesArePresent(ref children) => serializer
.serialize_newtype_variant(
"ParametersPerKind",
2,
"AcceptIfChildrenDirectoriesArePresent",
children,
),
Self::RejectIfChildrenDirectoriesArePresent(ref children) => serializer
.serialize_newtype_variant(
"ParametersPerKind",
3,
"RejectIfChildrenDirectoriesArePresent",
children,
),
}
}
}
impl<'de> Deserialize<'de> for RulePerKind {
#[allow(clippy::too_many_lines)]
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: de::Deserializer<'de>,
{
const VARIANTS: &[&str] = &[
"AcceptFilesByGlob",
"RejectFilesByGlob",
"AcceptIfChildrenDirectoriesArePresent",
"RejectIfChildrenDirectoriesArePresent",
];
enum Fields {
AcceptFilesByGlob,
RejectFilesByGlob,
AcceptIfChildrenDirectoriesArePresent,
RejectIfChildrenDirectoriesArePresent,
}
struct FieldsVisitor;
impl<'de> de::Visitor<'de> for FieldsVisitor {
type Value = Fields;
fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
formatter.write_str(
"`AcceptFilesByGlob` \
or `RejectFilesByGlob` \
or `AcceptIfChildrenDirectoriesArePresent` \
or `RejectIfChildrenDirectoriesArePresent`",
)
}
fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
where
E: de::Error,
{
match value {
0 => Ok(Fields::AcceptFilesByGlob),
1 => Ok(Fields::RejectFilesByGlob),
2 => Ok(Fields::AcceptIfChildrenDirectoriesArePresent),
3 => Ok(Fields::RejectIfChildrenDirectoriesArePresent),
_ => Err(de::Error::invalid_value(
de::Unexpected::Unsigned(value),
&"variant index 0 <= i < 3",
)),
}
}
fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
where
E: de::Error,
{
match value {
"AcceptFilesByGlob" => Ok(Fields::AcceptFilesByGlob),
"RejectFilesByGlob" => Ok(Fields::RejectFilesByGlob),
"AcceptIfChildrenDirectoriesArePresent" => {
Ok(Fields::AcceptIfChildrenDirectoriesArePresent)
}
"RejectIfChildrenDirectoriesArePresent" => {
Ok(Fields::RejectIfChildrenDirectoriesArePresent)
}
_ => Err(de::Error::unknown_variant(value, VARIANTS)),
}
}
fn visit_bytes<E>(self, bytes: &[u8]) -> Result<Self::Value, E>
where
E: de::Error,
{
match bytes {
b"AcceptFilesByGlob" => Ok(Fields::AcceptFilesByGlob),
b"RejectFilesByGlob" => Ok(Fields::RejectFilesByGlob),
b"AcceptIfChildrenDirectoriesArePresent" => {
Ok(Fields::AcceptIfChildrenDirectoriesArePresent)
}
b"RejectIfChildrenDirectoriesArePresent" => {
Ok(Fields::RejectIfChildrenDirectoriesArePresent)
}
_ => Err(de::Error::unknown_variant(
&String::from_utf8_lossy(bytes),
VARIANTS,
)),
}
}
}
impl<'de> Deserialize<'de> for Fields {
#[inline]
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: de::Deserializer<'de>,
{
deserializer.deserialize_identifier(FieldsVisitor)
}
}
struct ParametersPerKindVisitor<'de> {
marker: PhantomData<RulePerKind>,
lifetime: PhantomData<&'de ()>,
}
impl<'de> de::Visitor<'de> for ParametersPerKindVisitor<'de> {
type Value = RulePerKind;
fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
formatter.write_str("enum ParametersPerKind")
}
fn visit_enum<PPK>(self, data: PPK) -> Result<Self::Value, PPK::Error>
where
PPK: de::EnumAccess<'de>,
{
use de::Error;
de::EnumAccess::variant(data).and_then(|value| match value {
(Fields::AcceptFilesByGlob, accept_files_by_glob) => {
de::VariantAccess::newtype_variant::<Vec<Glob>>(accept_files_by_glob)
.and_then(|globs| {
globs
.iter()
.fold(&mut GlobSetBuilder::new(), |builder, glob| {
builder.add(glob.to_owned())
})
.build()
.map_or_else(
|e| Err(PPK::Error::custom(e)),
|glob_set| {
Ok(Self::Value::AcceptFilesByGlob(globs, glob_set))
},
)
})
}
(Fields::RejectFilesByGlob, reject_files_by_glob) => {
de::VariantAccess::newtype_variant::<Vec<Glob>>(reject_files_by_glob)
.and_then(|globs| {
globs
.iter()
.fold(&mut GlobSetBuilder::new(), |builder, glob| {
builder.add(glob.to_owned())
})
.build()
.map_or_else(
|e| Err(PPK::Error::custom(e)),
|glob_set| {
Ok(Self::Value::RejectFilesByGlob(globs, glob_set))
},
)
})
}
(
Fields::AcceptIfChildrenDirectoriesArePresent,
accept_if_children_directories_are_present,
) => de::VariantAccess::newtype_variant::<HashSet<String>>(
accept_if_children_directories_are_present,
)
.map(Self::Value::AcceptIfChildrenDirectoriesArePresent),
(
Fields::RejectIfChildrenDirectoriesArePresent,
reject_if_children_directories_are_present,
) => de::VariantAccess::newtype_variant::<HashSet<String>>(
reject_if_children_directories_are_present,
)
.map(Self::Value::RejectIfChildrenDirectoriesArePresent),
})
}
}
deserializer.deserialize_enum(
"ParametersPerKind",
VARIANTS,
ParametersPerKindVisitor {
marker: PhantomData::<Self>,
lifetime: PhantomData,
},
)
}
}
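
The hand-written impls above mimic what a derived `Serialize`/`Deserialize` would emit for an enum serialized under the name `ParametersPerKind`, minus the non-serializable `GlobSet`s. A sketch of the equivalent derived round-trip through `rmp_serde`, using a stand-in enum that simply omits the glob sets:

use serde::{Deserialize, Serialize};

#[derive(Debug, PartialEq, Serialize, Deserialize)]
enum ParametersPerKind {
    AcceptFilesByGlob(Vec<String>),
    RejectFilesByGlob(Vec<String>),
    AcceptIfChildrenDirectoriesArePresent(Vec<String>),
    RejectIfChildrenDirectoriesArePresent(Vec<String>),
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let rule = ParametersPerKind::AcceptFilesByGlob(vec!["*.png".into()]);
    // `to_vec_named` encodes the variant by name, which is what the string
    // branch of `FieldsVisitor` above decodes.
    let bytes = rmp_serde::to_vec_named(&rule)?;
    let decoded: ParametersPerKind = rmp_serde::from_slice(&bytes)?;
    assert_eq!(rule, decoded);
    Ok(())
}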

View file

@ -0,0 +1,16 @@
[package]
name = "sd-core-prisma-helpers"
version = "0.1.0"
authors = ["Ericson Soares <ericson@spacedrive.com>"]
license = { workspace = true }
repository = { workspace = true }
edition = { workspace = true }
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
# Spacedrive Sub-crates
sd-prisma = { path = "../../../crates/prisma" }
prisma-client-rust = { workspace = true }
serde = { workspace = true }

View file

@ -0,0 +1,226 @@
#![warn(
clippy::all,
clippy::pedantic,
clippy::correctness,
clippy::perf,
clippy::style,
clippy::suspicious,
clippy::complexity,
clippy::nursery,
clippy::unwrap_used,
unused_qualifications,
rust_2018_idioms,
trivial_casts,
trivial_numeric_casts,
unused_allocation,
clippy::unnecessary_cast,
clippy::cast_lossless,
clippy::cast_possible_truncation,
clippy::cast_possible_wrap,
clippy::cast_precision_loss,
clippy::cast_sign_loss,
clippy::dbg_macro,
clippy::deprecated_cfg_attr,
clippy::separated_literal_suffix,
deprecated
)]
#![forbid(deprecated_in_future)]
#![allow(clippy::missing_errors_doc, clippy::module_name_repetitions)]
use sd_prisma::prisma::{file_path, job, label, location, object};
// File Path selectables!
file_path::select!(file_path_pub_and_cas_ids { id pub_id cas_id });
file_path::select!(file_path_just_pub_id_materialized_path {
pub_id
materialized_path
});
file_path::select!(file_path_for_file_identifier {
id
pub_id
materialized_path
date_created
is_dir
name
extension
object_id
});
file_path::select!(file_path_for_object_validator {
pub_id
materialized_path
is_dir
name
extension
integrity_checksum
});
file_path::select!(file_path_for_media_processor {
id
materialized_path
is_dir
name
extension
cas_id
object_id
});
file_path::select!(file_path_to_isolate {
location_id
materialized_path
is_dir
name
extension
});
file_path::select!(file_path_to_isolate_with_pub_id {
pub_id
location_id
materialized_path
is_dir
name
extension
});
file_path::select!(file_path_to_isolate_with_id {
id
location_id
materialized_path
is_dir
name
extension
});
file_path::select!(file_path_walker {
pub_id
location_id
object_id
materialized_path
is_dir
name
extension
date_modified
inode
size_in_bytes_bytes
hidden
});
file_path::select!(file_path_to_handle_custom_uri {
pub_id
materialized_path
is_dir
name
extension
location: select {
id
path
instance: select {
identity
remote_identity
}
}
});
file_path::select!(file_path_to_handle_p2p_serve_file {
materialized_path
name
extension
is_dir // For isolated file path
location: select {
id
path
}
});
file_path::select!(file_path_to_full_path {
id
materialized_path
is_dir
name
extension
location: select {
id
path
}
});
// File Path includes!
file_path::include!(file_path_with_object { object });
// Object selectables!
object::select!(object_for_file_identifier {
pub_id
file_paths: select { pub_id cas_id extension is_dir materialized_path name }
});
// Object includes!
object::include!(object_with_file_paths { file_paths });
// Job selectables!
job::select!(job_without_data {
id
name
action
status
parent_id
errors_text
metadata
date_created
date_started
date_completed
task_count
completed_task_count
date_estimated_completion
});
// Location includes!
location::include!(location_with_indexer_rules {
indexer_rules: select { indexer_rule }
});
impl From<location_with_indexer_rules::Data> for location::Data {
fn from(data: location_with_indexer_rules::Data) -> Self {
Self {
id: data.id,
pub_id: data.pub_id,
path: data.path,
instance_id: data.instance_id,
name: data.name,
total_capacity: data.total_capacity,
available_capacity: data.available_capacity,
is_archived: data.is_archived,
size_in_bytes: data.size_in_bytes,
generate_preview_media: data.generate_preview_media,
sync_preview_media: data.sync_preview_media,
hidden: data.hidden,
date_created: data.date_created,
file_paths: None,
indexer_rules: None,
instance: None,
}
}
}
impl From<&location_with_indexer_rules::Data> for location::Data {
fn from(data: &location_with_indexer_rules::Data) -> Self {
Self {
id: data.id,
pub_id: data.pub_id.clone(),
path: data.path.clone(),
instance_id: data.instance_id,
name: data.name.clone(),
total_capacity: data.total_capacity,
available_capacity: data.available_capacity,
size_in_bytes: data.size_in_bytes.clone(),
is_archived: data.is_archived,
generate_preview_media: data.generate_preview_media,
sync_preview_media: data.sync_preview_media,
hidden: data.hidden,
date_created: data.date_created,
file_paths: None,
indexer_rules: None,
instance: None,
}
}
}
// Label includes!
label::include!((take: i64) => label_with_objects {
label_objects(vec![]).take(take): select {
object: select {
id
file_paths(vec![]).take(1)
}
}
});
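
A hedged sketch of how one of these `select!` helpers is consumed at a query site, mirroring the `file_path_walker` usage in the indexer's fetcher macro further down; it assumes `db` is an already-connected `PrismaClient` and that the error type re-export matches prisma-client-rust's usual `QueryError`.

use sd_core_prisma_helpers::file_path_walker;
use sd_prisma::prisma::PrismaClient;

async fn walker_paths(
    db: &PrismaClient,
) -> Result<Vec<file_path_walker::Data>, prisma_client_rust::QueryError> {
    // Only the columns named in the `file_path_walker` selection are fetched;
    // each row comes back as the generated `file_path_walker::Data` struct.
    db.file_path()
        .find_many(vec![])
        .select(file_path_walker::select())
        .exec()
        .await
}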

View file

@ -7,6 +7,7 @@ edition = "2021"
default = []
[dependencies]
# Spacedrive Sub-crates
sd-prisma = { path = "../../../crates/prisma" }
sd-sync = { path = "../../../crates/sync" }
sd-utils = { path = "../../../crates/utils" }

View file

@ -7,6 +7,7 @@ use sd_utils::uuid_to_bytes;
use std::{
cmp::Ordering,
collections::HashMap,
fmt,
ops::Deref,
sync::{
atomic::{self, AtomicBool},
@ -25,6 +26,12 @@ pub struct Manager {
pub shared: Arc<SharedState>,
}
impl fmt::Debug for Manager {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("SyncManager").finish()
}
}
#[derive(serde::Serialize, serde::Deserialize, Debug, PartialEq, Eq)]
pub struct GetOpsArgs {
pub clocks: Vec<(Uuid, NTP64)>,

View file

@ -1,6 +1,7 @@
mod mock_instance;
use sd_core_sync::*;
use sd_prisma::{prisma, prisma_sync};
use sd_sync::*;
use sd_utils::uuid_to_bytes;

View file

@ -403,10 +403,13 @@ model Job {
// Enum: sd_core::job::job_manager::JobStatus
status Int? // 0 = Queued
// List of errors, separated by "\n\n" in case of failed jobs or completed with errors
errors_text String?
data Bytes? // Serialized data to be used on pause/resume
// List of errors, separated by "\n\n" in case of failed jobs or completed with errors
errors_text String? // Deprecated, use `critical_error` or `non_critical_errors` instead
critical_error String? // Serialized error field with info about the failed job after completion
non_critical_errors Bytes? // Serialized non-critical errors field with info about the completed job with errors after completion
data Bytes? // Deprecated
metadata Bytes? // Serialized metadata field with info about the job after completion
parent_id Bytes?

View file

@ -10,8 +10,9 @@ use crate::{
},
};
use sd_core_file_path_helper::IsolatedFilePathData;
use sd_file_ext::extensions::ImageExtension;
use sd_file_path_helper::IsolatedFilePathData;
use sd_media_metadata::MediaMetadata;
use sd_utils::error::FileIOError;
@ -37,9 +38,10 @@ const UNTITLED_FILE_STR: &str = "Untitled";
const UNTITLED_TEXT_FILE_STR: &str = "Untitled.txt";
#[derive(Type, Deserialize)]
#[serde(rename_all = "camelCase")]
enum EphemeralFileCreateContextTypes {
empty,
text,
Empty,
Text,
}
pub(crate) fn mount() -> AlphaRouter<Ctx> {
@ -103,10 +105,10 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
context,
}: CreateEphemeralFileArgs| async move {
match context {
EphemeralFileCreateContextTypes::empty => {
EphemeralFileCreateContextTypes::Empty => {
path.push(name.as_deref().unwrap_or(UNTITLED_FILE_STR));
}
EphemeralFileCreateContextTypes::text => {
EphemeralFileCreateContextTypes::Text => {
path.push(name.as_deref().unwrap_or(UNTITLED_TEXT_FILE_STR));
}
}

View file

@ -1,5 +1,5 @@
use crate::{
api::{locations::object_with_file_paths, utils::library},
api::utils::library,
invalidate_query,
library::Library,
location::{get_location_path_from_location_id, LocationError},
@ -14,11 +14,13 @@ use crate::{
old_job::Job,
};
use sd_core_file_path_helper::{FilePathError, IsolatedFilePathData};
use sd_core_prisma_helpers::{
file_path_to_isolate, file_path_to_isolate_with_id, object_with_file_paths,
};
use sd_cache::{CacheNode, Model, NormalisedResult, Reference};
use sd_file_ext::kind::ObjectKind;
use sd_file_path_helper::{
file_path_to_isolate, file_path_to_isolate_with_id, FilePathError, IsolatedFilePathData,
};
use sd_images::ConvertibleExtension;
use sd_media_metadata::MediaMetadata;
use sd_prisma::{
@ -50,9 +52,10 @@ const UNTITLED_FILE_STR: &str = "Untitled";
const UNTITLED_TEXT_FILE_STR: &str = "Untitled.txt";
#[derive(Type, Deserialize)]
#[serde(rename_all = "camelCase")]
enum FileCreateContextTypes {
empty,
text,
Empty,
Text,
}
pub(crate) fn mount() -> AlphaRouter<Ctx> {
@ -329,10 +332,10 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
}
match context {
FileCreateContextTypes::empty => {
FileCreateContextTypes::Empty => {
path.push(name.as_deref().unwrap_or(UNTITLED_FILE_STR))
}
FileCreateContextTypes::text => {
FileCreateContextTypes::Text => {
path.push(name.as_deref().unwrap_or(UNTITLED_TEXT_FILE_STR))
}
}
@ -645,7 +648,7 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
Ok(())
})
})
.procedure("getConvertableImageExtensions", {
.procedure("getConvertibleImageExtensions", {
R.query(|_, _: ()| async move { Ok(sd_images::all_compatible_extensions()) })
})
.procedure("eraseFiles", {

View file

@ -6,9 +6,11 @@ use crate::{
old_file_identifier::old_file_identifier_job::OldFileIdentifierJobInit,
validation::old_validator_job::OldObjectValidatorJobInit,
},
old_job::{job_without_data, Job, JobReport, JobStatus, OldJobs},
old_job::{Job, JobReport, JobStatus, OldJobs},
};
use sd_core_prisma_helpers::job_without_data;
use sd_prisma::prisma::{job, location, SortOrder};
use std::{

View file

@ -2,6 +2,8 @@ use crate::{
invalidate_query, library::Library, object::media::old_thumbnail::get_indexed_thumb_key,
};
use sd_core_prisma_helpers::label_with_objects;
use sd_prisma::{
prisma::{label, label_on_object, object, SortOrder},
prisma_sync,
@ -14,15 +16,6 @@ use rspc::alpha::AlphaRouter;
use super::{locations::ExplorerItem, utils::library, Ctx, R};
label::include!((take: i64) => label_with_objects {
label_objects(vec![]).take(take): select {
object: select {
id
file_paths(vec![]).take(1)
}
}
});
pub(crate) fn mount() -> AlphaRouter<Ctx> {
R.router()
.procedure("list", {

View file

@ -1,12 +1,9 @@
use crate::{
invalidate_query,
location::{
delete_location, find_location,
indexer::{rules::IndexerRuleCreateArgs, OldIndexerJobInit},
light_scan_location, location_with_indexer_rules,
non_indexed::NonIndexedPathItem,
relink_location, scan_location, scan_location_sub_path, LocationCreateArgs, LocationError,
LocationUpdateArgs,
delete_location, find_location, indexer::OldIndexerJobInit, light_scan_location,
non_indexed::NonIndexedPathItem, relink_location, scan_location, scan_location_sub_path,
LocationCreateArgs, LocationError, LocationUpdateArgs,
},
object::old_file_identifier::old_file_identifier_job::OldFileIdentifierJobInit,
old_job::StatefulJob,
@ -14,11 +11,14 @@ use crate::{
util::AbortOnDrop,
};
use sd_cache::{CacheNode, Model, Normalise, NormalisedResult, NormalisedResults, Reference};
use sd_prisma::prisma::{
file_path, indexer_rule, indexer_rules_in_location, location, object, SortOrder,
use sd_core_indexer_rules::IndexerRuleCreateArgs;
use sd_core_prisma_helpers::{
file_path_with_object, label_with_objects, location_with_indexer_rules, object_with_file_paths,
};
use sd_cache::{CacheNode, Model, Normalise, NormalisedResult, NormalisedResults, Reference};
use sd_prisma::prisma::{file_path, indexer_rule, indexer_rules_in_location, location, SortOrder};
use std::path::{Path, PathBuf};
use chrono::{DateTime, FixedOffset, Utc};
@ -28,7 +28,7 @@ use serde::{Deserialize, Serialize};
use specta::Type;
use tracing::{debug, error};
use super::{labels::label_with_objects, utils::library, Ctx, R};
use super::{utils::library, Ctx, R};
// it includes the shard hex formatted as ([["f02", "cab34a76fbf3469f"]])
// Will be None if no thumbnail exists
@ -197,9 +197,6 @@ impl ExplorerItem {
}
}
file_path::include!(file_path_with_object { object });
object::include!(object_with_file_paths { file_paths });
pub(crate) fn mount() -> AlphaRouter<Ctx> {
R.router()
.procedure("list", {
@ -518,7 +515,7 @@ fn mount_indexer_rule_routes() -> AlphaRouter<Ctx> {
.procedure("create", {
R.with2(library())
.mutation(|(_, library), args: IndexerRuleCreateArgs| async move {
if args.create(&library).await?.is_some() {
if args.create(&library.db).await?.is_some() {
invalidate_query!(library, "locations.indexer_rules.list");
}

View file

@ -1,6 +1,7 @@
use crate::location::LocationError;
use sd_file_path_helper::{check_file_path_exists, IsolatedFilePathData};
use sd_core_file_path_helper::{check_file_path_exists, IsolatedFilePathData};
use sd_prisma::prisma::{self, file_path};
use chrono::{DateTime, FixedOffset, Utc};

View file

@ -1,14 +1,13 @@
use crate::{
api::{
locations::{file_path_with_object, object_with_file_paths, ExplorerItem},
utils::library,
},
api::{locations::ExplorerItem, utils::library},
library::Library,
location::{non_indexed, LocationError},
object::media::old_thumbnail::get_indexed_thumb_key,
util::{unsafe_streamed_query, BatchedStream},
};
use sd_core_prisma_helpers::{file_path_with_object, object_with_file_paths};
use sd_cache::{CacheNode, Model, Normalise, Reference};
use sd_prisma::prisma::{self, PrismaClient};

View file

@ -56,10 +56,10 @@ pub async fn run_actor(
}
debug!(
"Sending {} messages ({} to {}) to ingester",
"Sending {} messages ({:?} to {:?}) to ingester",
ops.len(),
ops.first().unwrap().timestamp.as_u64(),
ops.last().unwrap().timestamp.as_u64(),
ops.first().map(|operation| operation.timestamp.as_u64()),
ops.last().map(|operation| operation.timestamp.as_u64()),
);
err_break!(

View file

@ -1,12 +1,10 @@
use crate::{library::Libraries, Node};
use super::{err_break, CompressedCRDTOperations};
use sd_cloud_api::RequestConfigProvider;
use sd_p2p::RemoteIdentity;
use sd_prisma::prisma::{cloud_crdt_operation, instance, PrismaClient, SortOrder};
use sd_sync::CRDTOperation;
use sd_utils::uuid_to_bytes;
use tracing::{debug, info};
use std::{
collections::{hash_map::Entry, HashMap},
@ -21,8 +19,11 @@ use base64::prelude::*;
use chrono::Utc;
use serde_json::to_vec;
use tokio::{sync::Notify, time::sleep};
use tracing::{debug, info};
use uuid::Uuid;
use super::{err_break, CompressedCRDTOperations};
// Responsible for downloading sync operations from the cloud to be processed by the ingester
#[allow(clippy::too_many_arguments)]
@ -42,7 +43,7 @@ pub async fn run_actor(
active_notify.notify_waiters();
loop {
// We need to know the lastest operations we should be retrieving
// We need to know the latest operations we should be retrieving
let mut cloud_timestamps = {
let timestamps = sync.timestamps.read().await;
@ -181,10 +182,14 @@ pub async fn run_actor(
let operations = compressed_operations.into_ops();
debug!(
"Processing collection. Instance {}, Start {}, End {}",
"Processing collection. Instance {}, Start {:?}, End {:?}",
&collection.instance_uuid,
operations.first().unwrap().timestamp.as_u64(),
operations.last().unwrap().timestamp.as_u64(),
operations
.first()
.map(|operation| operation.timestamp.as_u64()),
operations
.last()
.map(|operation| operation.timestamp.as_u64()),
);
err_break!(write_cloud_ops_to_db(operations, &db).await);
@ -233,6 +238,7 @@ fn crdt_op_db(op: &CRDTOperation) -> cloud_crdt_operation::Create {
}
}
#[allow(clippy::too_many_arguments)]
pub async fn upsert_instance(
library_id: Uuid,
db: &PrismaClient,

View file

@ -1,9 +1,6 @@
use super::CompressedCRDTOperations;
use sd_core_sync::{SyncMessage, NTP64};
use sd_cloud_api::RequestConfigProvider;
use sd_core_sync::{SyncMessage, NTP64};
use tracing::debug;
use uuid::Uuid;
use std::{
sync::{
@ -14,10 +11,10 @@ use std::{
};
use tokio::{sync::Notify, time::sleep};
use tracing::debug;
use uuid::Uuid;
use super::err_break;
// Responsible for sending its instance's sync operations to the cloud.
use super::{err_break, CompressedCRDTOperations};
pub async fn run_actor(
library_id: Uuid,

View file

@ -7,10 +7,10 @@ use crate::{
Node,
};
use http_body::combinators::UnsyncBoxBody;
use hyper::{header, upgrade::OnUpgrade};
use sd_core_file_path_helper::IsolatedFilePathData;
use sd_core_prisma_helpers::file_path_to_handle_custom_uri;
use sd_file_ext::text::is_text;
use sd_file_path_helper::{file_path_to_handle_custom_uri, IsolatedFilePathData};
use sd_p2p::{RemoteIdentity, P2P};
use sd_prisma::prisma::{file_path, location};
use sd_utils::db::maybe_missing;
@ -34,6 +34,8 @@ use axum::{
routing::get,
Router,
};
use http_body::combinators::UnsyncBoxBody;
use hyper::{header, upgrade::OnUpgrade};
use mini_moka::sync::Cache;
use tokio::{
fs::{self, File},
@ -353,7 +355,7 @@ pub fn with_state(node: Arc<Node>) -> LocalState {
if let CoreEvent::InvalidateOperation(e) = event {
match e {
InvalidateOperationEvent::Single(event) => {
// TODO: This is inefficent as any change will invalidate who cache. We need the new invalidation system!!!
// TODO: This is inefficient as any change will invalidate the whole cache. We need the new invalidation system!!!
// TODO: It's also error prone and a fine-grained resource based invalidation system would avoid that.
if event.key == "search.objects" || event.key == "search.paths" {
file_metadata_cache.invalidate_all();

View file

@ -405,9 +405,7 @@ impl LibraryConfig {
.await?
.into_iter()
.filter_map(|i| {
let Some(identity) = i.identity else {
return None;
};
let identity = i.identity?;
let (remote_identity, identity) = if identity[0] == b'I' {
// We have an `IdentityOrRemoteIdentity::Identity`

View file

@ -2,7 +2,9 @@ use crate::{
api::CoreEvent, cloud, object::media::old_thumbnail::get_indexed_thumbnail_path, sync, Node,
};
use sd_file_path_helper::{file_path_to_full_path, IsolatedFilePathData};
use sd_core_file_path_helper::IsolatedFilePathData;
use sd_core_prisma_helpers::file_path_to_full_path;
use sd_p2p::Identity;
use sd_prisma::prisma::{file_path, location, PrismaClient};
use sd_utils::{db::maybe_missing, error::FileIOError};

View file

@ -1,7 +1,6 @@
use crate::{
library::LibraryConfigError,
location::{indexer, LocationManagerError},
};
use crate::{library::LibraryConfigError, location::LocationManagerError};
use sd_core_indexer_rules::seed::SeederError;
use sd_p2p::IdentityErr;
use sd_utils::{
@ -23,7 +22,7 @@ pub enum LibraryManagerError {
#[error("failed to parse uuid: {0}")]
Uuid(#[from] uuid::Error),
#[error("failed to run indexer rules seeder: {0}")]
IndexerRulesSeeder(#[from] indexer::rules::seed::SeederError),
IndexerRulesSeeder(#[from] SeederError),
// #[error("failed to initialize the key manager: {0}")]
// KeyManager(#[from] sd_crypto::Error),
#[error("error migrating the library: {0}")]

View file

@ -1,10 +1,7 @@
use crate::{
api::{utils::InvalidateOperationEvent, CoreEvent},
cloud, invalidate_query,
location::{
indexer,
metadata::{LocationMetadataError, SpacedriveLocationMetadataFile},
},
location::metadata::{LocationMetadataError, SpacedriveLocationMetadataFile},
object::tag,
p2p, sync,
util::{mpscrr, MaybeUndefined},
@ -160,6 +157,7 @@ impl Libraries {
.await
}
#[allow(clippy::too_many_arguments)]
pub(crate) async fn create_with_uuid(
self: &Arc<Self>,
id: Uuid,
@ -230,7 +228,7 @@ impl Libraries {
if should_seed {
tag::seed::new_library(&library).await?;
indexer::rules::seed::new_or_existing_library(&library).await?;
sd_core_indexer_rules::seed::new_or_existing_library(&library.db).await?;
debug!("Seeded library '{id:?}'");
}
@ -452,7 +450,7 @@ impl Libraries {
instance::node_id::set(node_config.id.as_bytes().to_vec()),
instance::metadata::set(Some(
serde_json::to_vec(&node.p2p.peer_metadata())
.expect("invalid peer metdata"),
.expect("invalid peer metadata"),
)),
],
)
@ -525,7 +523,7 @@ impl Libraries {
if should_seed {
// library.orphan_remover.invoke().await;
indexer::rules::seed::new_or_existing_library(&library).await?;
sd_core_indexer_rules::seed::new_or_existing_library(&library.db).await?;
}
for location in library

View file

@ -1,4 +1,5 @@
use sd_file_path_helper::FilePathError;
use sd_core_file_path_helper::FilePathError;
use sd_prisma::prisma::location;
use sd_utils::{
db::MissingFieldError,

View file

@ -1,10 +1,11 @@
use crate::library::Library;
use sd_file_path_helper::{
file_path_pub_and_cas_ids, FilePathError, IsolatedFilePathData, IsolatedFilePathDataParts,
};
use sd_core_file_path_helper::{FilePathError, IsolatedFilePathData, IsolatedFilePathDataParts};
use sd_core_indexer_rules::IndexerRuleError;
use sd_core_prisma_helpers::file_path_pub_and_cas_ids;
use sd_prisma::{
prisma::{file_path, location, object as prisma_object, PrismaClient},
prisma::{file_path, location, PrismaClient},
prisma_sync,
};
use sd_sync::*;
@ -26,10 +27,8 @@ use super::location_with_indexer_rules;
pub mod old_indexer_job;
mod old_shallow;
mod old_walk;
pub mod rules;
use old_walk::WalkedEntry;
use rules::IndexerRuleError;
pub use old_indexer_job::OldIndexerJobInit;
pub use old_shallow::*;
@ -84,13 +83,12 @@ impl From<IndexerError> for rspc::Error {
async fn execute_indexer_save_step(
location: &location_with_indexer_rules::Data,
save_step: &OldIndexerJobSaveStep,
OldIndexerJobSaveStep { walked, .. }: &OldIndexerJobSaveStep,
library: &Library,
) -> Result<i64, IndexerError> {
let Library { sync, db, .. } = library;
let (sync_stuff, paths): (Vec<_>, Vec<_>) = save_step
.walked
let (sync_stuff, paths): (Vec<_>, Vec<_>) = walked
.iter()
.map(|entry| {
let IsolatedFilePathDataParts {
@ -181,8 +179,8 @@ async fn execute_indexer_update_step(
let pub_id = sd_utils::uuid_to_bytes(entry.pub_id);
let should_unlink_object = if let Some(object_id) = entry.maybe_object_id {
db.object()
.count(vec![prisma_object::id::equals(object_id)])
db.file_path()
.count(vec![file_path::object_id::equals(Some(object_id))])
.exec()
.await? > 1
} else {
@ -310,7 +308,7 @@ macro_rules! file_paths_db_fetcher_fn {
.find_many(vec![::prisma_client_rust::operator::or(
founds.collect::<Vec<_>>(),
)])
.select(::sd_file_path_helper::file_path_walker::select())
.select(::sd_core_prisma_helpers::file_path_walker::select())
})
.collect::<Vec<_>>();
@ -332,7 +330,7 @@ macro_rules! to_remove_db_fetcher_fn {
|parent_iso_file_path, unique_location_id_materialized_path_name_extension_params| async {
let location_id: ::sd_prisma::prisma::location::id::Type = $location_id;
let db: &::sd_prisma::prisma::PrismaClient = $db;
let parent_iso_file_path: ::sd_file_path_helper::IsolatedFilePathData<
let parent_iso_file_path: ::sd_core_file_path_helper::IsolatedFilePathData<
'static,
> = parent_iso_file_path;
let unique_location_id_materialized_path_name_extension_params: ::std::vec::Vec<
@ -396,7 +394,7 @@ macro_rules! to_remove_db_fetcher_fn {
found
.into_iter()
.filter(|file_path| !founds_ids.contains(&file_path.id))
.map(|file_path| ::sd_file_path_helper::file_path_pub_and_cas_ids::Data {
.map(|file_path| ::sd_core_prisma_helpers::file_path_pub_and_cas_ids::Data {
id: file_path.id,
pub_id: file_path.pub_id,
cas_id: file_path.cas_id,

View file

@ -9,10 +9,12 @@ use crate::{
to_remove_db_fetcher_fn,
};
use sd_file_path_helper::{
use sd_core_file_path_helper::{
ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location,
IsolatedFilePathData,
};
use sd_core_indexer_rules::IndexerRule;
use sd_prisma::{
prisma::{file_path, location},
prisma_sync,
@ -38,9 +40,8 @@ use tracing::{debug, info, warn};
use super::{
execute_indexer_save_step, execute_indexer_update_step, iso_file_path_factory,
old_walk::{keep_walking, walk, ToWalkEntry, WalkResult},
remove_non_existing_file_paths, reverse_update_directories_sizes,
rules::IndexerRule,
IndexerError, OldIndexerJobSaveStep, OldIndexerJobUpdateStep,
remove_non_existing_file_paths, reverse_update_directories_sizes, IndexerError,
OldIndexerJobSaveStep, OldIndexerJobUpdateStep,
};
/// BATCH_SIZE is the number of files to index at each step, writing each chunk of file metadata to the database.

View file

@ -11,10 +11,12 @@ use crate::{
to_remove_db_fetcher_fn, Node,
};
use sd_file_path_helper::{
use sd_core_file_path_helper::{
check_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location,
IsolatedFilePathData,
};
use sd_core_indexer_rules::IndexerRule;
use sd_utils::db::maybe_missing;
use std::{
@ -29,8 +31,7 @@ use tracing::{debug, error};
use super::{
execute_indexer_save_step, iso_file_path_factory, location_with_indexer_rules,
old_walk::walk_single_dir, remove_non_existing_file_paths, rules::IndexerRule, IndexerError,
OldIndexerJobSaveStep,
old_walk::walk_single_dir, remove_non_existing_file_paths, IndexerError, OldIndexerJobSaveStep,
};
/// BATCH_SIZE is the number of files to index at each step, writing each chunk of file metadata to the database.

View file

@ -1,6 +1,7 @@
use sd_file_path_helper::{
file_path_pub_and_cas_ids, file_path_walker, FilePathMetadata, IsolatedFilePathData,
};
use sd_core_file_path_helper::{FilePathMetadata, IsolatedFilePathData};
use sd_core_indexer_rules::{IndexerRule, RuleKind};
use sd_core_prisma_helpers::{file_path_pub_and_cas_ids, file_path_walker};
use sd_prisma::prisma::file_path;
use sd_utils::{db::inode_from_db, error::FileIOError};
@ -17,10 +18,7 @@ use tokio::fs;
use tracing::trace;
use uuid::Uuid;
use super::{
rules::{IndexerRule, RuleKind},
IndexerError,
};
use super::IndexerError;
const TO_WALK_QUEUE_INITIAL_CAPACITY: usize = 32;
const WALKER_PATHS_BUFFER_INITIAL_CAPACITY: usize = 256;
@ -299,7 +297,7 @@ where
indexed_paths.insert(WalkingEntry {
iso_file_path: iso_file_path_factory(root, true)?,
maybe_metadata: Some(FilePathMetadata::from_path(&root, &metadata).await?),
maybe_metadata: Some(FilePathMetadata::from_path(root, &metadata)?),
});
}
@ -605,7 +603,6 @@ where
};
let Ok(metadata) = FilePathMetadata::from_path(&current_path, &metadata)
.await
.map_err(|e| errors.push(e.into()))
else {
continue;
@ -643,8 +640,7 @@ where
continue;
};
let Ok(metadata) = FilePathMetadata::from_path(&ancestor, &metadata)
.await
let Ok(metadata) = FilePathMetadata::from_path(ancestor, &metadata)
.map_err(|e| errors.push(e.into()))
else {
continue;
@ -696,10 +692,10 @@ where
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::panic)]
mod tests {
use super::super::rules::RulePerKind;
use super::*;
use chrono::Utc;
use globset::{Glob, GlobSetBuilder};
use sd_core_indexer_rules::RulePerKind;
use tempfile::{tempdir, TempDir};
// use tracing_test::traced_test;
@ -717,6 +713,21 @@ mod tests {
}
}
fn new_indexer_rule(
name: impl Into<String>,
default: bool,
rules: Vec<RulePerKind>,
) -> IndexerRule {
IndexerRule {
id: None,
name: name.into(),
default,
rules,
date_created: Utc::now(),
date_modified: Utc::now(),
}
}
async fn prepare_location() -> TempDir {
let root = tempdir().unwrap();
let root_path = root.path();
@ -872,7 +883,7 @@ mod tests {
.into_iter()
.collect::<HashSet<_>>();
let only_photos_rule = &[IndexerRule::new(
let only_photos_rule = &[new_indexer_rule(
"only photos".to_string(),
false,
vec![RulePerKind::AcceptFilesByGlob(
@ -950,7 +961,7 @@ mod tests {
.into_iter()
.collect::<HashSet<_>>();
let git_repos = &[IndexerRule::new(
let git_repos = &[new_indexer_rule(
"git repos".to_string(),
false,
vec![RulePerKind::AcceptIfChildrenDirectoriesArePresent(
@ -1019,14 +1030,14 @@ mod tests {
.collect::<HashSet<_>>();
let git_repos_no_deps_no_build_dirs = &[
IndexerRule::new(
new_indexer_rule(
"git repos".to_string(),
false,
vec![RulePerKind::AcceptIfChildrenDirectoriesArePresent(
[".git".to_string()].into_iter().collect(),
)],
),
IndexerRule::new(
new_indexer_rule(
"reject node_modules".to_string(),
false,
vec![RulePerKind::RejectFilesByGlob(
@ -1037,7 +1048,7 @@ mod tests {
.unwrap(),
)],
),
IndexerRule::new(
new_indexer_rule(
"reject rust build dir".to_string(),
false,
vec![RulePerKind::RejectFilesByGlob(

View file

@ -4,7 +4,8 @@ use crate::{
Node,
};
use sd_file_path_helper::FilePathError;
use sd_core_file_path_helper::FilePathError;
use sd_prisma::prisma::location;
use sd_utils::{db::MissingFieldError, error::FileIOError};
@ -18,11 +19,9 @@ use futures::executor::block_on;
use thiserror::Error;
use tokio::sync::{
broadcast::{self, Receiver},
oneshot, RwLock,
mpsc, oneshot, RwLock,
};
use tracing::{debug, error};
use tokio::sync::mpsc;
use uuid::Uuid;
mod watcher;

View file

@ -2,7 +2,10 @@
use crate::{invalidate_query, library::Library, location::manager::LocationManagerError, Node};
use sd_file_path_helper::{check_file_path_exists, get_inode, FilePathError, IsolatedFilePathData};
use sd_core_file_path_helper::{
check_file_path_exists, get_inode, FilePathError, IsolatedFilePathData,
};
use sd_prisma::prisma::location;
use sd_utils::error::FileIOError;

View file

@ -11,7 +11,10 @@
use crate::{invalidate_query, library::Library, location::manager::LocationManagerError, Node};
use sd_file_path_helper::{check_file_path_exists, get_inode, FilePathError, IsolatedFilePathData};
use sd_core_file_path_helper::{
check_file_path_exists, get_inode, FilePathError, IsolatedFilePathData,
};
use sd_prisma::prisma::location;
use sd_utils::error::FileIOError;

View file

@ -18,13 +18,15 @@ use crate::{
Node,
};
use sd_file_ext::{extensions::ImageExtension, kind::ObjectKind};
use sd_file_path_helper::{
check_file_path_exists, file_path_with_object, filter_existing_file_path_params,
use sd_core_file_path_helper::{
check_file_path_exists, filter_existing_file_path_params,
isolated_file_path_data::extract_normalized_materialized_path_str,
loose_find_existing_file_path_params, path_is_hidden, FilePathError, FilePathMetadata,
IsolatedFilePathData, MetadataExt,
};
use sd_core_prisma_helpers::file_path_with_object;
use sd_file_ext::{extensions::ImageExtension, kind::ObjectKind};
use sd_prisma::{
prisma::{file_path, location, media_data, object},
prisma_sync,
@ -37,10 +39,10 @@ use sd_utils::{
};
#[cfg(target_family = "unix")]
use sd_file_path_helper::get_inode;
use sd_core_file_path_helper::get_inode;
#[cfg(target_family = "windows")]
use sd_file_path_helper::get_inode_from_path;
use sd_core_file_path_helper::get_inode_from_path;
use std::{
collections::{HashMap, HashSet},
@ -120,7 +122,7 @@ pub(super) async fn create_dir(
library,
iso_file_path.to_parts(),
None,
FilePathMetadata::from_path(&path, metadata).await?,
FilePathMetadata::from_path(path, metadata)?,
)
.await?;
@ -177,7 +179,7 @@ async fn inner_create_file(
let iso_file_path_parts = iso_file_path.to_parts();
let extension = iso_file_path_parts.extension.to_string();
let metadata = FilePathMetadata::from_path(&path, metadata).await?;
let metadata = FilePathMetadata::from_path(path, metadata)?;
// First we check if already exist a file with this same inode number
// if it does, we just update it

View file

@ -9,7 +9,8 @@
use crate::{invalidate_query, library::Library, location::manager::LocationManagerError, Node};
use sd_file_path_helper::{get_inode_from_path, FilePathError};
use sd_core_file_path_helper::{get_inode_from_path, FilePathError};
use sd_prisma::prisma::location;
use sd_utils::error::FileIOError;

View file

@ -9,7 +9,11 @@ use crate::{
Node,
};
use sd_file_path_helper::{filter_existing_file_path_params, IsolatedFilePathData};
use sd_core_file_path_helper::{
filter_existing_file_path_params, IsolatedFilePathData, IsolatedFilePathDataParts,
};
use sd_core_prisma_helpers::location_with_indexer_rules;
use sd_prisma::{
prisma::{file_path, indexer_rules_in_location, location, PrismaClient},
prisma_sync,
@ -21,8 +25,6 @@ use sd_utils::{
msgpack, uuid_to_bytes,
};
use sd_file_path_helper::IsolatedFilePathDataParts;
use std::{
collections::HashSet,
path::{Component, Path, PathBuf},
@ -53,11 +55,6 @@ use metadata::SpacedriveLocationMetadataFile;
pub type LocationPubId = Uuid;
// Location includes!
location::include!(location_with_indexer_rules {
indexer_rules: select { indexer_rule }
});
/// `LocationCreateArgs` is the argument received from the client using `rspc` to create a new location.
/// It has the actual path and a vector of indexer rules ids, to create many-to-many relationships
/// between the location and indexer rules.
@ -867,52 +864,6 @@ pub async fn delete_directory(
Ok(())
}
impl From<location_with_indexer_rules::Data> for location::Data {
fn from(data: location_with_indexer_rules::Data) -> Self {
Self {
id: data.id,
pub_id: data.pub_id,
path: data.path,
instance_id: data.instance_id,
name: data.name,
total_capacity: data.total_capacity,
available_capacity: data.available_capacity,
is_archived: data.is_archived,
size_in_bytes: data.size_in_bytes,
generate_preview_media: data.generate_preview_media,
sync_preview_media: data.sync_preview_media,
hidden: data.hidden,
date_created: data.date_created,
file_paths: None,
indexer_rules: None,
instance: None,
}
}
}
impl From<&location_with_indexer_rules::Data> for location::Data {
fn from(data: &location_with_indexer_rules::Data) -> Self {
Self {
id: data.id,
pub_id: data.pub_id.clone(),
path: data.path.clone(),
instance_id: data.instance_id,
name: data.name.clone(),
total_capacity: data.total_capacity,
available_capacity: data.available_capacity,
size_in_bytes: data.size_in_bytes.clone(),
is_archived: data.is_archived,
generate_preview_media: data.generate_preview_media,
sync_preview_media: data.sync_preview_media,
hidden: data.hidden,
date_created: data.date_created,
file_paths: None,
indexer_rules: None,
instance: None,
}
}
}
async fn check_nested_location(
location_path: impl AsRef<Path>,
db: &PrismaClient,
@ -1049,8 +1000,8 @@ pub async fn create_file_path(
..
}: IsolatedFilePathDataParts<'_>,
cas_id: Option<String>,
metadata: sd_file_path_helper::FilePathMetadata,
) -> Result<file_path::Data, sd_file_path_helper::FilePathError> {
metadata: sd_core_file_path_helper::FilePathMetadata,
) -> Result<file_path::Data, sd_core_file_path_helper::FilePathError> {
use sd_utils::db::inode_to_db;
use sd_prisma::prisma;
@ -1063,7 +1014,7 @@ pub async fn create_file_path(
.select(location::select!({ id pub_id }))
.exec()
.await?
.ok_or(sd_file_path_helper::FilePathError::LocationNotFound(
.ok_or(sd_core_file_path_helper::FilePathError::LocationNotFound(
location_id,
))?;

View file

@ -8,10 +8,13 @@ use crate::{
Node,
};
use futures::Stream;
use itertools::Either;
use sd_core_file_path_helper::{path_is_hidden, MetadataExt};
use sd_core_indexer_rules::{
seed::{no_hidden, no_os_protected},
IndexerRule, RuleKind,
};
use sd_file_ext::{extensions::Extension, kind::ObjectKind};
use sd_file_path_helper::{path_is_hidden, MetadataExt};
use sd_prisma::prisma::location;
use sd_utils::{chain_optional_iter, error::FileIOError};
@ -23,6 +26,8 @@ use std::{
};
use chrono::{DateTime, Utc};
use futures::Stream;
use itertools::Either;
use rspc::ErrorCode;
use serde::Serialize;
use specta::Type;
@ -31,13 +36,7 @@ use tokio::{io, sync::mpsc, task::JoinError};
use tokio_stream::wrappers::ReceiverStream;
use tracing::{error, span, warn, Level};
use super::{
indexer::rules::{
seed::{no_hidden, no_os_protected},
IndexerRule, RuleKind,
},
normalize_path,
};
use super::normalize_path;
#[derive(Debug, Error)]
pub enum NonIndexedLocationError {

View file

@ -1,6 +1,7 @@
use crate::location::LocationError;
use sd_file_path_helper::FilePathError;
use sd_core_file_path_helper::FilePathError;
use sd_prisma::prisma::file_path;
use sd_utils::{
db::MissingFieldError,

View file

@ -1,6 +1,8 @@
use crate::location::LocationError;
use sd_file_path_helper::{file_path_with_object, IsolatedFilePathData};
use sd_core_file_path_helper::IsolatedFilePathData;
use sd_core_prisma_helpers::file_path_with_object;
use sd_prisma::prisma::{file_path, location, PrismaClient};
use sd_utils::{
db::maybe_missing,

View file

@ -7,7 +7,8 @@ use crate::{
},
};
use sd_file_path_helper::{join_location_relative_path, IsolatedFilePathData};
use sd_core_file_path_helper::{join_location_relative_path, IsolatedFilePathData};
use sd_prisma::prisma::{file_path, location};
use sd_utils::{db::maybe_missing, error::FileIOError};

View file

@ -8,7 +8,8 @@ use crate::{
},
};
use sd_file_path_helper::push_location_relative_path;
use sd_core_file_path_helper::push_location_relative_path;
use sd_prisma::prisma::{file_path, location};
use sd_utils::error::FileIOError;

View file

@ -8,7 +8,8 @@ use crate::{
},
};
use sd_file_path_helper::IsolatedFilePathData;
use sd_core_file_path_helper::IsolatedFilePathData;
use sd_prisma::prisma::{file_path, location};
use sd_utils::{db::maybe_missing, error::FileIOError};

View file

@ -1,7 +1,9 @@
use crate::old_job::JobRunErrors;
use sd_core_file_path_helper::IsolatedFilePathData;
use sd_core_prisma_helpers::file_path_for_media_processor;
use sd_file_ext::extensions::{Extension, ImageExtension, ALL_IMAGE_EXTENSIONS};
use sd_file_path_helper::{file_path_for_media_processor, IsolatedFilePathData};
use sd_media_metadata::ImageMetadata;
use sd_prisma::prisma::{location, media_data, PrismaClient};
use sd_utils::error::FileIOError;

View file

@ -11,11 +11,13 @@ use crate::{
#[cfg(feature = "ai")]
use crate::old_job::JobRunErrors;
use sd_file_ext::extensions::Extension;
use sd_file_path_helper::{
use sd_core_file_path_helper::{
ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location,
file_path_for_media_processor, IsolatedFilePathData,
IsolatedFilePathData,
};
use sd_core_prisma_helpers::file_path_for_media_processor;
use sd_file_ext::extensions::Extension;
use sd_prisma::prisma::{location, PrismaClient};
use sd_utils::db::maybe_missing;

View file

@ -1,6 +1,8 @@
use crate::old_job::{JobRunErrors, JobRunMetadata};
use sd_file_path_helper::{file_path_for_media_processor, FilePathError};
use sd_core_file_path_helper::FilePathError;
use sd_core_prisma_helpers::file_path_for_media_processor;
use sd_prisma::prisma::{location, PrismaClient};
use std::path::Path;

View file

@ -6,11 +6,13 @@ use crate::{
Node,
};
use sd_file_ext::extensions::Extension;
use sd_file_path_helper::{
use sd_core_file_path_helper::{
ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location,
file_path_for_media_processor, IsolatedFilePathData,
IsolatedFilePathData,
};
use sd_core_prisma_helpers::file_path_for_media_processor;
use sd_file_ext::extensions::Extension;
use sd_prisma::prisma::{location, PrismaClient};
use sd_utils::db::maybe_missing;

View file

@ -15,12 +15,6 @@ pub mod validation;
// Some Objects are purely virtual, unless they have one or more associated Paths, which refer to a file found in a Location
// Objects are what can be added to Spaces
// Object selectables!
object::select!(object_for_file_identifier {
pub_id
file_paths: select { pub_id cas_id extension is_dir materialized_path name }
});
// The response to provide the Explorer when looking at Objects
#[derive(Debug, Serialize, Deserialize, Type)]
pub struct ObjectsForExplorer {

View file

@ -1,11 +1,9 @@
use crate::{
library::Library,
object::{cas::generate_cas_id, object_for_file_identifier},
old_job::JobError,
};
use crate::{library::Library, object::cas::generate_cas_id, old_job::JobError};
use sd_core_file_path_helper::{FilePathError, IsolatedFilePathData};
use sd_core_prisma_helpers::{file_path_for_file_identifier, object_for_file_identifier};
use sd_file_ext::{extensions::Extension, kind::ObjectKind};
use sd_file_path_helper::{file_path_for_file_identifier, FilePathError, IsolatedFilePathData};
use sd_prisma::{
prisma::{file_path, location, object, PrismaClient},
prisma_sync,

View file

@ -6,10 +6,12 @@ use crate::{
},
};
use sd_file_path_helper::{
use sd_core_file_path_helper::{
ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location,
file_path_for_file_identifier, IsolatedFilePathData,
IsolatedFilePathData,
};
use sd_core_prisma_helpers::file_path_for_file_identifier;
use sd_prisma::prisma::{file_path, location, PrismaClient, SortOrder};
use sd_utils::db::maybe_missing;

View file

@ -1,9 +1,11 @@
use crate::{invalidate_query, library::Library, old_job::JobError};
use sd_file_path_helper::{
use sd_core_file_path_helper::{
ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location,
file_path_for_file_identifier, IsolatedFilePathData,
IsolatedFilePathData,
};
use sd_core_prisma_helpers::file_path_for_file_identifier;
use sd_prisma::prisma::{file_path, location, PrismaClient, SortOrder};
use sd_utils::db::maybe_missing;

View file

@ -1,4 +1,4 @@
use sd_file_path_helper::FilePathError;
use sd_core_file_path_helper::FilePathError;
use sd_utils::error::FileIOError;
use std::path::Path;

View file

@ -5,10 +5,12 @@ use crate::{
},
};
use sd_file_path_helper::{
use sd_core_file_path_helper::{
ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location,
file_path_for_object_validator, IsolatedFilePathData,
IsolatedFilePathData,
};
use sd_core_prisma_helpers::file_path_for_object_validator;
use sd_prisma::{
prisma::{file_path, location},
prisma_sync,

View file

@ -1,5 +1,7 @@
use crate::library::Library;
use sd_core_prisma_helpers::job_without_data;
use sd_prisma::prisma::job;
use sd_utils::db::{maybe_missing, MissingFieldError};
@ -24,22 +26,6 @@ pub enum JobReportUpdate {
Phase(String),
}
job::select!(job_without_data {
id
name
action
status
parent_id
errors_text
metadata
date_created
date_started
date_completed
task_count
completed_task_count
date_estimated_completion
});
#[derive(Debug, Serialize, Deserialize, Type, Clone)]
pub struct JobReport {
pub id: Uuid,

View file

@ -10,11 +10,15 @@ repository = { workspace = true }
edition = { workspace = true }
[dependencies]
sd-prisma = { path = "../prisma" }
# Inner Core Sub-crates
sd-core-file-path-helper = { path = "../../core/crates/file-path-helper" }
sd-core-prisma-helpers = { path = "../../core/crates/prisma-helpers" }
sd-core-sync = { path = "../../core/crates/sync" }
# Spacedrive Sub-crates
sd-prisma = { path = "../prisma" }
sd-sync = { path = "../sync" }
sd-utils = { path = "../utils" }
sd-file-path-helper = { path = "../file-path-helper" }
async-channel = { workspace = true }
chrono = { workspace = true, features = ["serde"] }

View file

@ -1,4 +1,5 @@
use sd_file_path_helper::file_path_for_media_processor;
use sd_core_prisma_helpers::file_path_for_media_processor;
use sd_prisma::prisma::{location, PrismaClient};
use sd_utils::error::FileIOError;

View file

@ -1,6 +1,6 @@
#![allow(non_camel_case_types)]
use sd_core_file_path_helper::IsolatedFilePathData;
use sd_core_prisma_helpers::file_path_for_media_processor;
use sd_file_path_helper::{file_path_for_media_processor, IsolatedFilePathData};
use sd_prisma::{
prisma::{file_path, label, label_on_object, object, PrismaClient},
prisma_sync,

View file

@ -6,14 +6,18 @@ edition.workspace = true
repository.workspace = true
[dependencies]
# Spacedrive Sub-crates
sd-p2p = { path = "../p2p" }
reqwest = "0.11.22"
serde.workspace = true
serde_json.workspace = true
thiserror = "1.0.50"
uuid.workspace = true
base64 = { workspace = true }
rmpv = { workspace = true }
rspc = { workspace = true }
specta.workspace = true
base64.workspace = true
rmpv.workspace = true
tracing.workspace = true
serde = { workspace = true }
serde_json = { workspace = true }
specta = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }
uuid = { workspace = true }
reqwest = "0.11.22"

View file

@ -7,9 +7,11 @@ edition.workspace = true
repository.workspace = true
[dependencies]
# Spacedrive Sub-crates
sd-p2p = { path = "../p2p" }
sd-p2p-proto = { path = "../p2p-proto" }
thiserror.workspace = true
tokio.workspace = true
tracing.workspace = true
uuid.workspace = true
thiserror = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }
uuid = { workspace = true }

View file

@ -7,7 +7,8 @@ edition.workspace = true
repository.workspace = true
[dependencies]
ed25519-dalek = "2.1.1"
thiserror.workspace = true
thiserror = { workspace = true }
tokio = { workspace = true, features = ["io-util"] }
uuid.workspace = true
uuid = { workspace = true }
ed25519-dalek = "2.1.1"

View file

@ -7,5 +7,7 @@ edition.workspace = true
repository.workspace = true
[dependencies]
# Spacedrive Sub-crates
sd-p2p = { path = "../p2p" }
tokio = { workspace = true, features = ["io-util"] }

View file

@ -6,6 +6,7 @@ repository = { workspace = true }
edition = { workspace = true }
[dependencies]
# Spacedrive Sub-crates
sd-sync-generator = { path = "../sync-generator" }
prisma-client-rust-cli = { workspace = true }

View file

@ -4,6 +4,7 @@ version = "0.1.0"
edition = "2021"
[dependencies]
# Spacedrive Sub-crates
sd-cache = { path = "../cache" }
sd-sync = { path = "../sync" }

View file

@ -8,14 +8,17 @@ repository = { workspace = true }
edition = { workspace = true }
[dependencies]
serde_json = "1.0.85"
serde = { version = "1.0.145", features = ["derive"] }
# Spacedrive Sub-crates
sd-sync = { path = ".." }
axum = { workspace = true }
rspc = { workspace = true, features = ["axum"] }
tokio = { workspace = true, features = ["full"] }
prisma-client-rust = { workspace = true }
serde_json = { workspace = true }
serde = { workspace = true, features = ["derive"] }
uuid = { workspace = true, features = ["v4"] }
dotenv = "0.15.0"
tower-http = { version = "0.3.4", features = ["cors"] }
sd-sync = { path = ".." }
uuid = { workspace = true, features = ["v4"] }
http = "0.2.8"

View file

@ -34,9 +34,11 @@ pin-project = "1.1.4"
tokio = { workspace = true, features = ["macros", "test-util", "fs"] }
tempfile = { workspace = true }
rand = "0.8.5"
tracing-test = { version = "^0.2.4", features = ["no-env-filter"] }
tracing-test = { workspace = true, features = [
"no-env-filter",
] }
thiserror = { workspace = true }
lending-stream = "1.0.0"
lending-stream = { workspace = true }
serde = { workspace = true, features = ["derive"] }
rmp-serde = { workspace = true }
uuid = { workspace = true, features = ["serde"] }

View file

@ -94,8 +94,11 @@ mod task;
mod worker;
pub use error::{RunError, SystemError as TaskSystemError};
pub use system::{Dispatcher as TaskDispatcher, System as TaskSystem};
pub use task::{
AnyTaskOutput, ExecStatus, Interrupter, InterrupterFuture, InterruptionKind, IntoAnyTaskOutput,
IntoTask, Task, TaskHandle, TaskId, TaskOutput, TaskStatus,
pub use system::{
BaseDispatcher as BaseTaskDispatcher, Dispatcher as TaskDispatcher, System as TaskSystem,
};
pub use task::{
AnyTaskOutput, CancelTaskOnDrop, ExecStatus, Interrupter, InterrupterFuture, InterruptionKind,
IntoAnyTaskOutput, IntoTask, SerializableTask, Task, TaskHandle, TaskId, TaskOutput,
TaskRemoteController, TaskStatus,
};
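
As a reading aid, here is a minimal sketch (not part of this diff) of how a caller can use the re-exported API: it dispatches one task through any `TaskDispatcher` and matches on the resulting `TaskStatus`, whose `Done` variant now carries the `TaskId` alongside the output. The helper name `dispatch_and_wait` is invented for illustration; only items re-exported above are used.

use sd_task_system::{
	IntoTask, RunError, TaskDispatcher, TaskId, TaskOutput, TaskStatus, TaskSystemError,
};

/// Dispatches a single task and waits for it, returning its output if it completed.
async fn dispatch_and_wait<E: RunError>(
	dispatcher: &impl TaskDispatcher<E>,
	task: impl IntoTask<E>,
) -> Result<Option<(TaskId, TaskOutput)>, TaskSystemError> {
	// `dispatch` hands the task to a worker; the returned `TaskHandle` is itself a Future.
	let handle = dispatcher.dispatch(task).await;

	match handle.await? {
		// `Done` now carries the task id together with the output.
		TaskStatus::Done((task_id, output)) => Ok(Some((task_id, output))),
		// Cancellation, abortion, shutdown or error: nothing to return here.
		_ => Ok(None),
	}
}
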

View file

@ -23,7 +23,7 @@ pub enum SystemMessage {
CancelNotRunningTask {
task_id: TaskId,
worker_id: WorkerId,
ack: oneshot::Sender<Result<(), SystemError>>,
ack: oneshot::Sender<()>,
},
ForceAbortion {
task_id: TaskId,
@ -51,7 +51,7 @@ pub enum WorkerMessage<E: RunError> {
},
CancelNotRunningTask {
task_id: TaskId,
ack: oneshot::Sender<Result<(), SystemError>>,
ack: oneshot::Sender<()>,
},
ForceAbortion {
task_id: TaskId,

View file

@ -1,6 +1,8 @@
use std::{
cell::RefCell,
collections::HashSet,
fmt,
future::Future,
num::NonZeroUsize,
pin::pin,
sync::{
@ -30,7 +32,7 @@ use super::{
pub struct System<E: RunError> {
workers: Arc<Vec<Worker<E>>>,
msgs_tx: chan::Sender<SystemMessage>,
dispatcher: Dispatcher<E>,
dispatcher: BaseDispatcher<E>,
handle: RefCell<Option<JoinHandle<()>>>,
}
@ -94,7 +96,7 @@ impl<E: RunError> System<E> {
Self {
workers: Arc::clone(&workers),
msgs_tx,
dispatcher: Dispatcher {
dispatcher: BaseDispatcher {
workers,
idle_workers,
last_worker_id: Arc::new(AtomicWorkerId::new(0)),
@ -115,12 +117,18 @@ impl<E: RunError> System<E> {
}
/// Dispatches many tasks to the system, the tasks will be assigned to workers and executed as soon as possible.
pub async fn dispatch_many(&self, into_tasks: Vec<impl IntoTask<E>>) -> Vec<TaskHandle<E>> {
pub async fn dispatch_many<I: IntoIterator<Item = impl IntoTask<E>> + Send>(
&self,
into_tasks: I,
) -> Vec<TaskHandle<E>>
where
<I as IntoIterator>::IntoIter: Send,
{
self.dispatcher.dispatch_many(into_tasks).await
}
/// Returns a dispatcher that can be used to remotely dispatch tasks to the system.
pub fn get_dispatcher(&self) -> Dispatcher<E> {
pub fn get_dispatcher(&self) -> BaseDispatcher<E> {
self.dispatcher.clone()
}
@ -314,11 +322,7 @@ impl SystemComm {
.expect("System channel closed trying receive pause not running task response")
}
pub async fn cancel_not_running_task(
&self,
task_id: TaskId,
worker_id: WorkerId,
) -> Result<(), SystemError> {
pub async fn cancel_not_running_task(&self, task_id: TaskId, worker_id: WorkerId) {
let (tx, rx) = oneshot::channel();
self.0
@ -331,7 +335,7 @@ impl SystemComm {
.expect("System channel closed trying to cancel a not running task");
rx.await
.expect("System channel closed trying receive cancel a not running task response")
.expect("System channel closed trying receive cancel a not running task response");
}
pub async fn request_help(&self, worker_id: WorkerId, task_count: usize) {
@ -390,13 +394,45 @@ impl SystemComm {
/// It can be used to dispatch tasks to the system from other threads or tasks.
/// It uses [`Arc`] internally so it can be cheaply cloned and put inside tasks so tasks can dispatch other tasks.
#[derive(Debug)]
pub struct Dispatcher<E: RunError> {
pub struct BaseDispatcher<E: RunError> {
workers: Arc<Vec<Worker<E>>>,
idle_workers: Arc<Vec<AtomicBool>>,
last_worker_id: Arc<AtomicWorkerId>,
}
impl<E: RunError> Clone for Dispatcher<E> {
pub trait Dispatcher<E: RunError>: fmt::Debug + Clone + Send + Sync + 'static {
/// Dispatches a task to the system, the task will be assigned to a worker and executed as soon as possible.
fn dispatch(&self, into_task: impl IntoTask<E>) -> impl Future<Output = TaskHandle<E>> + Send {
self.dispatch_boxed(into_task.into_task())
}
/// Dispatches an already boxed task to the system, the task will be assigned to a worker and executed as
/// soon as possible.
fn dispatch_boxed(
&self,
boxed_task: Box<dyn Task<E>>,
) -> impl Future<Output = TaskHandle<E>> + Send;
/// Dispatches many tasks to the system, the tasks will be assigned to workers and executed as soon as possible.
fn dispatch_many<I: IntoIterator<Item = impl IntoTask<E>> + Send>(
&self,
into_tasks: I,
) -> impl Future<Output = Vec<TaskHandle<E>>> + Send
where
<I as IntoIterator>::IntoIter: Send,
{
self.dispatch_many_boxed(into_tasks.into_iter().map(IntoTask::into_task))
}
/// Dispatches many already boxed tasks to the system, the tasks will be assigned to workers and executed as
/// soon as possible.
fn dispatch_many_boxed(
&self,
boxed_tasks: impl IntoIterator<Item = Box<dyn Task<E>>> + Send,
) -> impl Future<Output = Vec<TaskHandle<E>>> + Send;
}
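
Because `dispatch` and `dispatch_many` ship default bodies, a wrapper only has to provide the two `*_boxed` methods to act as a dispatcher. Below is a hedged sketch of the composition pattern this trait enables, written from the consumer side against the re-exported names (`TaskDispatcher`/`BaseTaskDispatcher`); the `CountingDispatcher` wrapper and its counter are invented for illustration.

use std::sync::{
	atomic::{AtomicU64, Ordering},
	Arc,
};

use sd_task_system::{BaseTaskDispatcher, RunError, Task, TaskDispatcher, TaskHandle};

/// Wraps the base dispatcher by composition and counts every dispatched task.
#[derive(Debug)]
struct CountingDispatcher<E: RunError> {
	inner: BaseTaskDispatcher<E>,
	dispatched: Arc<AtomicU64>,
}

// Manual impl, like the base dispatcher, to avoid a spurious `E: Clone` bound.
impl<E: RunError> Clone for CountingDispatcher<E> {
	fn clone(&self) -> Self {
		Self {
			inner: self.inner.clone(),
			dispatched: Arc::clone(&self.dispatched),
		}
	}
}

impl<E: RunError> TaskDispatcher<E> for CountingDispatcher<E> {
	async fn dispatch_boxed(&self, boxed_task: Box<dyn Task<E>>) -> TaskHandle<E> {
		self.dispatched.fetch_add(1, Ordering::Relaxed);
		self.inner.dispatch_boxed(boxed_task).await
	}

	async fn dispatch_many_boxed(
		&self,
		boxed_tasks: impl IntoIterator<Item = Box<dyn Task<E>>> + Send,
	) -> Vec<TaskHandle<E>> {
		let handles = self.inner.dispatch_many_boxed(boxed_tasks).await;
		self.dispatched
			.fetch_add(handles.len() as u64, Ordering::Relaxed);
		handles
	}
}
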
impl<E: RunError> Clone for BaseDispatcher<E> {
fn clone(&self) -> Self {
Self {
workers: Arc::clone(&self.workers),
@ -406,33 +442,35 @@ impl<E: RunError> Clone for Dispatcher<E> {
}
}
impl<E: RunError> Dispatcher<E> {
/// Dispatches a task to the system, the task will be assigned to a worker and executed as soon as possible.
pub async fn dispatch(&self, into_task: impl IntoTask<E>) -> TaskHandle<E> {
async fn inner<E: RunError>(this: &Dispatcher<E>, task: Box<dyn Task<E>>) -> TaskHandle<E> {
let worker_id = this
impl<E: RunError> Dispatcher<E> for BaseDispatcher<E> {
async fn dispatch(&self, into_task: impl IntoTask<E>) -> TaskHandle<E> {
self.dispatch_boxed(into_task.into_task()).await
}
#[allow(clippy::missing_panics_doc)]
async fn dispatch_boxed(&self, task: Box<dyn Task<E>>) -> TaskHandle<E> {
let worker_id = self
.last_worker_id
.fetch_update(Ordering::Release, Ordering::Acquire, |last_worker_id| {
Some((last_worker_id + 1) % this.workers.len())
Some((last_worker_id + 1) % self.workers.len())
})
.expect("we hardcoded the update function to always return Some(next_worker_id) through dispatcher");
trace!(
"Dispatching task to worker: <worker_id='{worker_id}', task_id='{}'>",
task.id()
);
let handle = this.workers[worker_id].add_task(task).await;
trace!(
"Dispatching task to worker: <worker_id='{worker_id}', task_id='{}'>",
task.id()
);
let handle = self.workers[worker_id].add_task(task).await;
this.idle_workers[worker_id].store(false, Ordering::Relaxed);
self.idle_workers[worker_id].store(false, Ordering::Relaxed);
handle
}
inner(self, into_task.into_task()).await
handle
}
/// Dispatches many tasks to the system, the tasks will be assigned to workers and executed as soon as possible.
pub async fn dispatch_many(&self, into_tasks: Vec<impl IntoTask<E>>) -> Vec<TaskHandle<E>> {
async fn dispatch_many_boxed(
&self,
into_tasks: impl IntoIterator<Item = Box<dyn Task<E>>> + Send,
) -> Vec<TaskHandle<E>> {
let mut workers_task_count = self
.workers
.iter()
@ -445,7 +483,6 @@ impl<E: RunError> Dispatcher<E> {
let (handles, workers_ids_set) = into_tasks
.into_iter()
.map(IntoTask::into_task)
.zip(workers_task_count.into_iter().cycle())
.map(|(task, (worker_id, _))| async move {
(self.workers[worker_id].add_task(task).await, worker_id)
@ -462,7 +499,9 @@ impl<E: RunError> Dispatcher<E> {
handles
}
}
impl<E: RunError> BaseDispatcher<E> {
/// Returns the number of workers in the system.
#[must_use]
pub fn workers_count(&self) -> usize {

View file

@ -13,6 +13,7 @@ use async_channel as chan;
use async_trait::async_trait;
use chan::{Recv, RecvError};
use downcast_rs::{impl_downcast, Downcast};
use futures::executor::block_on;
use tokio::sync::oneshot;
use tracing::{trace, warn};
use uuid::Uuid;
@ -61,7 +62,7 @@ pub enum TaskOutput {
#[derive(Debug)]
pub enum TaskStatus<E: RunError> {
/// The task has finished successfully and maybe has some output for the user.
Done(TaskOutput),
Done((TaskId, TaskOutput)),
/// Task was gracefully cancelled by the user.
Canceled,
/// Task was forcefully aborted by the user.
@ -123,7 +124,7 @@ impl<T: Task<E> + 'static, E: RunError> IntoTask<E> for T {
/// We're currently using the [`async_trait`](https://docs.rs/async-trait) crate to allow dyn async traits,
/// due to a limitation in the Rust language.
#[async_trait]
pub trait Task<E: RunError>: fmt::Debug + Downcast + Send + 'static {
pub trait Task<E: RunError>: fmt::Debug + Downcast + Send + Sync + 'static {
/// This method represents the work that should be done by the worker; it will be called by the
/// worker when there is a slot available in its internal queue.
/// We receive a `&mut self` so any internal data can be mutated on each `run` invocation.
@ -147,6 +148,21 @@ pub trait Task<E: RunError>: fmt::Debug + Downcast + Send + 'static {
impl_downcast!(Task<E> where E: RunError);
pub trait SerializableTask<E: RunError>: Task<E>
where
Self: Sized,
{
type SerializeError: std::error::Error + 'static;
type DeserializeError: std::error::Error + 'static;
type DeserializeCtx: 'static;
fn serialize(self) -> impl Future<Output = Result<Vec<u8>, Self::SerializeError>> + Send;
fn deserialize(
data: &[u8],
ctx: Self::DeserializeCtx,
) -> impl Future<Output = Result<Self, Self::DeserializeError>> + Send;
}
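
A hedged sketch of what opting into this trait could look like for a toy task whose whole state is serializable, written from the consumer side with the re-exported names. Everything named here (`CountFilesTask`, `CountError`, the exact `Task::run`/`id` signatures, `TaskOutput::Empty`, `TaskId` being a serde-friendly UUID, `CountError` meeting the `RunError` bound) is assumed for illustration; only the `SerializableTask` shape comes from the trait above, and rmp-serde (already present among this crate's dependencies) is used as the wire format.

use std::path::PathBuf;

use async_trait::async_trait;
use sd_task_system::{ExecStatus, Interrupter, SerializableTask, Task, TaskId, TaskOutput};
use serde::{Deserialize, Serialize};
use thiserror::Error;

#[derive(Debug, Error)]
enum CountError {
	#[error("io error while counting entries: {0}")]
	Io(#[from] std::io::Error),
}

// Assumes `TaskId` is a serde-serializable UUID, so the whole task state can round-trip.
#[derive(Debug, Serialize, Deserialize)]
struct CountFilesTask {
	id: TaskId,
	root: PathBuf,
	counted: u64,
}

#[async_trait]
impl Task<CountError> for CountFilesTask {
	// The trait body is elided in this hunk; these signatures are assumed.
	fn id(&self) -> TaskId {
		self.id
	}

	async fn run(&mut self, _interrupter: &Interrupter) -> Result<ExecStatus, CountError> {
		let mut read_dir = tokio::fs::read_dir(&self.root).await?;
		while read_dir.next_entry().await?.is_some() {
			self.counted += 1;
		}

		// `TaskOutput::Empty` is assumed to be the "nothing to report" case.
		Ok(ExecStatus::Done(TaskOutput::Empty))
	}
}

impl SerializableTask<CountError> for CountFilesTask {
	type SerializeError = rmp_serde::encode::Error;
	type DeserializeError = rmp_serde::decode::Error;
	// This toy task needs no runtime-only context; a real task would receive things
	// like a database handle here instead of trying to serialize them.
	type DeserializeCtx = ();

	async fn serialize(self) -> Result<Vec<u8>, Self::SerializeError> {
		rmp_serde::to_vec_named(&self)
	}

	async fn deserialize(
		data: &[u8],
		_ctx: Self::DeserializeCtx,
	) -> Result<Self, Self::DeserializeError> {
		rmp_serde::from_slice(data)
	}
}
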
/// Intermediate struct to wait until a pause or a cancel command is sent by the user.
#[must_use = "`InterrupterFuture` does nothing unless polled"]
#[pin_project::pin_project]
@ -164,7 +180,7 @@ impl Future for InterrupterFuture<'_> {
match this.fut.poll(cx) {
Poll::Ready(Ok(InterruptionRequest { kind, ack })) => {
if ack.send(Ok(())).is_err() {
if ack.send(()).is_err() {
warn!("TaskInterrupter ack channel closed");
}
this.has_interrupted.store(kind as u8, Ordering::Relaxed);
@ -218,7 +234,7 @@ impl Interrupter {
InterruptionKind::load(&self.has_interrupted).map_or_else(
|| {
if let Ok(InterruptionRequest { kind, ack }) = self.interrupt_rx.try_recv() {
if ack.send(Ok(())).is_err() {
if ack.send(()).is_err() {
warn!("TaskInterrupter ack channel closed");
}
@ -245,6 +261,39 @@ impl Interrupter {
}
}
#[macro_export]
macro_rules! check_interruption {
($interrupter:ident) => {
let interrupter: &Interrupter = $interrupter;
match interrupter.try_check_interrupt() {
Some($crate::InterruptionKind::Cancel) => return Ok($crate::ExecStatus::Canceled),
Some($crate::InterruptionKind::Pause) => return Ok($crate::ExecStatus::Paused),
None => { /* Everything is Awesome! */ }
}
};
($interrupter:ident, $instant:ident, $duration_accumulator:ident) => {
let interrupter: &Interrupter = $interrupter;
let instant: Instant = $instant;
let duration_accumulator: &mut Duration = $duration_accumulator;
match interrupter.try_check_interrupt() {
Some($crate::InterruptionKind::Cancel) => {
*duration_accumulator += instant.elapsed();
return Ok($crate::ExecStatus::Canceled);
}
Some($crate::InterruptionKind::Pause) => {
*duration_accumulator += instant.elapsed();
return Ok($crate::ExecStatus::Paused);
}
None => { /* Everything is Awesome! */ }
}
};
}
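
A small usage sketch for both arms of the macro, as they might appear inside a task's `run` body. `process_chunk`, `ChunkError`, and the timing names are invented here; `TaskOutput::Empty` is assumed to be the no-output case.

use std::time::{Duration, Instant};

use sd_task_system::{check_interruption, ExecStatus, Interrupter, TaskOutput};
use thiserror::Error;

#[derive(Debug, Error)]
#[error("failed to process a chunk")]
struct ChunkError;

async fn process_chunk(_chunk: &[u8]) -> Result<(), ChunkError> {
	Ok(())
}

async fn process_all(
	chunks: &[Vec<u8>],
	interrupter: &Interrupter,
) -> Result<ExecStatus, ChunkError> {
	let mut processing_time = Duration::ZERO;

	for chunk in chunks {
		// Plain arm: early-returns `Canceled` or `Paused` if the user asked for it.
		check_interruption!(interrupter);

		let start = Instant::now();
		process_chunk(chunk).await?;

		// Timed arm: folds the elapsed time into the accumulator before returning early.
		let accumulated = &mut processing_time;
		check_interruption!(interrupter, start, accumulated);

		processing_time += start.elapsed();
	}

	Ok(ExecStatus::Done(TaskOutput::Empty))
}
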
/// The kind of interruption that can be requested by the user, a pause or a cancel
#[derive(Debug, Clone, Copy)]
#[repr(u8)]
@ -266,30 +315,18 @@ impl InterruptionKind {
#[derive(Debug)]
pub struct InterruptionRequest {
kind: InterruptionKind,
ack: oneshot::Sender<Result<(), SystemError>>,
ack: oneshot::Sender<()>,
}
/// A handle returned when a task is dispatched to the task system, it can be used to pause, cancel, resume, or wait
/// until the task gets completed.
#[derive(Debug)]
pub struct TaskHandle<E: RunError> {
/// A remote controller of a task that can be used to pause, cancel, resume, or force abortion.
#[derive(Debug, Clone)]
pub struct TaskRemoteController {
pub(crate) worktable: Arc<TaskWorktable>,
pub(crate) done_rx: oneshot::Receiver<Result<TaskStatus<E>, SystemError>>,
pub(crate) system_comm: SystemComm,
pub(crate) task_id: TaskId,
}
impl<E: RunError> Future for TaskHandle<E> {
type Output = Result<TaskStatus<E>, SystemError>;
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
Pin::new(&mut self.done_rx)
.poll(cx)
.map(|res| res.expect("TaskHandle done channel unexpectedly closed"))
}
}
impl<E: RunError> TaskHandle<E> {
impl TaskRemoteController {
/// Get the unique identifier of the task
#[must_use]
pub const fn task_id(&self) -> TaskId {
@ -316,7 +353,7 @@ impl<E: RunError> TaskHandle<E> {
self.worktable.pause(tx).await;
rx.await.expect("Worker failed to ack pause request")?;
rx.await.expect("Worker failed to ack pause request");
} else {
trace!("Task is not running, setting is_paused flag");
self.worktable.is_paused.store(true, Ordering::Relaxed);
@ -338,7 +375,7 @@ impl<E: RunError> TaskHandle<E> {
/// # Panics
///
/// Will panic if the worker failed to ack the cancel request
pub async fn cancel(&self) -> Result<(), SystemError> {
pub async fn cancel(&self) {
let is_canceled = self.worktable.is_canceled.load(Ordering::Relaxed);
let is_done = self.worktable.is_done.load(Ordering::Relaxed);
@ -352,12 +389,11 @@ impl<E: RunError> TaskHandle<E> {
self.worktable.cancel(tx).await;
rx.await.expect("Worker failed to ack cancel request")?;
rx.await.expect("Worker failed to ack cancel request");
} else {
trace!("Task is not running, setting is_canceled flag");
self.worktable.is_canceled.store(true, Ordering::Relaxed);
return self
.system_comm
self.system_comm
.cancel_not_running_task(
self.task_id,
self.worktable.current_worker_id.load(Ordering::Relaxed),
@ -365,8 +401,6 @@ impl<E: RunError> TaskHandle<E> {
.await;
}
}
Ok(())
}
/// Forcefully abort the task, this can lead to corrupted data or inconsistent states, so use it with caution.
@ -390,6 +424,92 @@ impl<E: RunError> TaskHandle<E> {
)
.await
}
/// Verify if the task was already completed
#[must_use]
pub fn is_done(&self) -> bool {
self.worktable.is_done.load(Ordering::Relaxed)
}
}
/// A handle returned when a task is dispatched to the task system, it can be used to pause, cancel, resume, or wait
/// until the task gets completed.
#[derive(Debug)]
pub struct TaskHandle<E: RunError> {
pub(crate) done_rx: oneshot::Receiver<Result<TaskStatus<E>, SystemError>>,
pub(crate) controller: TaskRemoteController,
}
impl<E: RunError> Future for TaskHandle<E> {
type Output = Result<TaskStatus<E>, SystemError>;
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
Pin::new(&mut self.done_rx)
.poll(cx)
.map(|res| res.expect("TaskHandle done channel unexpectedly closed"))
}
}
impl<E: RunError> TaskHandle<E> {
/// Get the unique identifier of the task
#[must_use]
pub const fn task_id(&self) -> TaskId {
self.controller.task_id
}
/// Gracefully pause the task at a safe point defined by the user using the [`Interrupter`]
///
/// # Panics
///
/// Will panic if the worker failed to ack the pause request
pub async fn pause(&self) -> Result<(), SystemError> {
self.controller.pause().await
}
/// Gracefully cancel the task at a safe point defined by the user using the [`Interrupter`]
///
/// # Panics
///
/// Will panic if the worker failed to ack the cancel request
pub async fn cancel(&self) {
self.controller.cancel().await;
}
/// Forcefully abort the task, this can lead to corrupted data or inconsistent states, so use it with caution.
pub async fn force_abortion(&self) -> Result<(), SystemError> {
self.controller.force_abortion().await
}
/// Marks the task to be resumed by the task system, the worker will start processing it if there is a slot
/// available or will be enqueued otherwise.
pub async fn resume(&self) -> Result<(), SystemError> {
self.controller.resume().await
}
/// Gets the [`TaskRemoteController`] object that can be used to control the task remotely, to
/// pause, cancel, resume, or force abortion.
#[must_use]
pub fn remote_controller(&self) -> TaskRemoteController {
self.controller.clone()
}
}
/// A helper struct when you just want to cancel a task if its `TaskHandle` gets dropped.
pub struct CancelTaskOnDrop<E: RunError>(pub TaskHandle<E>);
impl<E: RunError> Future for CancelTaskOnDrop<E> {
type Output = Result<TaskStatus<E>, SystemError>;
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
Pin::new(&mut self.0).poll(cx)
}
}
impl<E: RunError> Drop for CancelTaskOnDrop<E> {
fn drop(&mut self) {
// FIXME: We should use async drop when it becomes stable
block_on(self.0.cancel());
}
}
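
A hedged sketch (not from the diff) tying the two additions together: `remote_controller()` hands out a cheap clone that can pause, resume, or cancel the task from somewhere else, while `CancelTaskOnDrop` guarantees the task is cancelled if the guarded handle is dropped before completion. The function name and the pause/resume round-trip are purely illustrative.

use sd_task_system::{
	CancelTaskOnDrop, IntoTask, RunError, TaskDispatcher, TaskRemoteController, TaskStatus,
	TaskSystemError,
};

async fn dispatch_guarded<E: RunError>(
	dispatcher: &impl TaskDispatcher<E>,
	task: impl IntoTask<E>,
) -> Result<TaskStatus<E>, TaskSystemError> {
	let handle = dispatcher.dispatch(task).await;

	// The controller is a cheap clone that can be stored elsewhere (e.g. inside a job),
	// independently of whoever ends up awaiting the handle.
	let controller: TaskRemoteController = handle.remote_controller();

	// If `guarded` is dropped before completion, its Drop impl cancels the task.
	let guarded = CancelTaskOnDrop(handle);

	// Purely to exercise the remote API: pause and resume once if still running.
	if !controller.is_done() {
		controller.pause().await?;
		controller.resume().await?;
	}

	guarded.await
}
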
#[derive(Debug)]
@ -436,7 +556,7 @@ impl TaskWorktable {
self.is_aborted.store(true, Ordering::Relaxed);
}
pub async fn pause(&self, tx: oneshot::Sender<Result<(), SystemError>>) {
pub async fn pause(&self, tx: oneshot::Sender<()>) {
self.is_paused.store(true, Ordering::Relaxed);
self.is_running.store(false, Ordering::Relaxed);
@ -451,7 +571,7 @@ impl TaskWorktable {
.expect("Worker channel closed trying to pause task");
}
pub async fn cancel(&self, tx: oneshot::Sender<Result<(), SystemError>>) {
pub async fn cancel(&self, tx: oneshot::Sender<()>) {
self.is_canceled.store(true, Ordering::Relaxed);
self.is_running.store(false, Ordering::Relaxed);

View file

@ -8,6 +8,8 @@ use async_channel as chan;
use tokio::{spawn, sync::oneshot, task::JoinHandle};
use tracing::{error, info, trace, warn};
use crate::task::TaskRemoteController;
use super::{
error::{RunError, SystemError},
message::WorkerMessage,
@ -127,10 +129,12 @@ impl<E: RunError> Worker<E> {
.expect("Worker channel closed trying to add task");
TaskHandle {
worktable,
done_rx,
system_comm: self.system_comm.clone(),
task_id,
controller: TaskRemoteController {
worktable,
system_comm: self.system_comm.clone(),
task_id,
},
}
}
@ -168,11 +172,7 @@ impl<E: RunError> Worker<E> {
.expect("Worker channel closed trying to pause a not running task");
}
pub async fn cancel_not_running_task(
&self,
task_id: TaskId,
ack: oneshot::Sender<Result<(), SystemError>>,
) {
pub async fn cancel_not_running_task(&self, task_id: TaskId, ack: oneshot::Sender<()>) {
self.msgs_tx
.send(WorkerMessage::CancelNotRunningTask { task_id, ack })
.await

View file

@ -65,7 +65,7 @@ pub(super) async fn run<E: RunError>(
StreamMessage::Commands(WorkerMessage::CancelNotRunningTask { task_id, ack }) => {
runner.cancel_not_running_task(task_id);
if ack.send(Ok(())).is_err() {
if ack.send(()).is_err() {
warn!("Resume task channel closed before sending ack");
}
}

View file

@ -965,6 +965,7 @@ impl<E: RunError> Runner<E> {
) {
match status {
InternalTaskExecStatus::Done(out) => {
self.task_kinds.remove(&task_id);
send_complete_task_response(self.worker_id, task_id, task_work_state, out);
}
@ -977,10 +978,12 @@ impl<E: RunError> Runner<E> {
}
InternalTaskExecStatus::Canceled => {
self.task_kinds.remove(&task_id);
send_cancel_task_response(self.worker_id, task_id, task_work_state);
}
InternalTaskExecStatus::Error(e) => {
self.task_kinds.remove(&task_id);
send_error_task_response(self.worker_id, task_id, task_work_state, e);
}
@ -1057,7 +1060,7 @@ impl<E: RunError> Runner<E> {
}
if self.task_kinds.capacity() > TASK_QUEUE_INITIAL_SIZE {
assert_eq!(self.task_kinds.len(), 0);
assert_eq!(self.task_kinds.len(), self.paused_tasks.len());
self.task_kinds.shrink_to(TASK_QUEUE_INITIAL_SIZE);
}
@ -1190,15 +1193,10 @@ fn handle_task_suspension(
worktable.pause(tx).await;
match rx.await {
Ok(Ok(())) => {
Ok(()) => {
trace!("Suspending: <worker_id='{worker_id}', task_id='{task_id}'>");
has_suspended.store(true, Ordering::Relaxed);
}
Ok(Err(e)) => {
error!(
"Task <worker_id='{worker_id}', task_id='{task_id}'> failed to suspend: {e:#?}",
);
}
Err(_) => {
// The task probably finished before we could suspend it so the channel was dropped
trace!(
@ -1408,7 +1406,7 @@ fn send_complete_task_response<E: RunError>(
out: TaskOutput,
) {
worktable.set_completed();
if done_tx.send(Ok(TaskStatus::Done(out))).is_err() {
if done_tx.send(Ok(TaskStatus::Done((task_id, out)))).is_err() {
warn!(
"Task done channel closed before sending done response for task: \
<worker_id='{worker_id}', task_id='{task_id}'>"

Some files were not shown because too many files have changed in this diff.