mirror of
https://github.com/spacedriveapp/spacedrive
synced 2024-07-04 12:13:27 +00:00
[ENG-1628] Write new indexer with the task system (#2161)
* Moving file-path-helper to a sub-crate on core * Parallel walker tested and working * Change inner core crate names to sd-core-* naming scheme * Moving stuff around * Save and Update tasks for the new indexer job * Some initial drafts on the new job system * More drafts on job system * Changing walker task to a more explicit state machine Also drafting more of job system * More drafting on job resume * Completed the draft on job system inner workings * New job context abstraction to decouple library stuff from job system * Properly use composition on task dispatcher * First draft on indexer job * Job serialization * Handling ancestors in the distributed walker for indexer * Saving computed directories sizes on a location to db * Enable a way to check if jobs are running in a location * Progress reporting on indexer job * Reorganizing modules * Shallow indexer * Rust fmt * Attempting windows CI fix * Attempting windows CI fix again * Attempting windows CI fix again
This commit is contained in:
parent
11555c583a
commit
6b760b0b2a
125
Cargo.lock
generated
125
Cargo.lock
generated
|
@ -6454,7 +6454,9 @@ version = "0.10.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259"
|
||||
dependencies = [
|
||||
"phf_macros 0.10.0",
|
||||
"phf_shared 0.10.0",
|
||||
"proc-macro-hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -6531,6 +6533,20 @@ dependencies = [
|
|||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_macros"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "58fdf3184dd560f160dd73922bea2d5cd6e8f064bf4b13110abd81b03697b4e0"
|
||||
dependencies = [
|
||||
"phf_generator 0.10.0",
|
||||
"phf_shared 0.10.0",
|
||||
"proc-macro-hack",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_macros"
|
||||
version = "0.11.2"
|
||||
|
@ -8115,8 +8131,9 @@ dependencies = [
|
|||
"reqwest",
|
||||
"rmp-serde",
|
||||
"rmpv",
|
||||
"sd-core-file-path-helper",
|
||||
"sd-core-prisma-helpers",
|
||||
"sd-core-sync",
|
||||
"sd-file-path-helper",
|
||||
"sd-prisma",
|
||||
"sd-sync",
|
||||
"sd-utils",
|
||||
|
@ -8220,11 +8237,14 @@ dependencies = [
|
|||
"sd-ai",
|
||||
"sd-cache",
|
||||
"sd-cloud-api",
|
||||
"sd-core-file-path-helper",
|
||||
"sd-core-heavy-lifting",
|
||||
"sd-core-indexer-rules",
|
||||
"sd-core-prisma-helpers",
|
||||
"sd-core-sync",
|
||||
"sd-crypto",
|
||||
"sd-ffmpeg",
|
||||
"sd-file-ext",
|
||||
"sd-file-path-helper",
|
||||
"sd-images",
|
||||
"sd-media-metadata",
|
||||
"sd-p2p",
|
||||
|
@ -8260,6 +8280,90 @@ dependencies = [
|
|||
"webp",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sd-core-file-path-helper"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"prisma-client-rust",
|
||||
"regex",
|
||||
"sd-core-prisma-helpers",
|
||||
"sd-prisma",
|
||||
"sd-utils",
|
||||
"serde",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tracing",
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sd-core-heavy-lifting"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"async-channel",
|
||||
"async-trait",
|
||||
"chrono",
|
||||
"futures",
|
||||
"futures-concurrency",
|
||||
"globset",
|
||||
"itertools 0.12.0",
|
||||
"lending-stream",
|
||||
"prisma-client-rust",
|
||||
"rmp-serde",
|
||||
"rmpv",
|
||||
"rspc",
|
||||
"sd-core-file-path-helper",
|
||||
"sd-core-indexer-rules",
|
||||
"sd-core-prisma-helpers",
|
||||
"sd-core-sync",
|
||||
"sd-prisma",
|
||||
"sd-sync",
|
||||
"sd-task-system",
|
||||
"sd-utils",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"specta",
|
||||
"strum",
|
||||
"tempfile",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tracing",
|
||||
"tracing-test",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sd-core-indexer-rules"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"futures-concurrency",
|
||||
"globset",
|
||||
"prisma-client-rust",
|
||||
"rmp-serde",
|
||||
"rspc",
|
||||
"sd-prisma",
|
||||
"sd-utils",
|
||||
"serde",
|
||||
"specta",
|
||||
"tempfile",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tracing",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sd-core-prisma-helpers"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"prisma-client-rust",
|
||||
"sd-prisma",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sd-core-sync"
|
||||
version = "0.0.0"
|
||||
|
@ -8414,22 +8518,6 @@ dependencies = [
|
|||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sd-file-path-helper"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"prisma-client-rust",
|
||||
"regex",
|
||||
"sd-prisma",
|
||||
"sd-utils",
|
||||
"serde",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tracing",
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sd-images"
|
||||
version = "0.0.0"
|
||||
|
@ -9450,6 +9538,7 @@ version = "0.25.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125"
|
||||
dependencies = [
|
||||
"phf 0.10.1",
|
||||
"strum_macros",
|
||||
]
|
||||
|
||||
|
|
|
@ -59,9 +59,12 @@ chrono = "0.4.31"
|
|||
clap = "4.4.7"
|
||||
futures = "0.3.30"
|
||||
futures-concurrency = "7.4.3"
|
||||
globset = "^0.4.13"
|
||||
hex = "0.4.3"
|
||||
http = "0.2.9"
|
||||
image = "0.24.7"
|
||||
itertools = "0.12.0"
|
||||
lending-stream = "1.0.0"
|
||||
normpath = "1.1.1"
|
||||
once_cell = "1.18.0"
|
||||
pin-project-lite = "0.2.13"
|
||||
|
@ -84,6 +87,9 @@ uhlc = "=0.5.2"
|
|||
uuid = "1.5.0"
|
||||
webp = "0.2.6"
|
||||
|
||||
[workspace.dev-dependencies]
|
||||
tracing-test = { version = "^0.2.4" }
|
||||
|
||||
[patch.crates-io]
|
||||
# Proper IOS Support
|
||||
if-watch = { git = "https://github.com/oscartbeaumont/if-watch.git", rev = "a92c17d3f85c1c6fb0afeeaf6c2b24d0b147e8c3" }
|
||||
|
|
|
@ -6,6 +6,7 @@ repository = { workspace = true }
|
|||
edition = { workspace = true }
|
||||
|
||||
[dependencies]
|
||||
# Spacedrive Sub-crates
|
||||
sd-crypto = { path = "../../crates/crypto" }
|
||||
|
||||
anyhow = { workspace = true }
|
||||
|
|
|
@ -9,7 +9,11 @@ repository = { workspace = true }
|
|||
edition = { workspace = true }
|
||||
|
||||
[dependencies]
|
||||
sd-core = { path = "../../../core", features = ["ffmpeg", "heif"] }
|
||||
# Spacedrive Sub-crates
|
||||
sd-core = { path = "../../../core", features = [
|
||||
"ffmpeg",
|
||||
"heif",
|
||||
] }
|
||||
sd-fda = { path = "../../../crates/fda" }
|
||||
sd-prisma = { path = "../../../crates/prisma" }
|
||||
|
||||
|
|
|
@ -11,6 +11,7 @@ edition = { workspace = true }
|
|||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
# Spacedrive Sub-crates
|
||||
sd-mobile-core = { path = "../../core" }
|
||||
|
||||
# FFI
|
||||
|
|
|
@ -7,6 +7,7 @@ repository = { workspace = true }
|
|||
edition = { workspace = true }
|
||||
|
||||
[dependencies]
|
||||
# Spacedrive Sub-crates
|
||||
sd-core = { path = "../../../../../core", features = [
|
||||
"mobile",
|
||||
], default-features = false }
|
||||
|
|
|
@ -14,4 +14,5 @@ edition = { workspace = true }
|
|||
crate-type = ["staticlib"]
|
||||
|
||||
[dependencies]
|
||||
# Spacedrive Sub-crates
|
||||
sd-mobile-core = { path = "../../core" }
|
||||
|
|
|
@ -11,6 +11,7 @@ assets = []
|
|||
ai-models = ["sd-core/ai"]
|
||||
|
||||
[dependencies]
|
||||
# Spacedrive Sub-crates
|
||||
sd-core = { path = "../../core", features = [
|
||||
"ffmpeg",
|
||||
"heif",
|
||||
|
|
|
@ -19,11 +19,18 @@ ai = ["dep:sd-ai"]
|
|||
crypto = ["dep:sd-crypto"]
|
||||
|
||||
[dependencies]
|
||||
# Sub-crates
|
||||
sd-cache = { path = "../crates/cache" }
|
||||
# Inner Core Sub-crates
|
||||
sd-core-file-path-helper = { path = "./crates/file-path-helper" }
|
||||
sd-core-heavy-lifting = { path = "./crates/heavy-lifting" }
|
||||
sd-core-indexer-rules = { path = "./crates/indexer-rules" }
|
||||
sd-core-prisma-helpers = { path = "./crates/prisma-helpers" }
|
||||
sd-core-sync = { path = "./crates/sync" }
|
||||
# sd-cloud-api = { path = "../crates/cloud-api" }
|
||||
sd-file-path-helper = { path = "../crates/file-path-helper" }
|
||||
|
||||
# Spacedrive Sub-crates
|
||||
sd-actors = { version = "0.1.0", path = "../crates/actors" }
|
||||
sd-ai = { path = "../crates/ai", optional = true }
|
||||
sd-cache = { path = "../crates/cache" }
|
||||
sd-cloud-api = { version = "0.1.0", path = "../crates/cloud-api" }
|
||||
sd-crypto = { path = "../crates/crypto", features = [
|
||||
"sys",
|
||||
"tokio",
|
||||
|
@ -41,10 +48,8 @@ sd-p2p-block = { path = "../crates/p2p-block" }
|
|||
sd-p2p-proto = { path = "../crates/p2p-proto" }
|
||||
sd-p2p-tunnel = { path = "../crates/p2p-tunnel" }
|
||||
sd-prisma = { path = "../crates/prisma" }
|
||||
sd-ai = { path = "../crates/ai", optional = true }
|
||||
sd-sync = { path = "../crates/sync" }
|
||||
sd-utils = { path = "../crates/utils" }
|
||||
sd-cloud-api = { version = "0.1.0", path = "../crates/cloud-api" }
|
||||
|
||||
# Workspace dependencies
|
||||
async-channel = { workspace = true }
|
||||
|
@ -56,6 +61,7 @@ chrono = { workspace = true, features = ["serde"] }
|
|||
futures = { workspace = true }
|
||||
futures-concurrency = { workspace = true }
|
||||
image = { workspace = true }
|
||||
itertools = { workspace = true }
|
||||
normpath = { workspace = true, features = ["localization"] }
|
||||
once_cell = { workspace = true }
|
||||
pin-project-lite = { workspace = true }
|
||||
|
@ -63,6 +69,7 @@ prisma-client-rust = { workspace = true, features = ["rspc"] }
|
|||
regex = { workspace = true }
|
||||
reqwest = { workspace = true, features = ["json", "native-tls-vendored"] }
|
||||
rmp-serde = { workspace = true }
|
||||
rmpv = { workspace = true }
|
||||
rspc = { workspace = true, features = [
|
||||
"axum",
|
||||
"uuid",
|
||||
|
@ -98,22 +105,25 @@ webp = { workspace = true }
|
|||
# Specific Core dependencies
|
||||
async-recursion = "1.0.5"
|
||||
async-stream = "0.3.5"
|
||||
aws-sdk-s3 = { version = "1.5.0", features = ["behavior-version-latest"] }
|
||||
aws-config = "1.0.3"
|
||||
aws-credential-types = "1.0.3"
|
||||
base91 = "0.1.0"
|
||||
bytes = "1.5.0"
|
||||
ctor = "0.2.5"
|
||||
directories = "5.0.1"
|
||||
flate2 = "1.0.28"
|
||||
globset = { version = "^0.4.13", features = ["serde1"] }
|
||||
hostname = "0.3.1"
|
||||
http-body = "0.4.5"
|
||||
http-range = "0.1.5"
|
||||
hyper = { version = "=0.14.28", features = ["http1", "server", "client"] }
|
||||
int-enum = "0.5.0"
|
||||
itertools = "0.12.0"
|
||||
libc = "0.2.153"
|
||||
mini-moka = "0.10.2"
|
||||
notify = { git = "https://github.com/notify-rs/notify.git", rev = "c3929ed114fbb0bc7457a9a498260461596b00ca", default-features = false, features = [
|
||||
"macos_fsevent",
|
||||
] }
|
||||
rmpv = { workspace = true }
|
||||
rmp = "0.8.12"
|
||||
serde-hashkey = "0.4.5"
|
||||
serde_repr = "0.1"
|
||||
serde_with = "3.4.0"
|
||||
|
@ -121,14 +131,7 @@ slotmap = "1.0.6"
|
|||
static_assertions = "1.1.0"
|
||||
sysinfo = "0.29.10"
|
||||
tar = "0.4.40"
|
||||
aws-sdk-s3 = { version = "1.5.0", features = ["behavior-version-latest"] }
|
||||
aws-config = "1.0.3"
|
||||
aws-credential-types = "1.0.3"
|
||||
base91 = "0.1.0"
|
||||
sd-actors = { version = "0.1.0", path = "../crates/actors" }
|
||||
tower-service = "0.3.2"
|
||||
hyper = { version = "=0.14.28", features = ["http1", "server", "client"] }
|
||||
rmp = "0.8.12"
|
||||
|
||||
# Override features of transitive dependencies
|
||||
[dependencies.openssl]
|
||||
|
@ -151,5 +154,6 @@ icrate = { version = "0.1.0", features = [
|
|||
] }
|
||||
|
||||
[dev-dependencies]
|
||||
tracing-test = "^0.2.4"
|
||||
tracing-test = { workspace.dev-dependencies = true }
|
||||
aovec = "1.1.0"
|
||||
globset = { workspace = true }
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[package]
|
||||
name = "sd-file-path-helper"
|
||||
name = "sd-core-file-path-helper"
|
||||
version = "0.1.0"
|
||||
authors = ["Ericson Soares <ericson@spacedrive.com>"]
|
||||
readme = "README.md"
|
||||
|
@ -9,8 +9,12 @@ repository = { workspace = true }
|
|||
edition = { workspace = true }
|
||||
|
||||
[dependencies]
|
||||
sd-prisma = { path = "../prisma" }
|
||||
sd-utils = { path = "../utils" }
|
||||
# Inner Core Sub-crates
|
||||
sd-core-prisma-helpers = { path = "../prisma-helpers" }
|
||||
|
||||
# Spacedrive Sub-crates
|
||||
sd-prisma = { path = "../../../crates/prisma" }
|
||||
sd-utils = { path = "../../../crates/utils" }
|
||||
|
||||
chrono = { workspace = true, features = ["serde"] }
|
||||
prisma-client-rust = { workspace = true }
|
|
@ -1,3 +1,10 @@
|
|||
use sd_core_prisma_helpers::{
|
||||
file_path_for_file_identifier, file_path_for_media_processor, file_path_for_object_validator,
|
||||
file_path_to_full_path, file_path_to_handle_custom_uri, file_path_to_handle_p2p_serve_file,
|
||||
file_path_to_isolate, file_path_to_isolate_with_id, file_path_to_isolate_with_pub_id,
|
||||
file_path_walker, file_path_with_object,
|
||||
};
|
||||
|
||||
use sd_prisma::prisma::{file_path, location};
|
||||
use sd_utils::error::NonUtf8PathError;
|
||||
|
||||
|
@ -11,12 +18,7 @@ use std::{
|
|||
use regex::RegexSet;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::{
|
||||
file_path_for_file_identifier, file_path_for_media_processor, file_path_for_object_validator,
|
||||
file_path_to_full_path, file_path_to_handle_custom_uri, file_path_to_handle_p2p_serve_file,
|
||||
file_path_to_isolate, file_path_to_isolate_with_id, file_path_walker, file_path_with_object,
|
||||
FilePathError,
|
||||
};
|
||||
use super::FilePathError;
|
||||
|
||||
static FORBIDDEN_FILE_NAMES: OnceLock<RegexSet> = OnceLock::new();
|
||||
|
||||
|
@ -30,7 +32,7 @@ pub struct IsolatedFilePathDataParts<'a> {
|
|||
relative_path: &'a str,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Hash, Eq, PartialEq)]
|
||||
#[derive(Serialize, Deserialize, Debug, Hash, Eq, PartialEq, Clone, Default)]
|
||||
#[non_exhaustive]
|
||||
pub struct IsolatedFilePathData<'a> {
|
||||
// WARN! These fields MUST NOT be changed outside the location module, that's why they have this visibility
|
||||
|
@ -88,14 +90,22 @@ impl IsolatedFilePathData<'static> {
|
|||
}
|
||||
|
||||
impl<'a> IsolatedFilePathData<'a> {
|
||||
pub fn location_id(&self) -> location::id::Type {
|
||||
#[must_use]
|
||||
pub const fn location_id(&self) -> location::id::Type {
|
||||
self.location_id
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn extension(&self) -> &str {
|
||||
self.extension.as_ref()
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn is_dir(&self) -> bool {
|
||||
self.is_dir
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn is_root(&self) -> bool {
|
||||
self.is_dir
|
||||
&& self.materialized_path == "/"
|
||||
|
@ -103,6 +113,7 @@ impl<'a> IsolatedFilePathData<'a> {
|
|||
&& self.relative_path.is_empty()
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn to_parts(&self) -> IsolatedFilePathDataParts<'_> {
|
||||
IsolatedFilePathDataParts {
|
||||
location_id: self.location_id,
|
||||
|
@ -114,6 +125,12 @@ impl<'a> IsolatedFilePathData<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Return the `IsolatedFilePath` for the parent of the current file or directory.
|
||||
///
|
||||
/// # Panics
|
||||
/// May panic if the materialized path was malformed, without a slash for the parent directory.
|
||||
/// Considering that the parent can be just `/` for the root directory.
|
||||
#[must_use]
|
||||
pub fn parent(&'a self) -> Self {
|
||||
let (parent_path_str, name, relative_path) = if self.materialized_path == "/" {
|
||||
("/", "", "")
|
||||
|
@ -124,7 +141,7 @@ impl<'a> IsolatedFilePathData<'a> {
|
|||
.expect("malformed materialized path at `parent` method");
|
||||
|
||||
(
|
||||
&self.materialized_path[..last_slash_idx + 1],
|
||||
&self.materialized_path[..=last_slash_idx],
|
||||
&self.materialized_path[last_slash_idx + 1..trailing_slash_idx],
|
||||
&self.materialized_path[1..trailing_slash_idx],
|
||||
)
|
||||
|
@ -159,6 +176,7 @@ impl<'a> IsolatedFilePathData<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn full_name(&self) -> String {
|
||||
if self.extension.is_empty() {
|
||||
self.name.to_string()
|
||||
|
@ -167,6 +185,7 @@ impl<'a> IsolatedFilePathData<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn materialized_path_for_children(&self) -> Option<String> {
|
||||
if self.materialized_path == "/" && self.name.is_empty() && self.is_dir {
|
||||
// We're at the root file_path
|
||||
|
@ -186,19 +205,21 @@ impl<'a> IsolatedFilePathData<'a> {
|
|||
));
|
||||
}
|
||||
|
||||
if let Some(last_dot_idx) = source.rfind('.') {
|
||||
source.rfind('.').map_or_else(
|
||||
|| Ok((source, "")), // It's a file without extension
|
||||
|last_dot_idx| {
|
||||
if last_dot_idx == 0 {
|
||||
// The dot is the first character, so it's a hidden file
|
||||
Ok((source, ""))
|
||||
} else {
|
||||
Ok((&source[..last_dot_idx], &source[last_dot_idx + 1..]))
|
||||
}
|
||||
} else {
|
||||
// It's a file without extension
|
||||
Ok((source, ""))
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
#[allow(clippy::missing_panics_doc)] // Don't actually panic as the regexes are hardcoded
|
||||
#[must_use]
|
||||
pub fn accept_file_name(name: &str) -> bool {
|
||||
let reg = {
|
||||
// Maybe we should enforce windows more restrictive rules on all platforms?
|
||||
|
@ -224,6 +245,7 @@ impl<'a> IsolatedFilePathData<'a> {
|
|||
!reg.is_match(name)
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn separate_path_name_and_extension_from_str(
|
||||
source: &'a str,
|
||||
is_dir: bool,
|
||||
|
@ -253,22 +275,25 @@ impl<'a> IsolatedFilePathData<'a> {
|
|||
} else {
|
||||
let first_name_char_idx = source.rfind('/').unwrap_or(0) + 1;
|
||||
let end_idx = first_name_char_idx - 1;
|
||||
if let Some(last_dot_relative_idx) = source[first_name_char_idx..].rfind('.') {
|
||||
source[first_name_char_idx..].rfind('.').map_or_else(
|
||||
|| {
|
||||
(
|
||||
&source[..end_idx],
|
||||
Some(&source[first_name_char_idx..]),
|
||||
None,
|
||||
)
|
||||
},
|
||||
|last_dot_relative_idx| {
|
||||
let last_dot_idx = first_name_char_idx + last_dot_relative_idx;
|
||||
(
|
||||
&source[..end_idx],
|
||||
Some(&source[first_name_char_idx..last_dot_idx]),
|
||||
Some(&source[last_dot_idx + 1..]),
|
||||
)
|
||||
} else {
|
||||
(
|
||||
&source[..end_idx],
|
||||
Some(&source[first_name_char_idx..]),
|
||||
None,
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn prepare_name(path: &Path, is_dir: bool) -> &str {
|
||||
// Not using `impl AsRef<Path>` here because it's an private method
|
||||
|
@ -282,6 +307,7 @@ impl<'a> IsolatedFilePathData<'a> {
|
|||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn from_db_data(
|
||||
location_id: location::id::Type,
|
||||
is_dir: bool,
|
||||
|
@ -465,6 +491,7 @@ mod macros {
|
|||
impl_from_db!(
|
||||
file_path,
|
||||
file_path_to_isolate,
|
||||
file_path_to_isolate_with_pub_id,
|
||||
file_path_walker,
|
||||
file_path_to_isolate_with_id,
|
||||
file_path_with_object
|
||||
|
@ -514,19 +541,21 @@ pub fn extract_normalized_materialized_path_str(
|
|||
path: path.into(),
|
||||
})?
|
||||
.parent()
|
||||
.map(|materialized_path| {
|
||||
.map_or_else(
|
||||
|| Ok("/".to_string()),
|
||||
|materialized_path| {
|
||||
materialized_path
|
||||
.to_str()
|
||||
.map(|materialized_path_str| {
|
||||
if !materialized_path_str.is_empty() {
|
||||
format!("/{}/", materialized_path_str.replace('\\', "/"))
|
||||
} else {
|
||||
if materialized_path_str.is_empty() {
|
||||
"/".to_string()
|
||||
} else {
|
||||
format!("/{}/", materialized_path_str.replace('\\', "/"))
|
||||
}
|
||||
})
|
||||
.ok_or_else(|| NonUtf8PathError(path.into()))
|
||||
})
|
||||
.unwrap_or_else(|| Ok("/".to_string()))
|
||||
},
|
||||
)
|
||||
.map_err(Into::into)
|
||||
}
|
||||
|
||||
|
@ -544,6 +573,7 @@ fn assemble_relative_path(
|
|||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::missing_panics_doc)] // Don't actually panic as we check before `expect`
|
||||
pub fn join_location_relative_path(
|
||||
location_path: impl AsRef<Path>,
|
||||
relative_path: impl AsRef<Path>,
|
||||
|
@ -561,6 +591,7 @@ pub fn join_location_relative_path(
|
|||
})
|
||||
}
|
||||
|
||||
#[allow(clippy::missing_panics_doc)] // Don't actually panic as we check before `expect`
|
||||
pub fn push_location_relative_path(
|
||||
mut location_path: PathBuf,
|
||||
relative_path: impl AsRef<Path>,
|
|
@ -1,3 +1,32 @@
|
|||
#![warn(
|
||||
clippy::all,
|
||||
clippy::pedantic,
|
||||
clippy::correctness,
|
||||
clippy::perf,
|
||||
clippy::style,
|
||||
clippy::suspicious,
|
||||
clippy::complexity,
|
||||
clippy::nursery,
|
||||
clippy::unwrap_used,
|
||||
unused_qualifications,
|
||||
rust_2018_idioms,
|
||||
trivial_casts,
|
||||
trivial_numeric_casts,
|
||||
unused_allocation,
|
||||
clippy::unnecessary_cast,
|
||||
clippy::cast_lossless,
|
||||
clippy::cast_possible_truncation,
|
||||
clippy::cast_possible_wrap,
|
||||
clippy::cast_precision_loss,
|
||||
clippy::cast_sign_loss,
|
||||
clippy::dbg_macro,
|
||||
clippy::deprecated_cfg_attr,
|
||||
clippy::separated_literal_suffix,
|
||||
deprecated
|
||||
)]
|
||||
#![forbid(deprecated_in_future)]
|
||||
#![allow(clippy::missing_errors_doc, clippy::module_name_repetitions)]
|
||||
|
||||
use sd_prisma::prisma::{file_path, location, PrismaClient};
|
||||
use sd_utils::error::{FileIOError, NonUtf8PathError};
|
||||
|
||||
|
@ -21,107 +50,6 @@ pub use isolated_file_path_data::{
|
|||
IsolatedFilePathDataParts,
|
||||
};
|
||||
|
||||
// File Path selectables!
|
||||
file_path::select!(file_path_pub_and_cas_ids { id pub_id cas_id });
|
||||
file_path::select!(file_path_just_pub_id_materialized_path {
|
||||
pub_id
|
||||
materialized_path
|
||||
});
|
||||
file_path::select!(file_path_for_file_identifier {
|
||||
id
|
||||
pub_id
|
||||
materialized_path
|
||||
date_created
|
||||
is_dir
|
||||
name
|
||||
extension
|
||||
object_id
|
||||
});
|
||||
file_path::select!(file_path_for_object_validator {
|
||||
pub_id
|
||||
materialized_path
|
||||
is_dir
|
||||
name
|
||||
extension
|
||||
integrity_checksum
|
||||
});
|
||||
file_path::select!(file_path_for_media_processor {
|
||||
id
|
||||
materialized_path
|
||||
is_dir
|
||||
name
|
||||
extension
|
||||
cas_id
|
||||
object_id
|
||||
});
|
||||
file_path::select!(file_path_to_isolate {
|
||||
location_id
|
||||
materialized_path
|
||||
is_dir
|
||||
name
|
||||
extension
|
||||
});
|
||||
file_path::select!(file_path_to_isolate_with_id {
|
||||
id
|
||||
location_id
|
||||
materialized_path
|
||||
is_dir
|
||||
name
|
||||
extension
|
||||
});
|
||||
file_path::select!(file_path_walker {
|
||||
pub_id
|
||||
location_id
|
||||
object_id
|
||||
materialized_path
|
||||
is_dir
|
||||
name
|
||||
extension
|
||||
date_modified
|
||||
inode
|
||||
size_in_bytes_bytes
|
||||
hidden
|
||||
});
|
||||
file_path::select!(file_path_to_handle_custom_uri {
|
||||
pub_id
|
||||
materialized_path
|
||||
is_dir
|
||||
name
|
||||
extension
|
||||
location: select {
|
||||
id
|
||||
path
|
||||
instance: select {
|
||||
identity
|
||||
remote_identity
|
||||
}
|
||||
}
|
||||
});
|
||||
file_path::select!(file_path_to_handle_p2p_serve_file {
|
||||
materialized_path
|
||||
name
|
||||
extension
|
||||
is_dir // For isolated file path
|
||||
location: select {
|
||||
id
|
||||
path
|
||||
}
|
||||
});
|
||||
file_path::select!(file_path_to_full_path {
|
||||
id
|
||||
materialized_path
|
||||
is_dir
|
||||
name
|
||||
extension
|
||||
location: select {
|
||||
id
|
||||
path
|
||||
}
|
||||
});
|
||||
|
||||
// File Path includes!
|
||||
file_path::include!(file_path_with_object { object });
|
||||
|
||||
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
|
||||
pub struct FilePathMetadata {
|
||||
pub inode: u64,
|
||||
|
@ -140,8 +68,7 @@ pub fn path_is_hidden(path: impl AsRef<Path>, metadata: &Metadata) -> bool {
|
|||
.as_ref()
|
||||
.file_name()
|
||||
.and_then(OsStr::to_str)
|
||||
.map(|s| s.starts_with('.'))
|
||||
.unwrap_or_default()
|
||||
.is_some_and(|s| s.starts_with('.'))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
@ -176,10 +103,8 @@ pub fn path_is_hidden(path: impl AsRef<Path>, metadata: &Metadata) -> bool {
|
|||
}
|
||||
|
||||
impl FilePathMetadata {
|
||||
pub async fn from_path(
|
||||
path: impl AsRef<Path>,
|
||||
metadata: &Metadata,
|
||||
) -> Result<Self, FilePathError> {
|
||||
pub fn from_path(path: impl AsRef<Path>, metadata: &Metadata) -> Result<Self, FilePathError> {
|
||||
let path = path.as_ref();
|
||||
let inode = {
|
||||
#[cfg(target_family = "unix")]
|
||||
{
|
||||
|
@ -188,13 +113,21 @@ impl FilePathMetadata {
|
|||
|
||||
#[cfg(target_family = "windows")]
|
||||
{
|
||||
get_inode_from_path(path.as_ref()).await?
|
||||
use winapi_util::{file::information, Handle};
|
||||
|
||||
let info = tokio::task::block_in_place(|| {
|
||||
Handle::from_path_any(path)
|
||||
.and_then(|ref handle| information(handle))
|
||||
.map_err(|e| FileIOError::from((path, e)))
|
||||
})?;
|
||||
|
||||
info.file_index()
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
inode,
|
||||
hidden: path_is_hidden(path.as_ref(), metadata),
|
||||
hidden: path_is_hidden(path, metadata),
|
||||
size_in_bytes: metadata.len(),
|
||||
created_at: metadata.created_or_now().into(),
|
||||
modified_at: metadata.modified_or_now().into(),
|
||||
|
@ -242,6 +175,7 @@ pub enum FilePathError {
|
|||
InvalidFilenameAndExtension(String),
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn filter_existing_file_path_params(
|
||||
IsolatedFilePathData {
|
||||
materialized_path,
|
||||
|
@ -250,7 +184,7 @@ pub fn filter_existing_file_path_params(
|
|||
name,
|
||||
extension,
|
||||
..
|
||||
}: &IsolatedFilePathData,
|
||||
}: &IsolatedFilePathData<'_>,
|
||||
) -> Vec<file_path::WhereParam> {
|
||||
vec![
|
||||
file_path::location_id::equals(Some(*location_id)),
|
||||
|
@ -294,9 +228,10 @@ pub fn loose_find_existing_file_path_params(
|
|||
])
|
||||
}
|
||||
|
||||
#[allow(clippy::missing_panics_doc)] // Don't actually panic
|
||||
pub async fn ensure_sub_path_is_in_location(
|
||||
location_path: impl AsRef<Path>,
|
||||
sub_path: impl AsRef<Path>,
|
||||
location_path: impl AsRef<Path> + Send,
|
||||
sub_path: impl AsRef<Path> + Send,
|
||||
) -> Result<PathBuf, FilePathError> {
|
||||
let mut sub_path = sub_path.as_ref();
|
||||
let location_path = location_path.as_ref();
|
||||
|
@ -311,7 +246,9 @@ pub async fn ensure_sub_path_is_in_location(
|
|||
.expect("we just checked that it starts with the separator");
|
||||
}
|
||||
|
||||
if !sub_path.starts_with(location_path) {
|
||||
if sub_path.starts_with(location_path) {
|
||||
Ok(sub_path.to_path_buf())
|
||||
} else {
|
||||
// If the sub_path doesn't start with the location_path, we have to check if it's a
|
||||
// materialized path received from the frontend, then we check if the full path exists
|
||||
let full_path = location_path.join(sub_path);
|
||||
|
@ -324,24 +261,22 @@ pub async fn ensure_sub_path_is_in_location(
|
|||
}),
|
||||
Err(e) => Err(FileIOError::from((full_path, e)).into()),
|
||||
}
|
||||
} else {
|
||||
Ok(sub_path.to_path_buf())
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn ensure_file_path_exists<E>(
|
||||
sub_path: impl AsRef<Path>,
|
||||
sub_path: impl AsRef<Path> + Send,
|
||||
iso_file_path: &IsolatedFilePathData<'_>,
|
||||
db: &PrismaClient,
|
||||
error_fn: impl FnOnce(Box<Path>) -> E,
|
||||
error_fn: impl FnOnce(Box<Path>) -> E + Send,
|
||||
) -> Result<(), E>
|
||||
where
|
||||
E: From<QueryError>,
|
||||
{
|
||||
if !check_file_path_exists(iso_file_path, db).await? {
|
||||
Err(error_fn(sub_path.as_ref().into()))
|
||||
} else {
|
||||
if check_file_path_exists(iso_file_path, db).await? {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(error_fn(sub_path.as_ref().into()))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -360,9 +295,10 @@ where
|
|||
.await? > 0)
|
||||
}
|
||||
|
||||
#[allow(clippy::missing_panics_doc)] // Don't actually panic
|
||||
pub async fn ensure_sub_path_is_directory(
|
||||
location_path: impl AsRef<Path>,
|
||||
sub_path: impl AsRef<Path>,
|
||||
location_path: impl AsRef<Path> + Send,
|
||||
sub_path: impl AsRef<Path> + Send,
|
||||
) -> Result<(), FilePathError> {
|
||||
let mut sub_path = sub_path.as_ref();
|
||||
|
||||
|
@ -410,7 +346,7 @@ pub async fn ensure_sub_path_is_directory(
|
|||
}
|
||||
}
|
||||
|
||||
#[allow(unused)] // TODO remove this annotation when we can use it on windows
|
||||
#[must_use]
|
||||
pub fn get_inode(metadata: &Metadata) -> u64 {
|
||||
#[cfg(target_family = "unix")]
|
||||
{
|
||||
|
@ -435,8 +371,7 @@ pub fn get_inode(metadata: &Metadata) -> u64 {
|
|||
}
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
pub async fn get_inode_from_path(path: impl AsRef<Path>) -> Result<u64, FilePathError> {
|
||||
pub async fn get_inode_from_path(path: impl AsRef<Path> + Send) -> Result<u64, FilePathError> {
|
||||
#[cfg(target_family = "unix")]
|
||||
{
|
||||
// TODO use this when it's stable and remove winapi-utils dependency
|
||||
|
@ -451,9 +386,11 @@ pub async fn get_inode_from_path(path: impl AsRef<Path>) -> Result<u64, FilePath
|
|||
{
|
||||
use winapi_util::{file::information, Handle};
|
||||
|
||||
let info = Handle::from_path_any(path.as_ref())
|
||||
let info = tokio::task::block_in_place(|| {
|
||||
Handle::from_path_any(path.as_ref())
|
||||
.and_then(|ref handle| information(handle))
|
||||
.map_err(|e| FileIOError::from((path, e)))?;
|
||||
.map_err(|e| FileIOError::from((path, e)))
|
||||
})?;
|
||||
|
||||
Ok(info.file_index())
|
||||
}
|
50
core/crates/heavy-lifting/Cargo.toml
Normal file
50
core/crates/heavy-lifting/Cargo.toml
Normal file
|
@ -0,0 +1,50 @@
|
|||
[package]
|
||||
name = "sd-core-heavy-lifting"
|
||||
version = "0.1.0"
|
||||
authors = ["Ericson Soares <ericson@spacedrive.com>"]
|
||||
license = { workspace = true }
|
||||
repository = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
# Inner Core Sub-crates
|
||||
sd-core-file-path-helper = { path = "../file-path-helper" }
|
||||
sd-core-indexer-rules = { path = "../indexer-rules" }
|
||||
sd-core-prisma-helpers = { path = "../prisma-helpers" }
|
||||
sd-core-sync = { path = "../sync" }
|
||||
|
||||
# Sub-crates
|
||||
sd-prisma = { path = "../../../crates/prisma" }
|
||||
sd-sync = { path = "../../../crates/sync" }
|
||||
sd-task-system = { path = "../../../crates/task-system" }
|
||||
sd-utils = { path = "../../../crates/utils" }
|
||||
|
||||
|
||||
async-channel = { workspace = true }
|
||||
async-trait = { workspace = true }
|
||||
chrono = { workspace = true, features = ["serde"] }
|
||||
futures = { workspace = true }
|
||||
futures-concurrency = { workspace = true }
|
||||
globset = { workspace = true }
|
||||
itertools = { workspace = true }
|
||||
lending-stream = { workspace = true }
|
||||
prisma-client-rust = { workspace = true }
|
||||
rmp-serde = { workspace = true }
|
||||
rmpv = { workspace = true }
|
||||
rspc = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
serde_json = { workspace = true }
|
||||
specta = { workspace = true }
|
||||
strum = { workspace = true, features = ["derive", "phf"] }
|
||||
thiserror = { workspace = true }
|
||||
tokio = { workspace = true, features = ["fs", "sync", "parking_lot"] }
|
||||
tokio-stream = { workspace = true, features = ["fs"] }
|
||||
tracing = { workspace = true }
|
||||
uuid = { workspace = true, features = ["v4", "serde"] }
|
||||
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = { workspace = true }
|
||||
tracing-test = { workspace.dev-dependencies = true }
|
738
core/crates/heavy-lifting/src/indexer/job.rs
Normal file
738
core/crates/heavy-lifting/src/indexer/job.rs
Normal file
|
@ -0,0 +1,738 @@
|
|||
use crate::{
|
||||
indexer::BATCH_SIZE,
|
||||
job_system::{
|
||||
job::{
|
||||
Job, JobContext, JobName, JobReturn, JobTaskDispatcher, ProgressUpdate, ReturnStatus,
|
||||
},
|
||||
report::ReportOutputMetadata,
|
||||
utils::cancel_pending_tasks,
|
||||
SerializableJob, SerializedTasks,
|
||||
},
|
||||
Error, NonCriticalJobError,
|
||||
};
|
||||
|
||||
use sd_core_file_path_helper::IsolatedFilePathData;
|
||||
use sd_core_indexer_rules::{IndexerRule, IndexerRuler};
|
||||
use sd_core_prisma_helpers::location_with_indexer_rules;
|
||||
|
||||
use sd_task_system::{
|
||||
AnyTaskOutput, IntoTask, SerializableTask, Task, TaskDispatcher, TaskHandle, TaskId,
|
||||
TaskOutput, TaskStatus,
|
||||
};
|
||||
use sd_utils::db::maybe_missing;
|
||||
|
||||
use std::{
|
||||
collections::{HashMap, HashSet},
|
||||
hash::{Hash, Hasher},
|
||||
mem,
|
||||
path::PathBuf,
|
||||
sync::Arc,
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
use futures::{stream::FuturesUnordered, StreamExt};
|
||||
use futures_concurrency::future::TryJoin;
|
||||
use itertools::Itertools;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::json;
|
||||
use tokio::time::Instant;
|
||||
use tracing::warn;
|
||||
|
||||
use super::{
|
||||
determine_initial_walk_path, remove_non_existing_file_paths, reverse_update_directories_sizes,
|
||||
tasks::{
|
||||
saver::{SaveTask, SaveTaskOutput},
|
||||
updater::{UpdateTask, UpdateTaskOutput},
|
||||
walker::{WalkDirTask, WalkTaskOutput, WalkedEntry},
|
||||
},
|
||||
update_directory_sizes, update_location_size, IndexerError, IsoFilePathFactory, WalkerDBProxy,
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct IndexerJob {
|
||||
location: location_with_indexer_rules::Data,
|
||||
sub_path: Option<PathBuf>,
|
||||
metadata: Metadata,
|
||||
|
||||
iso_file_path_factory: IsoFilePathFactory,
|
||||
indexer_ruler: IndexerRuler,
|
||||
walker_root_path: Option<Arc<PathBuf>>,
|
||||
ancestors_needing_indexing: HashSet<WalkedEntry>,
|
||||
ancestors_already_indexed: HashSet<IsolatedFilePathData<'static>>,
|
||||
iso_paths_and_sizes: HashMap<IsolatedFilePathData<'static>, u64>,
|
||||
|
||||
errors: Vec<NonCriticalJobError>,
|
||||
|
||||
pending_tasks_on_resume: Vec<TaskHandle<Error>>,
|
||||
tasks_for_shutdown: Vec<Box<dyn Task<Error>>>,
|
||||
}
|
||||
|
||||
impl Job for IndexerJob {
|
||||
const NAME: JobName = JobName::Indexer;
|
||||
|
||||
async fn run(
|
||||
mut self,
|
||||
dispatcher: JobTaskDispatcher,
|
||||
ctx: impl JobContext,
|
||||
) -> Result<ReturnStatus, Error> {
|
||||
let mut pending_running_tasks = FuturesUnordered::new();
|
||||
|
||||
self.init_or_resume(&mut pending_running_tasks, &ctx, &dispatcher)
|
||||
.await?;
|
||||
|
||||
if let Some(res) = self
|
||||
.process_handles(&mut pending_running_tasks, &ctx, &dispatcher)
|
||||
.await
|
||||
{
|
||||
return res;
|
||||
}
|
||||
|
||||
if !self.tasks_for_shutdown.is_empty() {
|
||||
return Ok(ReturnStatus::Shutdown(self.serialize().await));
|
||||
}
|
||||
|
||||
if !self.ancestors_needing_indexing.is_empty() {
|
||||
let save_tasks = self
|
||||
.ancestors_needing_indexing
|
||||
.drain()
|
||||
.chunks(BATCH_SIZE)
|
||||
.into_iter()
|
||||
.map(|chunk| {
|
||||
let chunked_saves = chunk.collect::<Vec<_>>();
|
||||
self.metadata.total_paths += chunked_saves.len() as u64;
|
||||
self.metadata.total_save_steps += 1;
|
||||
|
||||
SaveTask::new(
|
||||
self.location.id,
|
||||
self.location.pub_id.clone(),
|
||||
chunked_saves,
|
||||
Arc::clone(ctx.db()),
|
||||
Arc::clone(ctx.sync()),
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
pending_running_tasks.extend(dispatcher.dispatch_many(save_tasks).await);
|
||||
|
||||
if let Some(res) = self
|
||||
.process_handles(&mut pending_running_tasks, &ctx, &dispatcher)
|
||||
.await
|
||||
{
|
||||
return res;
|
||||
}
|
||||
|
||||
if !self.tasks_for_shutdown.is_empty() {
|
||||
return Ok(ReturnStatus::Shutdown(self.serialize().await));
|
||||
}
|
||||
}
|
||||
|
||||
// From here onward, job will not be interrupted anymore
|
||||
|
||||
let Self {
|
||||
location,
|
||||
mut metadata,
|
||||
iso_file_path_factory,
|
||||
walker_root_path,
|
||||
iso_paths_and_sizes,
|
||||
mut errors,
|
||||
tasks_for_shutdown,
|
||||
..
|
||||
} = self;
|
||||
|
||||
if metadata.indexed_count > 0 || metadata.removed_count > 0 || metadata.updated_count > 0 {
|
||||
let start_size_update_time = Instant::now();
|
||||
|
||||
update_directory_sizes(iso_paths_and_sizes, ctx.db(), ctx.sync()).await?;
|
||||
|
||||
let root_path = walker_root_path.expect("must be set");
|
||||
if root_path != iso_file_path_factory.location_path {
|
||||
reverse_update_directories_sizes(
|
||||
&*root_path,
|
||||
location.id,
|
||||
&*iso_file_path_factory.location_path,
|
||||
ctx.db(),
|
||||
ctx.sync(),
|
||||
&mut errors,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
update_location_size(location.id, ctx.db(), &ctx.query_invalidator()).await?;
|
||||
|
||||
metadata.db_write_time += start_size_update_time.elapsed();
|
||||
}
|
||||
|
||||
if metadata.indexed_count > 0 || metadata.removed_count > 0 {
|
||||
ctx.invalidate_query("search.paths");
|
||||
}
|
||||
|
||||
assert!(
|
||||
tasks_for_shutdown.is_empty(),
|
||||
"all tasks must be completed here"
|
||||
);
|
||||
|
||||
Ok(ReturnStatus::Completed(
|
||||
JobReturn::builder()
|
||||
.with_metadata(metadata)
|
||||
.with_non_critical_errors(errors)
|
||||
.build(),
|
||||
))
|
||||
}
|
||||
|
||||
async fn resume_tasks(
|
||||
&mut self,
|
||||
dispatcher: &JobTaskDispatcher,
|
||||
ctx: &impl JobContext,
|
||||
SerializedTasks(serialized_tasks): SerializedTasks,
|
||||
) -> Result<(), Error> {
|
||||
let location_id = self.location.id;
|
||||
|
||||
self.pending_tasks_on_resume = dispatcher
|
||||
.dispatch_many_boxed(
|
||||
rmp_serde::from_slice::<Vec<(TaskKind, Vec<u8>)>>(&serialized_tasks)
|
||||
.map_err(IndexerError::from)?
|
||||
.into_iter()
|
||||
.map(|(task_kind, task_bytes)| {
|
||||
let indexer_ruler = self.indexer_ruler.clone();
|
||||
let iso_file_path_factory = self.iso_file_path_factory.clone();
|
||||
async move {
|
||||
match task_kind {
|
||||
TaskKind::Walk => WalkDirTask::deserialize(
|
||||
&task_bytes,
|
||||
(
|
||||
indexer_ruler.clone(),
|
||||
WalkerDBProxy {
|
||||
location_id,
|
||||
db: Arc::clone(ctx.db()),
|
||||
},
|
||||
iso_file_path_factory.clone(),
|
||||
dispatcher.clone(),
|
||||
),
|
||||
)
|
||||
.await
|
||||
.map(IntoTask::into_task),
|
||||
|
||||
TaskKind::Save => SaveTask::deserialize(
|
||||
&task_bytes,
|
||||
(Arc::clone(ctx.db()), Arc::clone(ctx.sync())),
|
||||
)
|
||||
.await
|
||||
.map(IntoTask::into_task),
|
||||
TaskKind::Update => UpdateTask::deserialize(
|
||||
&task_bytes,
|
||||
(Arc::clone(ctx.db()), Arc::clone(ctx.sync())),
|
||||
)
|
||||
.await
|
||||
.map(IntoTask::into_task),
|
||||
}
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.try_join()
|
||||
.await
|
||||
.map_err(IndexerError::from)?,
|
||||
)
|
||||
.await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexerJob {
|
||||
pub fn new(
|
||||
location: location_with_indexer_rules::Data,
|
||||
sub_path: Option<PathBuf>,
|
||||
) -> Result<Self, IndexerError> {
|
||||
Ok(Self {
|
||||
indexer_ruler: location
|
||||
.indexer_rules
|
||||
.iter()
|
||||
.map(|rule| IndexerRule::try_from(&rule.indexer_rule))
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
.map(IndexerRuler::new)?,
|
||||
iso_file_path_factory: IsoFilePathFactory {
|
||||
location_id: location.id,
|
||||
location_path: maybe_missing(&location.path, "location.path")
|
||||
.map(PathBuf::from)
|
||||
.map(Arc::new)?,
|
||||
},
|
||||
walker_root_path: None,
|
||||
ancestors_needing_indexing: HashSet::new(),
|
||||
ancestors_already_indexed: HashSet::new(),
|
||||
iso_paths_and_sizes: HashMap::new(),
|
||||
location,
|
||||
sub_path,
|
||||
metadata: Metadata::default(),
|
||||
errors: Vec::new(),
|
||||
|
||||
pending_tasks_on_resume: Vec::new(),
|
||||
tasks_for_shutdown: Vec::new(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Process output of tasks, according to the downcasted output type
|
||||
///
|
||||
/// # Panics
|
||||
/// Will panic if another task type is added in the job, but this function wasn't updated to handle it
|
||||
///
|
||||
async fn process_task_output(
|
||||
&mut self,
|
||||
task_id: TaskId,
|
||||
any_task_output: Box<dyn AnyTaskOutput>,
|
||||
job_ctx: &impl JobContext,
|
||||
dispatcher: &JobTaskDispatcher,
|
||||
) -> Result<Vec<TaskHandle<Error>>, IndexerError> {
|
||||
if any_task_output.is::<WalkTaskOutput>() {
|
||||
return self
|
||||
.process_walk_output(
|
||||
*any_task_output
|
||||
.downcast::<WalkTaskOutput>()
|
||||
.expect("just checked"),
|
||||
job_ctx,
|
||||
dispatcher,
|
||||
)
|
||||
.await;
|
||||
} else if any_task_output.is::<SaveTaskOutput>() {
|
||||
self.process_save_output(
|
||||
*any_task_output
|
||||
.downcast::<SaveTaskOutput>()
|
||||
.expect("just checked"),
|
||||
job_ctx,
|
||||
);
|
||||
} else if any_task_output.is::<UpdateTaskOutput>() {
|
||||
self.process_update_output(
|
||||
*any_task_output
|
||||
.downcast::<UpdateTaskOutput>()
|
||||
.expect("just checked"),
|
||||
job_ctx,
|
||||
);
|
||||
} else {
|
||||
unreachable!("Unexpected task output type: <id='{task_id}'>");
|
||||
}
|
||||
|
||||
self.metadata.completed_tasks += 1;
|
||||
|
||||
job_ctx.progress(vec![ProgressUpdate::CompletedTaskCount(
|
||||
self.metadata.completed_tasks,
|
||||
)]);
|
||||
|
||||
Ok(Vec::new())
|
||||
}
|
||||
|
||||
async fn process_walk_output(
|
||||
&mut self,
|
||||
WalkTaskOutput {
|
||||
to_create,
|
||||
to_update,
|
||||
to_remove,
|
||||
accepted_ancestors,
|
||||
errors,
|
||||
directory_iso_file_path,
|
||||
total_size,
|
||||
mut handles,
|
||||
scan_time,
|
||||
}: WalkTaskOutput,
|
||||
job_ctx: &impl JobContext,
|
||||
dispatcher: &JobTaskDispatcher,
|
||||
) -> Result<Vec<TaskHandle<Error>>, IndexerError> {
|
||||
self.metadata.scan_read_time += scan_time;
|
||||
|
||||
let (to_create_count, to_update_count) = (to_create.len(), to_update.len());
|
||||
|
||||
*self
|
||||
.iso_paths_and_sizes
|
||||
.entry(directory_iso_file_path)
|
||||
.or_default() += total_size;
|
||||
|
||||
for ancestor_iso_file_path in accepted_ancestors
|
||||
.iter()
|
||||
.map(|ancestor_entry| &ancestor_entry.iso_file_path)
|
||||
{
|
||||
if self
|
||||
.iso_paths_and_sizes
|
||||
.contains_key(ancestor_iso_file_path)
|
||||
{
|
||||
*self
|
||||
.iso_paths_and_sizes
|
||||
.get_mut(ancestor_iso_file_path)
|
||||
.expect("we just checked") += total_size;
|
||||
} else {
|
||||
self.iso_paths_and_sizes
|
||||
.insert(ancestor_iso_file_path.clone(), total_size);
|
||||
}
|
||||
}
|
||||
|
||||
// First we add ancestors, filtering out ancestors already indexed in previous iterations
|
||||
self.ancestors_needing_indexing
|
||||
.extend(accepted_ancestors.into_iter().filter(|ancestor_entry| {
|
||||
!self
|
||||
.ancestors_already_indexed
|
||||
.contains(&ancestor_entry.iso_file_path)
|
||||
}));
|
||||
|
||||
// Then we add new directories to be indexed as they can be received as ancestors in coming iterations
|
||||
self.ancestors_already_indexed.extend(
|
||||
to_create
|
||||
.iter()
|
||||
.filter(|&WalkedEntry { iso_file_path, .. }| iso_file_path.is_dir())
|
||||
.map(|WalkedEntry { iso_file_path, .. }| iso_file_path.clone()),
|
||||
);
|
||||
|
||||
self.errors.extend(errors);
|
||||
|
||||
let db_delete_time = Instant::now();
|
||||
self.metadata.removed_count +=
|
||||
remove_non_existing_file_paths(to_remove, job_ctx.db(), job_ctx.sync()).await?;
|
||||
self.metadata.db_write_time += db_delete_time.elapsed();
|
||||
|
||||
let save_tasks = to_create
|
||||
.into_iter()
|
||||
.chunks(BATCH_SIZE)
|
||||
.into_iter()
|
||||
.map(|chunk| {
|
||||
let chunked_saves = chunk.collect::<Vec<_>>();
|
||||
self.metadata.total_paths += chunked_saves.len() as u64;
|
||||
self.metadata.total_save_steps += 1;
|
||||
|
||||
SaveTask::new(
|
||||
self.location.id,
|
||||
self.location.pub_id.clone(),
|
||||
chunked_saves,
|
||||
Arc::clone(job_ctx.db()),
|
||||
Arc::clone(job_ctx.sync()),
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let update_tasks = to_update
|
||||
.into_iter()
|
||||
.chunks(BATCH_SIZE)
|
||||
.into_iter()
|
||||
.map(|chunk| {
|
||||
let chunked_updates = chunk.collect::<Vec<_>>();
|
||||
self.metadata.total_updated_paths += chunked_updates.len() as u64;
|
||||
self.metadata.total_update_steps += 1;
|
||||
|
||||
UpdateTask::new(
|
||||
chunked_updates,
|
||||
Arc::clone(job_ctx.db()),
|
||||
Arc::clone(job_ctx.sync()),
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
handles.extend(dispatcher.dispatch_many(save_tasks).await);
|
||||
handles.extend(dispatcher.dispatch_many(update_tasks).await);
|
||||
|
||||
self.metadata.total_tasks += handles.len() as u64;
|
||||
|
||||
job_ctx.progress(vec![
|
||||
ProgressUpdate::TaskCount(handles.len() as u64),
|
||||
ProgressUpdate::message(format!(
|
||||
"Found {to_create_count} new files and {to_update_count} to update"
|
||||
)),
|
||||
]);
|
||||
|
||||
Ok(handles)
|
||||
}
|
||||
|
||||
fn process_save_output(
|
||||
&mut self,
|
||||
SaveTaskOutput {
|
||||
saved_count,
|
||||
save_duration,
|
||||
}: SaveTaskOutput,
|
||||
job_ctx: &impl JobContext,
|
||||
) {
|
||||
self.metadata.indexed_count += saved_count;
|
||||
self.metadata.db_write_time += save_duration;
|
||||
|
||||
job_ctx.progress_msg(format!("Saved {saved_count} files"));
|
||||
}
|
||||
|
||||
fn process_update_output(
|
||||
&mut self,
|
||||
UpdateTaskOutput {
|
||||
updated_count,
|
||||
update_duration,
|
||||
}: UpdateTaskOutput,
|
||||
job_ctx: &impl JobContext,
|
||||
) {
|
||||
self.metadata.updated_count += updated_count;
|
||||
self.metadata.db_write_time += update_duration;
|
||||
|
||||
job_ctx.progress_msg(format!("Updated {updated_count} files"));
|
||||
}
|
||||
|
||||
async fn process_handles(
|
||||
&mut self,
|
||||
pending_running_tasks: &mut FuturesUnordered<TaskHandle<Error>>,
|
||||
job_ctx: &impl JobContext,
|
||||
dispatcher: &JobTaskDispatcher,
|
||||
) -> Option<Result<ReturnStatus, Error>> {
|
||||
while let Some(task) = pending_running_tasks.next().await {
|
||||
match task {
|
||||
Ok(TaskStatus::Done((task_id, TaskOutput::Out(out)))) => {
|
||||
let more_handles = match self
|
||||
.process_task_output(task_id, out, job_ctx, dispatcher)
|
||||
.await
|
||||
{
|
||||
Ok(more_handles) => more_handles,
|
||||
Err(e) => {
|
||||
cancel_pending_tasks(&*pending_running_tasks).await;
|
||||
|
||||
return Some(Err(e.into()));
|
||||
}
|
||||
};
|
||||
|
||||
pending_running_tasks.extend(more_handles);
|
||||
}
|
||||
|
||||
Ok(TaskStatus::Done((task_id, TaskOutput::Empty))) => {
|
||||
warn!("Task <id='{task_id}'> returned an empty output");
|
||||
}
|
||||
|
||||
Ok(TaskStatus::Shutdown(task)) => {
|
||||
self.tasks_for_shutdown.push(task);
|
||||
}
|
||||
|
||||
Ok(TaskStatus::Error(e)) => {
|
||||
cancel_pending_tasks(&*pending_running_tasks).await;
|
||||
|
||||
return Some(Err(e));
|
||||
}
|
||||
|
||||
Ok(TaskStatus::Canceled | TaskStatus::ForcedAbortion) => {
|
||||
cancel_pending_tasks(&*pending_running_tasks).await;
|
||||
|
||||
return Some(Ok(ReturnStatus::Canceled));
|
||||
}
|
||||
|
||||
Err(e) => {
|
||||
cancel_pending_tasks(&*pending_running_tasks).await;
|
||||
|
||||
return Some(Err(e.into()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
async fn init_or_resume(
|
||||
&mut self,
|
||||
pending_running_tasks: &mut FuturesUnordered<TaskHandle<Error>>,
|
||||
job_ctx: &impl JobContext,
|
||||
dispatcher: &JobTaskDispatcher,
|
||||
) -> Result<(), IndexerError> {
|
||||
// if we don't have any pending task, then this is a fresh job
|
||||
if self.pending_tasks_on_resume.is_empty() {
|
||||
let walker_root_path = Arc::new(
|
||||
determine_initial_walk_path(
|
||||
self.location.id,
|
||||
&self.sub_path,
|
||||
&*self.iso_file_path_factory.location_path,
|
||||
job_ctx.db(),
|
||||
)
|
||||
.await?,
|
||||
);
|
||||
|
||||
pending_running_tasks.push(
|
||||
dispatcher
|
||||
.dispatch(WalkDirTask::new(
|
||||
walker_root_path.as_ref(),
|
||||
Arc::clone(&walker_root_path),
|
||||
self.indexer_ruler.clone(),
|
||||
self.iso_file_path_factory.clone(),
|
||||
WalkerDBProxy {
|
||||
location_id: self.location.id,
|
||||
db: Arc::clone(job_ctx.db()),
|
||||
},
|
||||
Some(dispatcher.clone()),
|
||||
)?)
|
||||
.await,
|
||||
);
|
||||
|
||||
self.walker_root_path = Some(walker_root_path);
|
||||
} else {
|
||||
pending_running_tasks.extend(mem::take(&mut self.pending_tasks_on_resume));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
pub struct Metadata {
|
||||
db_write_time: Duration,
|
||||
scan_read_time: Duration,
|
||||
total_tasks: u64,
|
||||
completed_tasks: u64,
|
||||
total_paths: u64,
|
||||
total_updated_paths: u64,
|
||||
total_save_steps: u64,
|
||||
total_update_steps: u64,
|
||||
indexed_count: u64,
|
||||
updated_count: u64,
|
||||
removed_count: u64,
|
||||
}
|
||||
|
||||
impl From<Metadata> for ReportOutputMetadata {
|
||||
fn from(value: Metadata) -> Self {
|
||||
Self::Metrics(HashMap::from([
|
||||
("db_write_time".into(), json!(value.db_write_time)),
|
||||
("scan_read_time".into(), json!(value.scan_read_time)),
|
||||
("total_tasks".into(), json!(value.total_tasks)),
|
||||
("total_paths".into(), json!(value.total_paths)),
|
||||
(
|
||||
"total_updated_paths".into(),
|
||||
json!(value.total_updated_paths),
|
||||
),
|
||||
("total_save_steps".into(), json!(value.total_save_steps)),
|
||||
("total_update_steps".into(), json!(value.total_update_steps)),
|
||||
("indexed_count".into(), json!(value.indexed_count)),
|
||||
("updated_count".into(), json!(value.updated_count)),
|
||||
("removed_count".into(), json!(value.removed_count)),
|
||||
]))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
|
||||
enum TaskKind {
|
||||
Walk,
|
||||
Save,
|
||||
Update,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
struct SaveState {
|
||||
location: location_with_indexer_rules::Data,
|
||||
sub_path: Option<PathBuf>,
|
||||
metadata: Metadata,
|
||||
|
||||
iso_file_path_factory: IsoFilePathFactory,
|
||||
indexer_ruler_bytes: Vec<u8>,
|
||||
walker_root_path: Option<Arc<PathBuf>>,
|
||||
ancestors_needing_indexing: HashSet<WalkedEntry>,
|
||||
ancestors_already_indexed: HashSet<IsolatedFilePathData<'static>>,
|
||||
paths_and_sizes: HashMap<IsolatedFilePathData<'static>, u64>,
|
||||
|
||||
errors: Vec<NonCriticalJobError>,
|
||||
|
||||
tasks_for_shutdown_bytes: Option<SerializedTasks>,
|
||||
}
|
||||
|
||||
impl SerializableJob for IndexerJob {
|
||||
async fn serialize(self) -> Result<Option<Vec<u8>>, rmp_serde::encode::Error> {
|
||||
let Self {
|
||||
location,
|
||||
sub_path,
|
||||
metadata,
|
||||
iso_file_path_factory,
|
||||
indexer_ruler,
|
||||
walker_root_path,
|
||||
ancestors_needing_indexing,
|
||||
ancestors_already_indexed,
|
||||
iso_paths_and_sizes: paths_and_sizes,
|
||||
errors,
|
||||
tasks_for_shutdown,
|
||||
..
|
||||
} = self;
|
||||
|
||||
rmp_serde::to_vec_named(&SaveState {
|
||||
location,
|
||||
sub_path,
|
||||
metadata,
|
||||
iso_file_path_factory,
|
||||
indexer_ruler_bytes: indexer_ruler.serialize().await?,
|
||||
walker_root_path,
|
||||
ancestors_needing_indexing,
|
||||
ancestors_already_indexed,
|
||||
paths_and_sizes,
|
||||
tasks_for_shutdown_bytes: Some(SerializedTasks(rmp_serde::to_vec_named(
|
||||
&tasks_for_shutdown
|
||||
.into_iter()
|
||||
.map(|task| async move {
|
||||
if task
|
||||
.is::<WalkDirTask<WalkerDBProxy, IsoFilePathFactory, JobTaskDispatcher>>(
|
||||
) {
|
||||
task
|
||||
.downcast::<WalkDirTask<WalkerDBProxy, IsoFilePathFactory, JobTaskDispatcher>>(
|
||||
)
|
||||
.expect("just checked")
|
||||
.serialize()
|
||||
.await
|
||||
.map(|bytes| (TaskKind::Walk, bytes))
|
||||
} else if task.is::<SaveTask>() {
|
||||
task.downcast::<SaveTask>()
|
||||
.expect("just checked")
|
||||
.serialize()
|
||||
.await
|
||||
.map(|bytes| (TaskKind::Save, bytes))
|
||||
} else if task.is::<UpdateTask>() {
|
||||
task.downcast::<UpdateTask>()
|
||||
.expect("just checked")
|
||||
.serialize()
|
||||
.await
|
||||
.map(|bytes| (TaskKind::Update, bytes))
|
||||
} else {
|
||||
unreachable!("Unexpected task type")
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.try_join()
|
||||
.await?,
|
||||
)?)),
|
||||
errors,
|
||||
})
|
||||
.map(Some)
|
||||
}
|
||||
|
||||
async fn deserialize(
|
||||
serialized_job: &[u8],
|
||||
_: &impl JobContext,
|
||||
) -> Result<Option<(Self, Option<SerializedTasks>)>, rmp_serde::decode::Error> {
|
||||
let SaveState {
|
||||
location,
|
||||
sub_path,
|
||||
metadata,
|
||||
iso_file_path_factory,
|
||||
indexer_ruler_bytes,
|
||||
walker_root_path,
|
||||
ancestors_needing_indexing,
|
||||
ancestors_already_indexed,
|
||||
paths_and_sizes,
|
||||
errors,
|
||||
tasks_for_shutdown_bytes,
|
||||
} = rmp_serde::from_slice::<SaveState>(serialized_job)?;
|
||||
|
||||
let indexer_ruler = IndexerRuler::deserialize(&indexer_ruler_bytes)?;
|
||||
|
||||
Ok(Some((
|
||||
Self {
|
||||
location,
|
||||
sub_path,
|
||||
metadata,
|
||||
iso_file_path_factory,
|
||||
indexer_ruler,
|
||||
walker_root_path,
|
||||
ancestors_needing_indexing,
|
||||
ancestors_already_indexed,
|
||||
iso_paths_and_sizes: paths_and_sizes,
|
||||
errors,
|
||||
pending_tasks_on_resume: Vec::new(),
|
||||
tasks_for_shutdown: Vec::new(),
|
||||
},
|
||||
tasks_for_shutdown_bytes,
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
impl Hash for IndexerJob {
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
self.location.id.hash(state);
|
||||
if let Some(ref sub_path) = self.sub_path {
|
||||
sub_path.hash(state);
|
||||
}
|
||||
}
|
||||
}
|
542
core/crates/heavy-lifting/src/indexer/mod.rs
Normal file
542
core/crates/heavy-lifting/src/indexer/mod.rs
Normal file
|
@ -0,0 +1,542 @@
|
|||
use crate::NonCriticalJobError;
|
||||
|
||||
use sd_core_file_path_helper::{
|
||||
ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location,
|
||||
FilePathError, IsolatedFilePathData,
|
||||
};
|
||||
use sd_core_indexer_rules::IndexerRuleError;
|
||||
use sd_core_prisma_helpers::{
|
||||
file_path_pub_and_cas_ids, file_path_to_isolate_with_pub_id, file_path_walker,
|
||||
};
|
||||
use sd_core_sync::Manager as SyncManager;
|
||||
|
||||
use sd_prisma::{
|
||||
prisma::{file_path, location, PrismaClient, SortOrder},
|
||||
prisma_sync,
|
||||
};
|
||||
use sd_sync::OperationFactory;
|
||||
use sd_utils::{
|
||||
db::{size_in_bytes_from_db, size_in_bytes_to_db, MissingFieldError},
|
||||
error::{FileIOError, NonUtf8PathError},
|
||||
from_bytes_to_uuid, msgpack,
|
||||
};
|
||||
|
||||
use std::{
|
||||
collections::{HashMap, HashSet},
|
||||
hash::BuildHasher,
|
||||
mem,
|
||||
path::{Path, PathBuf},
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use itertools::Itertools;
|
||||
use prisma_client_rust::{operator::or, Select};
|
||||
use rspc::ErrorCode;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use specta::Type;
|
||||
use tracing::warn;
|
||||
|
||||
mod job;
|
||||
mod shallow;
|
||||
mod tasks;
|
||||
|
||||
pub use job::IndexerJob;
|
||||
pub use shallow::shallow;
|
||||
|
||||
use tasks::walker;
|
||||
|
||||
/// `BATCH_SIZE` is the number of files to index at each task, writing the chunk of files metadata in the database.
|
||||
const BATCH_SIZE: usize = 1000;
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub enum IndexerError {
|
||||
// Not Found errors
|
||||
#[error("indexer rule not found: <id='{0}'>")]
|
||||
IndexerRuleNotFound(i32),
|
||||
#[error("received sub path not in database: <path='{}'>", .0.display())]
|
||||
SubPathNotFound(Box<Path>),
|
||||
|
||||
// Internal Errors
|
||||
#[error("database Error: {0}")]
|
||||
Database(#[from] prisma_client_rust::QueryError),
|
||||
#[error(transparent)]
|
||||
FileIO(#[from] FileIOError),
|
||||
#[error(transparent)]
|
||||
NonUtf8Path(#[from] NonUtf8PathError),
|
||||
#[error(transparent)]
|
||||
IsoFilePath(#[from] FilePathError),
|
||||
#[error("missing field on database: {0}")]
|
||||
MissingField(#[from] MissingFieldError),
|
||||
#[error("failed to deserialized stored tasks for job resume: {0}")]
|
||||
DeserializeTasks(#[from] rmp_serde::decode::Error),
|
||||
|
||||
// Mixed errors
|
||||
#[error(transparent)]
|
||||
Rules(#[from] IndexerRuleError),
|
||||
}
|
||||
|
||||
impl From<IndexerError> for rspc::Error {
|
||||
fn from(err: IndexerError) -> Self {
|
||||
match err {
|
||||
IndexerError::IndexerRuleNotFound(_) | IndexerError::SubPathNotFound(_) => {
|
||||
Self::with_cause(ErrorCode::NotFound, err.to_string(), err)
|
||||
}
|
||||
|
||||
IndexerError::Rules(rule_err) => rule_err.into(),
|
||||
|
||||
_ => Self::with_cause(ErrorCode::InternalServerError, err.to_string(), err),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type)]
|
||||
pub enum NonCriticalIndexerError {
|
||||
#[error("failed to read directory entry: {0}")]
|
||||
FailedDirectoryEntry(String),
|
||||
#[error("failed to fetch metadata: {0}")]
|
||||
Metadata(String),
|
||||
#[error("error applying indexer rule: {0}")]
|
||||
IndexerRule(String),
|
||||
#[error("error trying to extract file path metadata from a file: {0}")]
|
||||
FilePathMetadata(String),
|
||||
#[error("failed to fetch file paths ids from existing files on database: {0}")]
|
||||
FetchAlreadyExistingFilePathIds(String),
|
||||
#[error("failed to fetch file paths to be removed from database: {0}")]
|
||||
FetchFilePathsToRemove(String),
|
||||
#[error("error constructing isolated file path: {0}")]
|
||||
IsoFilePath(String),
|
||||
#[error("failed to dispatch new task to keep walking a directory: {0}")]
|
||||
DispatchKeepWalking(String),
|
||||
#[error("missing file_path data on database: {0}")]
|
||||
MissingFilePathData(String),
|
||||
}
|
||||
|
||||
async fn determine_initial_walk_path(
|
||||
location_id: location::id::Type,
|
||||
sub_path: &Option<impl AsRef<Path> + Send + Sync>,
|
||||
location_path: impl AsRef<Path> + Send,
|
||||
db: &PrismaClient,
|
||||
) -> Result<PathBuf, IndexerError> {
|
||||
let location_path = location_path.as_ref();
|
||||
|
||||
match sub_path {
|
||||
Some(sub_path) if sub_path.as_ref() != Path::new("") => {
|
||||
let sub_path = sub_path.as_ref();
|
||||
let full_path = ensure_sub_path_is_in_location(location_path, sub_path).await?;
|
||||
|
||||
ensure_sub_path_is_directory(location_path, sub_path).await?;
|
||||
|
||||
ensure_file_path_exists(
|
||||
sub_path,
|
||||
&IsolatedFilePathData::new(location_id, location_path, &full_path, true)
|
||||
.map_err(IndexerError::from)?,
|
||||
db,
|
||||
IndexerError::SubPathNotFound,
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(full_path)
|
||||
}
|
||||
_ => Ok(location_path.to_path_buf()),
|
||||
}
|
||||
}
|
||||
|
||||
fn chunk_db_queries<'db, 'iso>(
|
||||
iso_file_paths: impl IntoIterator<Item = &'iso IsolatedFilePathData<'iso>>,
|
||||
db: &'db PrismaClient,
|
||||
) -> Vec<Select<'db, Vec<file_path_to_isolate_with_pub_id::Data>>> {
|
||||
iso_file_paths
|
||||
.into_iter()
|
||||
.chunks(200)
|
||||
.into_iter()
|
||||
.map(|paths_chunk| {
|
||||
db.file_path()
|
||||
.find_many(vec![or(paths_chunk
|
||||
.into_iter()
|
||||
.map(file_path::WhereParam::from)
|
||||
.collect())])
|
||||
.select(file_path_to_isolate_with_pub_id::select())
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
#[allow(clippy::missing_panics_doc)] // Can't actually panic as we use the hashmap to fetch entries from db
|
||||
async fn update_directory_sizes(
|
||||
iso_paths_and_sizes: HashMap<IsolatedFilePathData<'_>, u64, impl BuildHasher + Send>,
|
||||
db: &PrismaClient,
|
||||
sync: &SyncManager,
|
||||
) -> Result<(), IndexerError> {
|
||||
let to_sync_and_update = db
|
||||
._batch(chunk_db_queries(iso_paths_and_sizes.keys(), db))
|
||||
.await?
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.map(|file_path| {
|
||||
let size_bytes = iso_paths_and_sizes
|
||||
.get(&IsolatedFilePathData::try_from(&file_path)?)
|
||||
.map(|size| size.to_be_bytes().to_vec())
|
||||
.expect("must be here");
|
||||
|
||||
Ok((
|
||||
sync.shared_update(
|
||||
prisma_sync::file_path::SyncId {
|
||||
pub_id: file_path.pub_id.clone(),
|
||||
},
|
||||
file_path::size_in_bytes_bytes::NAME,
|
||||
msgpack!(size_bytes),
|
||||
),
|
||||
db.file_path().update(
|
||||
file_path::pub_id::equals(file_path.pub_id),
|
||||
vec![file_path::size_in_bytes_bytes::set(Some(size_bytes))],
|
||||
),
|
||||
))
|
||||
})
|
||||
.collect::<Result<Vec<_>, IndexerError>>()?
|
||||
.into_iter()
|
||||
.unzip::<_, _, Vec<_>, Vec<_>>();
|
||||
|
||||
sync.write_ops(db, to_sync_and_update).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn update_location_size<InvalidateQuery: Fn(&'static str) + Send + Sync>(
|
||||
location_id: location::id::Type,
|
||||
db: &PrismaClient,
|
||||
invalidate_query: &InvalidateQuery,
|
||||
) -> Result<(), IndexerError> {
|
||||
let total_size = db
|
||||
.file_path()
|
||||
.find_many(vec![
|
||||
file_path::location_id::equals(Some(location_id)),
|
||||
file_path::materialized_path::equals(Some("/".to_string())),
|
||||
])
|
||||
.select(file_path::select!({ size_in_bytes_bytes }))
|
||||
.exec()
|
||||
.await?
|
||||
.into_iter()
|
||||
.filter_map(|file_path| {
|
||||
file_path
|
||||
.size_in_bytes_bytes
|
||||
.map(|size_in_bytes_bytes| size_in_bytes_from_db(&size_in_bytes_bytes))
|
||||
})
|
||||
.sum::<u64>();
|
||||
|
||||
db.location()
|
||||
.update(
|
||||
location::id::equals(location_id),
|
||||
vec![location::size_in_bytes::set(Some(
|
||||
total_size.to_be_bytes().to_vec(),
|
||||
))],
|
||||
)
|
||||
.exec()
|
||||
.await?;
|
||||
|
||||
invalidate_query("locations.list");
|
||||
invalidate_query("locations.get");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn remove_non_existing_file_paths(
|
||||
to_remove: Vec<file_path_pub_and_cas_ids::Data>,
|
||||
db: &PrismaClient,
|
||||
sync: &sd_core_sync::Manager,
|
||||
) -> Result<u64, IndexerError> {
|
||||
#[allow(clippy::cast_sign_loss)]
|
||||
let (sync_params, db_params): (Vec<_>, Vec<_>) = to_remove
|
||||
.into_iter()
|
||||
.map(|file_path| {
|
||||
(
|
||||
sync.shared_delete(prisma_sync::file_path::SyncId {
|
||||
pub_id: file_path.pub_id,
|
||||
}),
|
||||
file_path.id,
|
||||
)
|
||||
})
|
||||
.unzip();
|
||||
|
||||
sync.write_ops(
|
||||
db,
|
||||
(
|
||||
sync_params,
|
||||
db.file_path()
|
||||
.delete_many(vec![file_path::id::in_vec(db_params)]),
|
||||
),
|
||||
)
|
||||
.await
|
||||
.map(
|
||||
#[allow(clippy::cast_sign_loss)]
|
||||
|count| count as u64,
|
||||
)
|
||||
.map_err(Into::into)
|
||||
}
|
||||
|
||||
/// Propagates size changes upward: recomputes the total size of every ancestor
/// directory of `base_path` (up to, but excluding, the location root itself)
/// and persists the new sizes with matching sync operations.
///
/// Non-fatal problems (e.g. rows missing data) are appended to `errors`.
#[allow(clippy::missing_panics_doc)] // Can't actually panic as we only deal with directories
async fn reverse_update_directories_sizes(
	base_path: impl AsRef<Path> + Send,
	location_id: location::id::Type,
	location_path: impl AsRef<Path> + Send,
	db: &PrismaClient,
	sync: &SyncManager,
	errors: &mut Vec<NonCriticalJobError>,
) -> Result<(), IndexerError> {
	let location_path = location_path.as_ref();

	// Every ancestor directory between base_path and the location root,
	// keyed by its isolated file path, valued by its child-materialized path
	let ancestors = base_path
		.as_ref()
		.ancestors()
		.take_while(|&ancestor| ancestor != location_path)
		.map(|ancestor| {
			IsolatedFilePathData::new(location_id, location_path, ancestor, true).map(
				|iso_file_path| {
					let materialized_path = iso_file_path
						.materialized_path_for_children()
						.expect("each ancestor is a directory");

					(iso_file_path, materialized_path)
				},
			)
		})
		.collect::<Result<HashMap<_, _>, _>>()?;

	// Fetch the DB rows for those ancestors; each value is (pub_id, size
	// accumulator starting at 0). Corrupt rows are reported and skipped.
	let mut pub_id_by_ancestor_materialized_path = db
		._batch(chunk_db_queries(ancestors.keys(), db))
		.await?
		.into_iter()
		.flatten()
		.filter_map(|mut file_path| {
			// Move the pub_id out before consuming the row in try_from
			let pub_id = mem::take(&mut file_path.pub_id);
			IsolatedFilePathData::try_from(file_path)
				.map_err(|e| {
					errors.push(
						NonCriticalIndexerError::MissingFilePathData(format!(
							"Found a file_path missing data: <pub_id='{:#?}'>, error: {e:#?}",
							from_bytes_to_uuid(&pub_id)
						))
						.into(),
					);
				})
				.map(|iso_file_path| {
					(
						iso_file_path
							.materialized_path_for_children()
							.expect("we know it's a directory"),
						(pub_id, 0),
					)
				})
				.ok()
		})
		.collect::<HashMap<_, _>>();

	// Sum children sizes into each ancestor's accumulator
	compute_sizes(
		location_id,
		ancestors.values().cloned().collect(),
		&mut pub_id_by_ancestor_materialized_path,
		db,
		errors,
	)
	.await?;

	// Persist the new size of each ancestor: one (sync update, DB update) pair
	let to_sync_and_update = ancestors
		.into_values()
		.filter_map(|materialized_path| {
			if let Some((pub_id, size)) =
				pub_id_by_ancestor_materialized_path.remove(&materialized_path)
			{
				let size_bytes = size_in_bytes_to_db(size);

				Some((
					sync.shared_update(
						prisma_sync::file_path::SyncId {
							pub_id: pub_id.clone(),
						},
						file_path::size_in_bytes_bytes::NAME,
						msgpack!(size_bytes),
					),
					db.file_path().update(
						file_path::pub_id::equals(pub_id),
						vec![file_path::size_in_bytes_bytes::set(Some(size_bytes))],
					),
				))
			} else {
				warn!("Got a missing ancestor for a file_path in the database, maybe we have a corruption");
				None
			}
		})
		.unzip::<_, _, Vec<_>, Vec<_>>();

	sync.write_ops(db, to_sync_and_update).await?;

	Ok(())
}
|
||||
|
||||
/// Sums the `size_in_bytes_bytes` of every `file_path` living directly under the
/// given `materialized_paths` into the accumulators held in
/// `pub_id_by_ancestor_materialized_path`.
///
/// Rows without a `materialized_path` are reported into `errors`; rows without a
/// size contribute 0 and only emit a warning.
async fn compute_sizes(
	location_id: location::id::Type,
	materialized_paths: Vec<String>,
	pub_id_by_ancestor_materialized_path: &mut HashMap<String, (file_path::pub_id::Type, u64)>,
	db: &PrismaClient,
	errors: &mut Vec<NonCriticalJobError>,
) -> Result<(), IndexerError> {
	db.file_path()
		.find_many(vec![
			file_path::location_id::equals(Some(location_id)),
			file_path::materialized_path::in_vec(materialized_paths),
		])
		.select(file_path::select!({ pub_id materialized_path size_in_bytes_bytes }))
		.exec()
		.await?
		.into_iter()
		.for_each(|file_path| {
			if let Some(materialized_path) = file_path.materialized_path {
				// Add this child's size to its parent's accumulator, if tracked
				if let Some((_, size)) =
					pub_id_by_ancestor_materialized_path.get_mut(&materialized_path)
				{
					*size += file_path.size_in_bytes_bytes.map_or_else(
						|| {
							warn!("Got a directory missing its size in bytes");
							0
						},
						|size_in_bytes_bytes| size_in_bytes_from_db(&size_in_bytes_bytes),
					);
				}
			} else {
				// materialized_path is mandatory for indexed rows; its absence
				// indicates database corruption
				errors.push(
					NonCriticalIndexerError::MissingFilePathData(format!(
						"Corrupt database possessing a file_path entry without materialized_path: <pub_id='{:#?}'>",
						from_bytes_to_uuid(&file_path.pub_id)
					))
					.into(),
				);
			}
		});

	Ok(())
}
|
||||
|
||||
/// Factory that builds `IsolatedFilePathData` values for one fixed location;
/// serializable so walker tasks can be persisted and resumed.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct IsoFilePathFactory {
	// Location the produced paths belong to
	pub location_id: location::id::Type,
	// Root path of the location; Arc so it is shared cheaply across tasks
	pub location_path: Arc<PathBuf>,
}
|
||||
|
||||
impl walker::IsoFilePathFactory for IsoFilePathFactory {
	/// Builds an isolated file path for `path` relative to this factory's
	/// location root; `is_dir` tells whether the entry is a directory.
	fn build(
		&self,
		path: impl AsRef<Path>,
		is_dir: bool,
	) -> Result<IsolatedFilePathData<'static>, FilePathError> {
		IsolatedFilePathData::new(self.location_id, self.location_path.as_ref(), path, is_dir)
	}
}
|
||||
|
||||
/// Database access handle given to walker tasks, scoped to a single location.
#[derive(Debug, Clone)]
struct WalkerDBProxy {
	// Location whose file_paths this proxy queries
	location_id: location::id::Type,
	db: Arc<PrismaClient>,
}
|
||||
|
||||
impl walker::WalkerDBProxy for WalkerDBProxy {
	/// Fetches the existing `file_path` rows matching the entries found on disk.
	async fn fetch_file_paths(
		&self,
		found_paths: Vec<file_path::WhereParam>,
	) -> Result<Vec<file_path_walker::Data>, IndexerError> {
		// Each found path is a AND with 4 terms, and SQLite has a expression tree limit of 1000 terms
		// so we will use chunks of 200 just to be safe
		self.db
			._batch(
				found_paths
					.into_iter()
					.chunks(200)
					.into_iter()
					.map(|founds| {
						self.db
							.file_path()
							.find_many(vec![or(founds.collect::<Vec<_>>())])
							.select(file_path_walker::select())
					})
					.collect::<Vec<_>>(),
			)
			.await
			.map(|fetched| fetched.into_iter().flatten().collect::<Vec<_>>())
			.map_err(Into::into)
	}

	/// Computes which children of `parent_iso_file_path` exist in the database
	/// but were NOT found on disk in this walk — i.e. the rows to remove.
	async fn fetch_file_paths_to_remove(
		&self,
		parent_iso_file_path: &IsolatedFilePathData<'_>,
		unique_location_id_materialized_path_name_extension_params: Vec<file_path::WhereParam>,
	) -> Result<Vec<file_path_pub_and_cas_ids::Data>, NonCriticalIndexerError> {
		// NOTE: This batch size can be increased if we wish to trade memory for more performance
		const BATCH_SIZE: i64 = 1000;

		// Ids of every file_path that was actually found on disk
		// (chunked to stay under SQLite's expression tree limit)
		let founds_ids = self
			.db
			._batch(
				unique_location_id_materialized_path_name_extension_params
					.into_iter()
					.chunks(200)
					.into_iter()
					.map(|unique_params| {
						self.db
							.file_path()
							.find_many(vec![or(unique_params.collect())])
							.select(file_path::select!({ id }))
					})
					.collect::<Vec<_>>(),
			)
			.await
			.map(|founds_chunk| {
				founds_chunk
					.into_iter()
					.flat_map(|file_paths| file_paths.into_iter().map(|file_path| file_path.id))
					.collect::<HashSet<_>>()
			})
			.map_err(|e| NonCriticalIndexerError::FetchAlreadyExistingFilePathIds(e.to_string()))?;

		let mut to_remove = vec![];
		let mut cursor = 1;

		// Page through the directory's children, id-ascending; anything not in
		// `founds_ids` no longer exists on disk.
		// NOTE(review): Prisma-style cursors are inclusive of the cursor row, so
		// re-using `last.id` as the next cursor may re-yield that row and allow a
		// duplicate into `to_remove` — confirm against prisma-client-rust semantics.
		loop {
			let found = self
				.db
				.file_path()
				.find_many(vec![
					file_path::location_id::equals(Some(self.location_id)),
					file_path::materialized_path::equals(Some(
						parent_iso_file_path
							.materialized_path_for_children()
							.expect("the received isolated file path must be from a directory"),
					)),
				])
				.order_by(file_path::id::order(SortOrder::Asc))
				.take(BATCH_SIZE)
				.cursor(file_path::id::equals(cursor))
				.select(file_path_pub_and_cas_ids::select())
				.exec()
				.await
				.map_err(|e| NonCriticalIndexerError::FetchFilePathsToRemove(e.to_string()))?;

			#[allow(clippy::cast_possible_truncation)] // Safe because we are using a constant
			let should_stop = found.len() < BATCH_SIZE as usize;

			if let Some(last) = found.last() {
				cursor = last.id;
			} else {
				break;
			}

			to_remove.extend(
				found
					.into_iter()
					.filter(|file_path| !founds_ids.contains(&file_path.id)),
			);

			if should_stop {
				break;
			}
		}

		Ok(to_remove)
	}
}
|
261
core/crates/heavy-lifting/src/indexer/shallow.rs
Normal file
261
core/crates/heavy-lifting/src/indexer/shallow.rs
Normal file
|
@ -0,0 +1,261 @@
|
|||
use crate::{Error, NonCriticalJobError};
|
||||
|
||||
use sd_core_indexer_rules::{IndexerRule, IndexerRuler};
|
||||
use sd_core_prisma_helpers::location_with_indexer_rules;
|
||||
use sd_core_sync::Manager as SyncManager;
|
||||
|
||||
use sd_prisma::prisma::PrismaClient;
|
||||
use sd_task_system::{BaseTaskDispatcher, CancelTaskOnDrop, IntoTask, TaskDispatcher, TaskOutput};
|
||||
use sd_utils::db::maybe_missing;
|
||||
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
path::{Path, PathBuf},
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use futures_concurrency::future::TryJoin;
|
||||
use itertools::Itertools;
|
||||
use tracing::{debug, warn};
|
||||
|
||||
use super::{
|
||||
determine_initial_walk_path, remove_non_existing_file_paths, reverse_update_directories_sizes,
|
||||
tasks::{
|
||||
saver::{SaveTask, SaveTaskOutput},
|
||||
updater::{UpdateTask, UpdateTaskOutput},
|
||||
walker::{ToWalkEntry, WalkDirTask, WalkTaskOutput, WalkedEntry},
|
||||
},
|
||||
update_directory_sizes, update_location_size, IndexerError, IsoFilePathFactory, WalkerDBProxy,
|
||||
BATCH_SIZE,
|
||||
};
|
||||
|
||||
/// Shallow (non-recursive) indexing of one directory inside a location.
///
/// Walks only `sub_path` itself, removes dead entries, saves new ones and
/// updates changed ones, then refreshes directory/location sizes and
/// invalidates the affected frontend queries. Returns the accumulated
/// non-critical errors.
pub async fn shallow(
	location: location_with_indexer_rules::Data,
	sub_path: impl AsRef<Path> + Send,
	dispatcher: BaseTaskDispatcher<Error>,
	db: Arc<PrismaClient>,
	sync: Arc<SyncManager>,
	invalidate_query: impl Fn(&'static str) + Send + Sync,
) -> Result<Vec<NonCriticalJobError>, Error> {
	let sub_path = sub_path.as_ref();

	let location_path = maybe_missing(&location.path, "location.path")
		.map(PathBuf::from)
		.map(Arc::new)
		.map_err(IndexerError::from)?;

	let to_walk_path = Arc::new(
		determine_initial_walk_path(location.id, &Some(sub_path), &*location_path, &db).await?,
	);

	// Single walker task over the target directory only; bail out with no
	// errors if the walker produced nothing (e.g. shutdown)
	let Some(WalkTaskOutput {
		to_create,
		to_update,
		to_remove,
		mut errors,
		directory_iso_file_path,
		total_size,
		..
	}) = walk(
		&location,
		Arc::clone(&location_path),
		Arc::clone(&to_walk_path),
		Arc::clone(&db),
		&dispatcher,
	)
	.await?
	else {
		return Ok(vec![]);
	};

	let removed_count = remove_non_existing_file_paths(to_remove, &db, &sync).await?;

	let Some(Metadata {
		indexed_count,
		updated_count,
	}) = save_and_update(
		&location,
		to_create,
		to_update,
		Arc::clone(&db),
		Arc::clone(&sync),
		&dispatcher,
	)
	.await?
	else {
		return Ok(errors);
	};

	// Only recompute sizes when something actually changed
	if indexed_count > 0 || removed_count > 0 || updated_count > 0 {
		update_directory_sizes(
			HashMap::from([(directory_iso_file_path, total_size)]),
			&db,
			&sync,
		)
		.await?;

		if to_walk_path != location_path {
			// Walked a subdirectory: propagate the size change up to the
			// location root
			reverse_update_directories_sizes(
				&*to_walk_path,
				location.id,
				&*location_path,
				&db,
				&sync,
				&mut errors,
			)
			.await?;
		}

		update_location_size(location.id, &db, &invalidate_query).await?;
	}

	if indexed_count > 0 || removed_count > 0 {
		invalidate_query("search.paths");
	}

	Ok(errors)
}
|
||||
|
||||
async fn walk(
|
||||
location: &location_with_indexer_rules::Data,
|
||||
location_path: Arc<PathBuf>,
|
||||
to_walk_path: Arc<PathBuf>,
|
||||
db: Arc<PrismaClient>,
|
||||
dispatcher: &BaseTaskDispatcher<Error>,
|
||||
) -> Result<Option<WalkTaskOutput>, Error> {
|
||||
match dispatcher
|
||||
.dispatch(WalkDirTask::new(
|
||||
ToWalkEntry::from(&*to_walk_path),
|
||||
to_walk_path,
|
||||
location
|
||||
.indexer_rules
|
||||
.iter()
|
||||
.map(|rule| IndexerRule::try_from(&rule.indexer_rule))
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
.map(IndexerRuler::new)
|
||||
.map_err(IndexerError::from)?,
|
||||
IsoFilePathFactory {
|
||||
location_id: location.id,
|
||||
location_path,
|
||||
},
|
||||
WalkerDBProxy {
|
||||
location_id: location.id,
|
||||
db,
|
||||
},
|
||||
None::<BaseTaskDispatcher<Error>>,
|
||||
)?)
|
||||
.await
|
||||
.await?
|
||||
{
|
||||
sd_task_system::TaskStatus::Done((_, TaskOutput::Out(data))) => Ok(Some(
|
||||
*data
|
||||
.downcast::<WalkTaskOutput>()
|
||||
.expect("we just dispatched this task"),
|
||||
)),
|
||||
sd_task_system::TaskStatus::Done((_, TaskOutput::Empty)) => {
|
||||
warn!("Shallow indexer's walker task finished without any output");
|
||||
Ok(None)
|
||||
}
|
||||
sd_task_system::TaskStatus::Error(e) => Err(e),
|
||||
|
||||
sd_task_system::TaskStatus::Shutdown(_) => {
|
||||
debug!("Spacedrive is shuting down while a shallow indexer was in progress");
|
||||
Ok(None)
|
||||
}
|
||||
sd_task_system::TaskStatus::Canceled | sd_task_system::TaskStatus::ForcedAbortion => {
|
||||
unreachable!("WalkDirTask on shallow indexer can never be canceled or aborted")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Aggregated counters produced by the save/update phase of a shallow index.
struct Metadata {
	// Newly created file_path rows
	indexed_count: u64,
	// Existing file_path rows that were refreshed
	updated_count: u64,
}
|
||||
|
||||
async fn save_and_update(
|
||||
location: &location_with_indexer_rules::Data,
|
||||
to_create: Vec<WalkedEntry>,
|
||||
to_update: Vec<WalkedEntry>,
|
||||
db: Arc<PrismaClient>,
|
||||
sync: Arc<SyncManager>,
|
||||
dispatcher: &BaseTaskDispatcher<Error>,
|
||||
) -> Result<Option<Metadata>, Error> {
|
||||
let save_and_update_tasks = to_create
|
||||
.into_iter()
|
||||
.chunks(BATCH_SIZE)
|
||||
.into_iter()
|
||||
.map(|chunk| {
|
||||
SaveTask::new(
|
||||
location.id,
|
||||
location.pub_id.clone(),
|
||||
chunk.collect::<Vec<_>>(),
|
||||
Arc::clone(&db),
|
||||
Arc::clone(&sync),
|
||||
)
|
||||
})
|
||||
.map(IntoTask::into_task)
|
||||
.chain(
|
||||
to_update
|
||||
.into_iter()
|
||||
.chunks(BATCH_SIZE)
|
||||
.into_iter()
|
||||
.map(|chunk| {
|
||||
UpdateTask::new(
|
||||
chunk.collect::<Vec<_>>(),
|
||||
Arc::clone(&db),
|
||||
Arc::clone(&sync),
|
||||
)
|
||||
})
|
||||
.map(IntoTask::into_task),
|
||||
)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let mut metadata = Metadata {
|
||||
indexed_count: 0,
|
||||
updated_count: 0,
|
||||
};
|
||||
|
||||
for task_status in dispatcher
|
||||
.dispatch_many_boxed(save_and_update_tasks)
|
||||
.await
|
||||
.into_iter()
|
||||
.map(CancelTaskOnDrop)
|
||||
.collect::<Vec<_>>()
|
||||
.try_join()
|
||||
.await?
|
||||
{
|
||||
match task_status {
|
||||
sd_task_system::TaskStatus::Done((_, TaskOutput::Out(data))) => {
|
||||
if data.is::<SaveTaskOutput>() {
|
||||
metadata.indexed_count += data
|
||||
.downcast::<SaveTaskOutput>()
|
||||
.expect("just checked")
|
||||
.saved_count;
|
||||
} else {
|
||||
metadata.updated_count += data
|
||||
.downcast::<UpdateTaskOutput>()
|
||||
.expect("just checked")
|
||||
.updated_count;
|
||||
}
|
||||
}
|
||||
sd_task_system::TaskStatus::Done((_, TaskOutput::Empty)) => {
|
||||
warn!("Shallow indexer's saver or updater task finished without any output");
|
||||
return Ok(None);
|
||||
}
|
||||
sd_task_system::TaskStatus::Error(e) => return Err(e),
|
||||
|
||||
sd_task_system::TaskStatus::Shutdown(_) => {
|
||||
debug!("Spacedrive is shuting down while a shallow indexer was in progress");
|
||||
return Ok(None);
|
||||
}
|
||||
sd_task_system::TaskStatus::Canceled | sd_task_system::TaskStatus::ForcedAbortion => {
|
||||
unreachable!(
|
||||
"Save or Updater tasks on shallow indexer can never be canceled or aborted"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Some(metadata))
|
||||
}
|
3
core/crates/heavy-lifting/src/indexer/tasks/mod.rs
Normal file
3
core/crates/heavy-lifting/src/indexer/tasks/mod.rs
Normal file
|
@ -0,0 +1,3 @@
|
|||
pub mod saver;
|
||||
pub mod updater;
|
||||
pub mod walker;
|
218
core/crates/heavy-lifting/src/indexer/tasks/saver.rs
Normal file
218
core/crates/heavy-lifting/src/indexer/tasks/saver.rs
Normal file
|
@ -0,0 +1,218 @@
|
|||
use crate::{indexer::IndexerError, Error};
|
||||
|
||||
use sd_core_file_path_helper::IsolatedFilePathDataParts;
|
||||
use sd_core_sync::Manager as SyncManager;
|
||||
|
||||
use sd_prisma::{
|
||||
prisma::{file_path, location, PrismaClient},
|
||||
prisma_sync,
|
||||
};
|
||||
use sd_sync::{sync_db_entry, OperationFactory};
|
||||
use sd_task_system::{ExecStatus, Interrupter, IntoAnyTaskOutput, SerializableTask, Task, TaskId};
|
||||
use sd_utils::{db::inode_to_db, msgpack};
|
||||
|
||||
use std::{sync::Arc, time::Duration};
|
||||
|
||||
use chrono::Utc;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::time::Instant;
|
||||
use tracing::trace;
|
||||
|
||||
use super::walker::WalkedEntry;
|
||||
|
||||
/// Task that inserts a batch of freshly walked entries as new `file_path` rows,
/// emitting matching sync operations.
#[derive(Debug)]
pub struct SaveTask {
	// Unique id of this task instance within the task system
	id: TaskId,
	location_id: location::id::Type,
	// Needed to build the sync relation to the owning location
	location_pub_id: location::pub_id::Type,
	// Entries to insert; drained during `run`
	walked_entries: Vec<WalkedEntry>,
	db: Arc<PrismaClient>,
	sync: Arc<SyncManager>,
}
|
||||
|
||||
impl SaveTask {
|
||||
#[must_use]
|
||||
pub fn new(
|
||||
location_id: location::id::Type,
|
||||
location_pub_id: location::pub_id::Type,
|
||||
walked_entries: Vec<WalkedEntry>,
|
||||
db: Arc<PrismaClient>,
|
||||
sync: Arc<SyncManager>,
|
||||
) -> Self {
|
||||
Self {
|
||||
id: TaskId::new_v4(),
|
||||
location_id,
|
||||
location_pub_id,
|
||||
walked_entries,
|
||||
db,
|
||||
sync,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Resumable on-disk state of a [`SaveTask`]: everything except the live
/// database and sync handles, which are re-injected on deserialization.
#[derive(Debug, Serialize, Deserialize)]
struct SaveTaskSaveState {
	id: TaskId,
	location_id: location::id::Type,
	location_pub_id: location::pub_id::Type,
	walked_entries: Vec<WalkedEntry>,
}
|
||||
|
||||
impl SerializableTask<Error> for SaveTask {
	type SerializeError = rmp_serde::encode::Error;

	type DeserializeError = rmp_serde::decode::Error;

	// Live handles that cannot be serialized; supplied again on resume
	type DeserializeCtx = (Arc<PrismaClient>, Arc<SyncManager>);

	/// Serializes the resumable state via MessagePack, dropping the db/sync
	/// handles.
	async fn serialize(self) -> Result<Vec<u8>, Self::SerializeError> {
		let Self {
			id,
			location_id,
			location_pub_id,
			walked_entries,
			..
		} = self;
		rmp_serde::to_vec_named(&SaveTaskSaveState {
			id,
			location_id,
			location_pub_id,
			walked_entries,
		})
	}

	/// Rebuilds the task from saved state plus fresh database and sync handles.
	async fn deserialize(
		data: &[u8],
		(db, sync): Self::DeserializeCtx,
	) -> Result<Self, Self::DeserializeError> {
		rmp_serde::from_slice(data).map(
			|SaveTaskSaveState {
				id,
				location_id,
				location_pub_id,
				walked_entries,
			}| Self {
				id,
				location_id,
				location_pub_id,
				walked_entries,
				db,
				sync,
			},
		)
	}
}
|
||||
|
||||
/// Result of a completed [`SaveTask`].
#[derive(Debug)]
pub struct SaveTaskOutput {
	// How many file_path rows were inserted
	pub saved_count: u64,
	// Wall-clock time the task's run took
	pub save_duration: Duration,
}
|
||||
|
||||
#[async_trait::async_trait]
impl Task<Error> for SaveTask {
	fn id(&self) -> TaskId {
		self.id
	}

	/// Inserts every walked entry as a new `file_path` row, pairing the batch
	/// insert with one shared-create sync operation per entry. Runs in a single
	/// shot (the interrupter is ignored), returning the inserted row count and
	/// elapsed time.
	async fn run(&mut self, _: &Interrupter) -> Result<ExecStatus, Error> {
		use file_path::{
			create_unchecked, date_created, date_indexed, date_modified, extension, hidden, inode,
			is_dir, location, location_id, materialized_path, name, size_in_bytes_bytes,
		};

		let start_time = Instant::now();

		let Self {
			location_id,
			location_pub_id,
			walked_entries,
			db,
			sync,
			..
		} = self;

		// Build one (sync ops, db create params) pair per entry; drained so a
		// repeated run of this task won't attempt the same inserts twice
		let (sync_stuff, paths): (Vec<_>, Vec<_>) = walked_entries
			.drain(..)
			.map(|entry| {
				let IsolatedFilePathDataParts {
					materialized_path,
					is_dir,
					name,
					extension,
					..
				} = entry.iso_file_path.to_parts();

				let pub_id = sd_utils::uuid_to_bytes(entry.pub_id);

				let (sync_params, db_params): (Vec<_>, Vec<_>) = [
					(
						// Relation to the owning location, expressed by pub_id
						// on the sync side and numeric id on the DB side
						(
							location::NAME,
							msgpack!(prisma_sync::location::SyncId {
								pub_id: location_pub_id.clone()
							}),
						),
						location_id::set(Some(*location_id)),
					),
					sync_db_entry!(materialized_path.to_string(), materialized_path),
					sync_db_entry!(name.to_string(), name),
					sync_db_entry!(is_dir, is_dir),
					sync_db_entry!(extension.to_string(), extension),
					sync_db_entry!(
						entry.metadata.size_in_bytes.to_be_bytes().to_vec(),
						size_in_bytes_bytes
					),
					sync_db_entry!(inode_to_db(entry.metadata.inode), inode),
					{
						let v = entry.metadata.created_at.into();
						sync_db_entry!(v, date_created)
					},
					{
						let v = entry.metadata.modified_at.into();
						sync_db_entry!(v, date_modified)
					},
					{
						let v = Utc::now().into();
						sync_db_entry!(v, date_indexed)
					},
					sync_db_entry!(entry.metadata.hidden, hidden),
				]
				.into_iter()
				.unzip();

				(
					sync.shared_create(
						prisma_sync::file_path::SyncId {
							pub_id: sd_utils::uuid_to_bytes(entry.pub_id),
						},
						sync_params,
					),
					create_unchecked(pub_id, db_params),
				)
			})
			.unzip();

		#[allow(clippy::cast_sign_loss)]
		let saved_count = sync
			.write_ops(
				db,
				(
					sync_stuff.into_iter().flatten().collect(),
					// skip_duplicates tolerates entries inserted by a previous
					// partial run of this task
					db.file_path().create_many(paths).skip_duplicates(),
				),
			)
			.await
			.map_err(IndexerError::from)? as u64;

		trace!("Inserted {saved_count} records");

		Ok(ExecStatus::Done(
			SaveTaskOutput {
				saved_count,
				save_duration: start_time.elapsed(),
			}
			.into_output(),
		))
	}
}
|
236
core/crates/heavy-lifting/src/indexer/tasks/updater.rs
Normal file
236
core/crates/heavy-lifting/src/indexer/tasks/updater.rs
Normal file
|
@ -0,0 +1,236 @@
|
|||
use crate::{indexer::IndexerError, Error};
|
||||
|
||||
use sd_core_file_path_helper::IsolatedFilePathDataParts;
|
||||
use sd_core_sync::Manager as SyncManager;
|
||||
|
||||
use sd_prisma::{
|
||||
prisma::{file_path, object, PrismaClient},
|
||||
prisma_sync,
|
||||
};
|
||||
use sd_sync::{sync_db_entry, OperationFactory};
|
||||
use sd_task_system::{
|
||||
check_interruption, ExecStatus, Interrupter, IntoAnyTaskOutput, SerializableTask, Task, TaskId,
|
||||
};
|
||||
use sd_utils::{chain_optional_iter, db::inode_to_db, msgpack};
|
||||
|
||||
use std::{collections::HashSet, sync::Arc, time::Duration};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::time::Instant;
|
||||
use tracing::trace;
|
||||
|
||||
use super::walker::WalkedEntry;
|
||||
|
||||
/// Task that refreshes existing `file_path` rows for entries that changed on
/// disk, emitting matching sync operations.
#[derive(Debug)]
pub struct UpdateTask {
	// Unique id of this task instance within the task system
	id: TaskId,
	// Entries to update; drained during `run`
	walked_entries: Vec<WalkedEntry>,
	// Objects to disconnect from their file_paths; computed lazily on first run
	// and kept so an interrupted task doesn't recompute it
	object_ids_that_should_be_unlinked: HashSet<object::id::Type>,
	db: Arc<PrismaClient>,
	sync: Arc<SyncManager>,
}
|
||||
|
||||
impl UpdateTask {
|
||||
#[must_use]
|
||||
pub fn new(
|
||||
walked_entries: Vec<WalkedEntry>,
|
||||
db: Arc<PrismaClient>,
|
||||
sync: Arc<SyncManager>,
|
||||
) -> Self {
|
||||
Self {
|
||||
id: TaskId::new_v4(),
|
||||
walked_entries,
|
||||
db,
|
||||
sync,
|
||||
object_ids_that_should_be_unlinked: HashSet::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Resumable on-disk state of an [`UpdateTask`]: everything except the live
/// database and sync handles, which are re-injected on deserialization.
#[derive(Debug, Serialize, Deserialize)]
struct UpdateTaskSaveState {
	id: TaskId,
	walked_entries: Vec<WalkedEntry>,
	object_ids_that_should_be_unlinked: HashSet<object::id::Type>,
}
|
||||
|
||||
impl SerializableTask<Error> for UpdateTask {
|
||||
type SerializeError = rmp_serde::encode::Error;
|
||||
|
||||
type DeserializeError = rmp_serde::decode::Error;
|
||||
|
||||
type DeserializeCtx = (Arc<PrismaClient>, Arc<SyncManager>);
|
||||
|
||||
async fn serialize(self) -> Result<Vec<u8>, Self::SerializeError> {
|
||||
rmp_serde::to_vec_named(&UpdateTaskSaveState {
|
||||
id: self.id,
|
||||
walked_entries: self.walked_entries,
|
||||
object_ids_that_should_be_unlinked: self.object_ids_that_should_be_unlinked,
|
||||
})
|
||||
}
|
||||
|
||||
async fn deserialize(
|
||||
data: &[u8],
|
||||
(db, sync): Self::DeserializeCtx,
|
||||
) -> Result<Self, Self::DeserializeError> {
|
||||
rmp_serde::from_slice(data).map(
|
||||
|UpdateTaskSaveState {
|
||||
id,
|
||||
walked_entries,
|
||||
object_ids_that_should_be_unlinked,
|
||||
}| Self {
|
||||
id,
|
||||
walked_entries,
|
||||
object_ids_that_should_be_unlinked,
|
||||
db,
|
||||
sync,
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of a completed [`UpdateTask`].
#[derive(Debug)]
pub struct UpdateTaskOutput {
	// How many file_path rows were updated
	pub updated_count: u64,
	// Wall-clock time the task's run took
	pub update_duration: Duration,
}
|
||||
|
||||
#[async_trait::async_trait]
impl Task<Error> for UpdateTask {
	fn id(&self) -> TaskId {
		self.id
	}

	/// Refreshes the metadata of existing `file_path` rows, pairing every DB
	/// update with per-field shared-update sync operations. Resets `cas_id` —
	/// and, for shared objects, the object link — so the file identifier job can
	/// re-process the path later.
	async fn run(&mut self, interrupter: &Interrupter) -> Result<ExecStatus, Error> {
		use file_path::{
			cas_id, date_created, date_modified, hidden, inode, is_dir, object, object_id,
			size_in_bytes_bytes,
		};

		let start_time = Instant::now();

		let Self {
			walked_entries,
			db,
			sync,
			object_ids_that_should_be_unlinked,
			..
		} = self;

		fetch_objects_ids_to_unlink(walked_entries, object_ids_that_should_be_unlinked, db).await?;

		// Safe point to pause/cancel before building the write batch
		check_interruption!(interrupter);

		let (sync_stuff, paths_to_update) = walked_entries
			.drain(..)
			.map(|entry| {
				let IsolatedFilePathDataParts { is_dir, .. } = &entry.iso_file_path.to_parts();

				let pub_id = sd_utils::uuid_to_bytes(entry.pub_id);

				let should_unlink_object = entry.maybe_object_id.map_or(false, |object_id| {
					object_ids_that_should_be_unlinked.contains(&object_id)
				});

				let (sync_params, db_params) = chain_optional_iter(
					[
						// cas_id is cleared so the file identifier job recomputes it
						((cas_id::NAME, msgpack!(nil)), cas_id::set(None)),
						sync_db_entry!(*is_dir, is_dir),
						sync_db_entry!(
							entry.metadata.size_in_bytes.to_be_bytes().to_vec(),
							size_in_bytes_bytes
						),
						sync_db_entry!(inode_to_db(entry.metadata.inode), inode),
						{
							let v = entry.metadata.created_at.into();
							sync_db_entry!(v, date_created)
						},
						{
							let v = entry.metadata.modified_at.into();
							sync_db_entry!(v, date_modified)
						},
						sync_db_entry!(entry.metadata.hidden, hidden),
					],
					[
						// As this file was updated while Spacedrive was offline, we mark the object_id and cas_id as null
						// So this file_path will be updated at file identifier job
						should_unlink_object
							.then_some(((object_id::NAME, msgpack!(nil)), object::disconnect())),
					],
				)
				.into_iter()
				.unzip::<_, _, Vec<_>, Vec<_>>();

				(
					// One shared_update per changed field, all keyed by pub_id
					sync_params
						.into_iter()
						.map(|(field, value)| {
							sync.shared_update(
								prisma_sync::file_path::SyncId {
									pub_id: pub_id.clone(),
								},
								field,
								value,
							)
						})
						.collect::<Vec<_>>(),
					db.file_path()
						.update(file_path::pub_id::equals(pub_id), db_params)
						.select(file_path::select!({ id })),
				)
			})
			.unzip::<_, _, Vec<_>, Vec<_>>();

		let updated = sync
			.write_ops(
				db,
				(sync_stuff.into_iter().flatten().collect(), paths_to_update),
			)
			.await
			.map_err(IndexerError::from)?;

		trace!("Updated {updated:?} records");

		Ok(ExecStatus::Done(
			UpdateTaskOutput {
				updated_count: updated.len() as u64,
				update_duration: start_time.elapsed(),
			}
			.into_output(),
		))
	}
}
|
||||
|
||||
/// Determines which `object` ids should be disconnected from the updated file
/// paths: any object still referenced by more than one `file_path` row.
///
/// The result is stored into `object_ids_that_should_be_unlinked`; the
/// `is_empty` guard makes this a one-shot computation per task, so a resumed or
/// re-run task doesn't redo the queries.
async fn fetch_objects_ids_to_unlink(
	walked_entries: &[WalkedEntry],
	object_ids_that_should_be_unlinked: &mut HashSet<object::id::Type>,
	db: &PrismaClient,
) -> Result<(), IndexerError> {
	if object_ids_that_should_be_unlinked.is_empty() {
		// First we consult which file paths we should unlink
		let object_ids = walked_entries
			.iter()
			.filter_map(|entry| entry.maybe_object_id)
			.collect::<HashSet<_>>() // Removing possible duplicates
			.into_iter()
			.collect::<Vec<_>>();

		// Count file_paths per object; only objects shared by >1 path qualify
		*object_ids_that_should_be_unlinked = db
			._batch(
				object_ids
					.iter()
					.map(|object_id| {
						db.file_path()
							.count(vec![file_path::object_id::equals(Some(*object_id))])
					})
					.collect::<Vec<_>>(),
			)
			.await?
			.into_iter()
			.zip(object_ids)
			.filter_map(|(count, object_id)| (count > 1).then_some(object_id))
			.collect::<HashSet<_>>();
	}

	Ok(())
}
|
1516
core/crates/heavy-lifting/src/indexer/tasks/walker.rs
Normal file
1516
core/crates/heavy-lifting/src/indexer/tasks/walker.rs
Normal file
File diff suppressed because it is too large
Load diff
61
core/crates/heavy-lifting/src/job_system/error.rs
Normal file
61
core/crates/heavy-lifting/src/job_system/error.rs
Normal file
|
@ -0,0 +1,61 @@
|
|||
use crate::Error;
|
||||
|
||||
use sd_utils::error::FileIOError;
|
||||
|
||||
use prisma_client_rust::QueryError;
|
||||
|
||||
use super::{job::JobName, report::ReportError, JobId};
|
||||
|
||||
/// Errors produced by the job system itself (scheduling, persistence, resume),
/// as opposed to errors produced while a job's work is running.
#[derive(thiserror::Error, Debug)]
pub enum JobSystemError {
	#[error("job not found: <id='{0}'>")]
	NotFound(JobId),
	// A job of the same kind is already running; includes both ids for diagnosis
	#[error("job already running: <new_id='{new_id}', name='{job_name}', already_running_id='{already_running_id}'>")]
	AlreadyRunning {
		new_id: JobId,
		job_name: JobName,
		already_running_id: JobId,
	},

	#[error("job canceled: <id='{0}'>")]
	Canceled(JobId),

	#[error("failed to load job reports from database to resume jobs: {0}")]
	LoadReportsForResume(#[from] QueryError),

	// MessagePack encode failure while persisting a job for later resume
	#[error("failed to serialize job to be saved and resumed later: {0}")]
	Serialize(#[from] rmp_serde::encode::Error),

	// MessagePack decode failure while restoring a persisted job
	#[error("failed to deserialize job to be resumed: {0}")]
	Deserialize(#[from] rmp_serde::decode::Error),

	#[error("failed to save or load jobs on disk: {0}")]
	StoredJobs(FileIOError),

	#[error(transparent)]
	Report(#[from] ReportError),

	// Error raised by the job's own processing logic
	#[error(transparent)]
	Processing(#[from] Error),
}
|
||||
|
||||
impl From<JobSystemError> for rspc::Error {
	/// Maps job-system errors onto rspc error codes for the RPC layer:
	/// NotFound/Conflict/ClientClosedRequest for the specific variants,
	/// delegating nested errors to their own conversions, and defaulting the
	/// rest to InternalServerError.
	fn from(e: JobSystemError) -> Self {
		match e {
			JobSystemError::NotFound(_) => {
				Self::with_cause(rspc::ErrorCode::NotFound, e.to_string(), e)
			}
			JobSystemError::AlreadyRunning { .. } => {
				Self::with_cause(rspc::ErrorCode::Conflict, e.to_string(), e)
			}

			JobSystemError::Canceled(_) => {
				Self::with_cause(rspc::ErrorCode::ClientClosedRequest, e.to_string(), e)
			}
			JobSystemError::Processing(e) => e.into(),
			JobSystemError::Report(e) => e.into(),

			_ => Self::with_cause(rspc::ErrorCode::InternalServerError, e.to_string(), e),
		}
	}
}
|
784
core/crates/heavy-lifting/src/job_system/job.rs
Normal file
784
core/crates/heavy-lifting/src/job_system/job.rs
Normal file
|
@ -0,0 +1,784 @@
|
|||
use crate::{Error, NonCriticalJobError};
|
||||
|
||||
use sd_core_sync::Manager as SyncManager;
|
||||
|
||||
use sd_prisma::prisma::PrismaClient;
|
||||
use sd_task_system::{
|
||||
BaseTaskDispatcher, Task, TaskDispatcher, TaskHandle, TaskRemoteController, TaskSystemError,
|
||||
};
|
||||
|
||||
use std::{
|
||||
collections::VecDeque,
|
||||
hash::{DefaultHasher, Hash, Hasher},
|
||||
marker::PhantomData,
|
||||
pin::pin,
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use async_channel as chan;
|
||||
use chrono::{DateTime, Utc};
|
||||
use futures::{stream, Future, StreamExt};
|
||||
use futures_concurrency::{
|
||||
future::{Join, TryJoin},
|
||||
stream::Merge,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use specta::Type;
|
||||
use strum::{Display, EnumString};
|
||||
use tokio::spawn;
|
||||
use tracing::{debug, error, info, warn};
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::{
|
||||
report::{
|
||||
Report, ReportBuilder, ReportInputMetadata, ReportMetadata, ReportOutputMetadata, Status,
|
||||
},
|
||||
Command, JobId, JobSystemError, SerializableJob, SerializedTasks,
|
||||
};
|
||||
|
||||
/// Identifier for each kind of job the system can run; serialized in
/// snake_case (via strum) for reports and persistence.
#[derive(
	Debug, Serialize, Deserialize, EnumString, Display, Clone, Copy, Type, Hash, PartialEq, Eq,
)]
#[strum(use_phf, serialize_all = "snake_case")]
pub enum JobName {
	Indexer,
	// TODO: Add more job names as needed
}
|
||||
|
||||
pub enum ReturnStatus {
|
||||
Completed(JobReturn),
|
||||
Shutdown(Result<Option<Vec<u8>>, rmp_serde::encode::Error>),
|
||||
Canceled,
|
||||
}
|
||||
|
||||
/// A single progress event a running job can report to its context.
pub enum ProgressUpdate {
	/// Total number of tasks the job expects to run.
	TaskCount(u64),
	/// Number of tasks finished so far.
	CompletedTaskCount(u64),
	/// Free-form status message for the UI.
	Message(String),
	/// Name of the phase the job is currently in.
	Phase(String),
}
|
||||
|
||||
impl ProgressUpdate {
|
||||
pub fn message(message: impl Into<String>) -> Self {
|
||||
Self::Message(message.into())
|
||||
}
|
||||
|
||||
pub fn phase(phase: impl Into<String>) -> Self {
|
||||
Self::Phase(phase.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub trait JobContext: Send + Sync + Clone + 'static {
|
||||
fn id(&self) -> Uuid;
|
||||
fn db(&self) -> &Arc<PrismaClient>;
|
||||
fn sync(&self) -> &Arc<SyncManager>;
|
||||
fn invalidate_query(&self, query: &'static str);
|
||||
fn query_invalidator(&self) -> impl Fn(&'static str) + Send + Sync;
|
||||
fn progress(&self, updates: Vec<ProgressUpdate>);
|
||||
fn progress_msg(&self, msg: impl Into<String>) {
|
||||
self.progress(vec![ProgressUpdate::Message(msg.into())]);
|
||||
}
|
||||
}
|
||||
|
||||
pub trait Job: Send + Sync + Hash + 'static {
|
||||
const NAME: JobName;
|
||||
|
||||
#[allow(unused_variables)]
|
||||
fn resume_tasks(
|
||||
&mut self,
|
||||
dispatcher: &JobTaskDispatcher,
|
||||
ctx: &impl JobContext,
|
||||
serialized_tasks: SerializedTasks,
|
||||
) -> impl Future<Output = Result<(), Error>> + Send {
|
||||
async move { Ok(()) }
|
||||
}
|
||||
|
||||
fn run(
|
||||
self,
|
||||
dispatcher: JobTaskDispatcher,
|
||||
ctx: impl JobContext,
|
||||
) -> impl Future<Output = Result<ReturnStatus, Error>> + Send;
|
||||
}
|
||||
|
||||
pub trait IntoJob<J, Ctx>
|
||||
where
|
||||
J: Job + SerializableJob,
|
||||
Ctx: JobContext,
|
||||
{
|
||||
fn into_job(self) -> Box<dyn DynJob<Ctx>>;
|
||||
}
|
||||
|
||||
impl<J, Ctx> IntoJob<J, Ctx> for J
|
||||
where
|
||||
J: Job + SerializableJob,
|
||||
Ctx: JobContext,
|
||||
{
|
||||
fn into_job(self) -> Box<dyn DynJob<Ctx>> {
|
||||
let id = JobId::new_v4();
|
||||
|
||||
Box::new(JobHolder {
|
||||
id,
|
||||
job: self,
|
||||
report: ReportBuilder::new(id, J::NAME).build(),
|
||||
next_jobs: VecDeque::new(),
|
||||
_ctx: PhantomData,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<J, Ctx> IntoJob<J, Ctx> for JobBuilder<J, Ctx>
|
||||
where
|
||||
J: Job + SerializableJob,
|
||||
Ctx: JobContext,
|
||||
{
|
||||
fn into_job(self) -> Box<dyn DynJob<Ctx>> {
|
||||
self.build()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct JobReturn {
|
||||
data: JobOutputData,
|
||||
metadata: Option<ReportOutputMetadata>,
|
||||
non_critical_errors: Vec<NonCriticalJobError>,
|
||||
}
|
||||
|
||||
impl JobReturn {
|
||||
#[must_use]
|
||||
pub fn builder() -> JobReturnBuilder {
|
||||
JobReturnBuilder {
|
||||
job_return: Self::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for JobReturn {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
data: JobOutputData::Empty,
|
||||
metadata: None,
|
||||
non_critical_errors: vec![],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct JobReturnBuilder {
|
||||
job_return: JobReturn,
|
||||
}
|
||||
|
||||
impl JobReturnBuilder {
|
||||
#[must_use]
|
||||
pub const fn with_data(mut self, data: JobOutputData) -> Self {
|
||||
self.job_return.data = data;
|
||||
self
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn with_metadata(mut self, metadata: impl Into<ReportOutputMetadata>) -> Self {
|
||||
self.job_return.metadata = Some(metadata.into());
|
||||
self
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn with_non_critical_errors(mut self, errors: Vec<NonCriticalJobError>) -> Self {
|
||||
if self.job_return.non_critical_errors.is_empty() {
|
||||
self.job_return.non_critical_errors = errors;
|
||||
} else {
|
||||
self.job_return.non_critical_errors.extend(errors);
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn build(self) -> JobReturn {
|
||||
self.job_return
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Type)]
|
||||
pub struct JobOutput {
|
||||
id: JobId,
|
||||
status: Status,
|
||||
job_name: JobName,
|
||||
data: JobOutputData,
|
||||
metadata: Vec<ReportMetadata>,
|
||||
non_critical_errors: Vec<NonCriticalJobError>,
|
||||
}
|
||||
|
||||
impl JobOutput {
|
||||
pub fn prepare_output_and_report(
|
||||
JobReturn {
|
||||
data,
|
||||
metadata,
|
||||
non_critical_errors,
|
||||
}: JobReturn,
|
||||
report: &mut Report,
|
||||
) -> Self {
|
||||
if non_critical_errors.is_empty() {
|
||||
report.status = Status::Completed;
|
||||
debug!("Job<id='{}', name='{}'> completed", report.id, report.name);
|
||||
} else {
|
||||
report.status = Status::CompletedWithErrors;
|
||||
report.non_critical_errors = non_critical_errors
|
||||
.iter()
|
||||
.map(ToString::to_string)
|
||||
.collect();
|
||||
|
||||
warn!(
|
||||
"Job<id='{}', name='{}'> completed with errors: {non_critical_errors:#?}",
|
||||
report.id, report.name
|
||||
);
|
||||
}
|
||||
|
||||
if let Some(metadata) = metadata {
|
||||
report.metadata.push(ReportMetadata::Output(metadata));
|
||||
}
|
||||
|
||||
report.completed_at = Some(Utc::now());
|
||||
|
||||
Self {
|
||||
id: report.id,
|
||||
status: report.status,
|
||||
job_name: report.name,
|
||||
data,
|
||||
metadata: report.metadata.clone(),
|
||||
non_critical_errors,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Type)]
|
||||
pub enum JobOutputData {
|
||||
Empty,
|
||||
// TODO: Add more types
|
||||
}
|
||||
|
||||
pub struct JobBuilder<J, Ctx>
|
||||
where
|
||||
J: Job + SerializableJob,
|
||||
Ctx: JobContext,
|
||||
{
|
||||
id: JobId,
|
||||
job: J,
|
||||
report_builder: ReportBuilder,
|
||||
next_jobs: VecDeque<Box<dyn DynJob<Ctx>>>,
|
||||
_ctx: PhantomData<Ctx>,
|
||||
}
|
||||
|
||||
impl<J, Ctx> JobBuilder<J, Ctx>
|
||||
where
|
||||
J: Job + SerializableJob,
|
||||
Ctx: JobContext,
|
||||
{
|
||||
pub fn build(self) -> Box<JobHolder<J, Ctx>> {
|
||||
Box::new(JobHolder {
|
||||
id: self.id,
|
||||
job: self.job,
|
||||
report: self.report_builder.build(),
|
||||
next_jobs: VecDeque::new(),
|
||||
_ctx: PhantomData,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn new(job: J) -> Self {
|
||||
let id = JobId::new_v4();
|
||||
Self {
|
||||
id,
|
||||
job,
|
||||
report_builder: ReportBuilder::new(id, J::NAME),
|
||||
next_jobs: VecDeque::new(),
|
||||
_ctx: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn with_action(mut self, action: impl Into<String>) -> Self {
|
||||
self.report_builder = self.report_builder.with_action(action);
|
||||
self
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn with_parent_id(mut self, parent_id: JobId) -> Self {
|
||||
self.report_builder = self.report_builder.with_parent_id(parent_id);
|
||||
self
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn with_metadata(mut self, metadata: ReportInputMetadata) -> Self {
|
||||
self.report_builder = self.report_builder.with_metadata(metadata);
|
||||
self
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn enqueue_next(mut self, next: impl Job + SerializableJob) -> Self {
|
||||
let next_job_order = self.next_jobs.len() + 1;
|
||||
|
||||
let mut child_job_builder = JobBuilder::new(next).with_parent_id(self.id);
|
||||
|
||||
if let Some(parent_action) = &self.report_builder.action {
|
||||
child_job_builder =
|
||||
child_job_builder.with_action(format!("{parent_action}-{next_job_order}"));
|
||||
}
|
||||
|
||||
self.next_jobs.push_back(child_job_builder.build());
|
||||
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
pub struct JobHolder<J, Ctx>
|
||||
where
|
||||
J: Job + SerializableJob,
|
||||
Ctx: JobContext,
|
||||
{
|
||||
pub(super) id: JobId,
|
||||
pub(super) job: J,
|
||||
pub(super) report: Report,
|
||||
pub(super) next_jobs: VecDeque<Box<dyn DynJob<Ctx>>>,
|
||||
pub(super) _ctx: PhantomData<Ctx>,
|
||||
}
|
||||
|
||||
pub struct JobHandle<Ctx: JobContext> {
|
||||
pub(crate) next_jobs: VecDeque<Box<dyn DynJob<Ctx>>>,
|
||||
pub(crate) job_ctx: Ctx,
|
||||
pub(crate) report: Report,
|
||||
pub(crate) commands_tx: chan::Sender<Command>,
|
||||
}
|
||||
|
||||
impl<Ctx: JobContext> JobHandle<Ctx> {
|
||||
pub async fn send_command(&mut self, command: Command) -> Result<(), JobSystemError> {
|
||||
if self.commands_tx.send(command).await.is_err() {
|
||||
warn!("Tried to send a {command:?} to a job that was already completed");
|
||||
|
||||
Ok(())
|
||||
} else {
|
||||
self.command_children(command).await
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn command_children(&mut self, command: Command) -> Result<(), JobSystemError> {
|
||||
let (new_status, completed_at) = match command {
|
||||
Command::Pause => (Status::Paused, None),
|
||||
Command::Resume => return Ok(()),
|
||||
Command::Cancel => (Status::Canceled, Some(Utc::now())),
|
||||
};
|
||||
|
||||
self.next_jobs
|
||||
.iter_mut()
|
||||
.map(|dyn_job| dyn_job.report_mut())
|
||||
.map(|next_job_report| async {
|
||||
next_job_report.status = new_status;
|
||||
next_job_report.completed_at = completed_at;
|
||||
|
||||
next_job_report.update(self.job_ctx.db()).await
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.try_join()
|
||||
.await
|
||||
.map(|_| ())
|
||||
.map_err(Into::into)
|
||||
}
|
||||
|
||||
pub async fn register_start(
|
||||
&mut self,
|
||||
start_time: DateTime<Utc>,
|
||||
) -> Result<(), JobSystemError> {
|
||||
let Self {
|
||||
next_jobs,
|
||||
report,
|
||||
job_ctx,
|
||||
..
|
||||
} = self;
|
||||
|
||||
report.status = Status::Running;
|
||||
if report.started_at.is_none() {
|
||||
report.started_at = Some(start_time);
|
||||
}
|
||||
|
||||
let db = job_ctx.db();
|
||||
|
||||
// If the report doesn't have a created_at date, it's a new report
|
||||
if report.created_at.is_none() {
|
||||
report.create(db).await?;
|
||||
} else {
|
||||
// Otherwise it can be a job being resumed or a children job that was already been created
|
||||
report.update(db).await?;
|
||||
}
|
||||
|
||||
// Registering children jobs
|
||||
next_jobs
|
||||
.iter_mut()
|
||||
.map(|dyn_job| dyn_job.report_mut())
|
||||
.map(|next_job_report| async {
|
||||
if next_job_report.created_at.is_none() {
|
||||
next_job_report.create(db).await
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.try_join()
|
||||
.await
|
||||
.map(|_| ())
|
||||
.map_err(Into::into)
|
||||
}
|
||||
|
||||
pub async fn complete_job(
|
||||
&mut self,
|
||||
job_return: JobReturn,
|
||||
) -> Result<JobOutput, JobSystemError> {
|
||||
let Self {
|
||||
report, job_ctx, ..
|
||||
} = self;
|
||||
|
||||
let output = JobOutput::prepare_output_and_report(job_return, report);
|
||||
|
||||
report.update(job_ctx.db()).await?;
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
pub async fn failed_job(&mut self, e: &Error) -> Result<(), JobSystemError> {
|
||||
let Self {
|
||||
report, job_ctx, ..
|
||||
} = self;
|
||||
error!(
|
||||
"Job<id='{}', name='{}'> failed with a critical error: {e:#?};",
|
||||
report.id, report.name
|
||||
);
|
||||
|
||||
report.status = Status::Failed;
|
||||
report.critical_error = Some(e.to_string());
|
||||
report.completed_at = Some(Utc::now());
|
||||
|
||||
report.update(job_ctx.db()).await?;
|
||||
|
||||
self.command_children(Command::Cancel).await
|
||||
}
|
||||
|
||||
pub async fn shutdown_pause_job(&mut self) -> Result<(), JobSystemError> {
|
||||
let Self {
|
||||
report, job_ctx, ..
|
||||
} = self;
|
||||
info!(
|
||||
"Job<id='{}', name='{}'> paused due to system shutdown, we will pause all children jobs",
|
||||
report.id, report.name
|
||||
);
|
||||
|
||||
report.status = Status::Paused;
|
||||
|
||||
report.update(job_ctx.db()).await?;
|
||||
|
||||
self.command_children(Command::Pause).await
|
||||
}
|
||||
|
||||
pub async fn cancel_job(&mut self) -> Result<(), JobSystemError> {
|
||||
let Self {
|
||||
report, job_ctx, ..
|
||||
} = self;
|
||||
info!(
|
||||
"Job<id='{}', name='{}'> canceled, we will cancel all children jobs",
|
||||
report.id, report.name
|
||||
);
|
||||
|
||||
report.status = Status::Canceled;
|
||||
report.completed_at = Some(Utc::now());
|
||||
|
||||
report.update(job_ctx.db()).await?;
|
||||
|
||||
self.command_children(Command::Cancel).await
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
pub trait DynJob<Ctx: JobContext>: Send + Sync + 'static {
|
||||
fn id(&self) -> JobId;
|
||||
|
||||
fn job_name(&self) -> JobName;
|
||||
|
||||
fn hash(&self) -> u64;
|
||||
|
||||
fn report_mut(&mut self) -> &mut Report;
|
||||
|
||||
fn set_next_jobs(&mut self, next_jobs: VecDeque<Box<dyn DynJob<Ctx>>>);
|
||||
|
||||
fn next_jobs(&self) -> &VecDeque<Box<dyn DynJob<Ctx>>>;
|
||||
|
||||
async fn serialize(self: Box<Self>) -> Result<Option<Vec<u8>>, rmp_serde::encode::Error>;
|
||||
|
||||
fn dispatch(
|
||||
self: Box<Self>,
|
||||
base_dispatcher: BaseTaskDispatcher<Error>,
|
||||
job_ctx: Ctx,
|
||||
done_tx: chan::Sender<(JobId, Result<ReturnStatus, Error>)>,
|
||||
) -> JobHandle<Ctx>;
|
||||
|
||||
fn resume(
|
||||
self: Box<Self>,
|
||||
base_dispatcher: BaseTaskDispatcher<Error>,
|
||||
job_ctx: Ctx,
|
||||
serialized_tasks: Option<SerializedTasks>,
|
||||
done_tx: chan::Sender<(JobId, Result<ReturnStatus, Error>)>,
|
||||
) -> JobHandle<Ctx>;
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl<J, Ctx> DynJob<Ctx> for JobHolder<J, Ctx>
|
||||
where
|
||||
J: Job + SerializableJob,
|
||||
Ctx: JobContext,
|
||||
{
|
||||
fn id(&self) -> JobId {
|
||||
self.id
|
||||
}
|
||||
|
||||
fn job_name(&self) -> JobName {
|
||||
J::NAME
|
||||
}
|
||||
|
||||
fn hash(&self) -> u64 {
|
||||
let mut hasher = DefaultHasher::new();
|
||||
J::NAME.hash(&mut hasher);
|
||||
self.job.hash(&mut hasher);
|
||||
hasher.finish()
|
||||
}
|
||||
|
||||
fn report_mut(&mut self) -> &mut Report {
|
||||
&mut self.report
|
||||
}
|
||||
|
||||
fn set_next_jobs(&mut self, next_jobs: VecDeque<Box<dyn DynJob<Ctx>>>) {
|
||||
self.next_jobs = next_jobs;
|
||||
}
|
||||
|
||||
fn next_jobs(&self) -> &VecDeque<Box<dyn DynJob<Ctx>>> {
|
||||
&self.next_jobs
|
||||
}
|
||||
|
||||
async fn serialize(self: Box<Self>) -> Result<Option<Vec<u8>>, rmp_serde::encode::Error> {
|
||||
self.job.serialize().await
|
||||
}
|
||||
|
||||
fn dispatch(
|
||||
self: Box<Self>,
|
||||
base_dispatcher: BaseTaskDispatcher<Error>,
|
||||
job_ctx: Ctx,
|
||||
done_tx: chan::Sender<(JobId, Result<ReturnStatus, Error>)>,
|
||||
) -> JobHandle<Ctx> {
|
||||
let (commands_tx, commands_rx) = chan::bounded(8);
|
||||
|
||||
spawn(to_spawn_job(
|
||||
self.id,
|
||||
self.job,
|
||||
job_ctx.clone(),
|
||||
None,
|
||||
base_dispatcher,
|
||||
commands_rx,
|
||||
done_tx,
|
||||
));
|
||||
|
||||
JobHandle {
|
||||
next_jobs: self.next_jobs,
|
||||
job_ctx,
|
||||
report: self.report,
|
||||
commands_tx,
|
||||
}
|
||||
}
|
||||
|
||||
fn resume(
|
||||
self: Box<Self>,
|
||||
base_dispatcher: BaseTaskDispatcher<Error>,
|
||||
job_ctx: Ctx,
|
||||
serialized_tasks: Option<SerializedTasks>,
|
||||
done_tx: chan::Sender<(JobId, Result<ReturnStatus, Error>)>,
|
||||
) -> JobHandle<Ctx> {
|
||||
let (commands_tx, commands_rx) = chan::bounded(8);
|
||||
|
||||
spawn(to_spawn_job(
|
||||
self.id,
|
||||
self.job,
|
||||
job_ctx.clone(),
|
||||
serialized_tasks,
|
||||
base_dispatcher,
|
||||
commands_rx,
|
||||
done_tx,
|
||||
));
|
||||
|
||||
JobHandle {
|
||||
next_jobs: self.next_jobs,
|
||||
job_ctx,
|
||||
report: self.report,
|
||||
commands_tx,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn to_spawn_job<Ctx: JobContext>(
|
||||
id: JobId,
|
||||
mut job: impl Job,
|
||||
job_ctx: Ctx,
|
||||
existing_tasks: Option<SerializedTasks>,
|
||||
base_dispatcher: BaseTaskDispatcher<Error>,
|
||||
commands_rx: chan::Receiver<Command>,
|
||||
done_tx: chan::Sender<(JobId, Result<ReturnStatus, Error>)>,
|
||||
) {
|
||||
enum StreamMessage {
|
||||
Commands(Command),
|
||||
NewRemoteController(TaskRemoteController),
|
||||
Done(Result<ReturnStatus, Error>),
|
||||
}
|
||||
|
||||
let mut remote_controllers = vec![];
|
||||
|
||||
let (dispatcher, remote_controllers_rx) = JobTaskDispatcher::new(base_dispatcher);
|
||||
|
||||
if let Some(existing_tasks) = existing_tasks {
|
||||
if let Err(e) = job
|
||||
.resume_tasks(&dispatcher, &job_ctx, existing_tasks)
|
||||
.await
|
||||
{
|
||||
done_tx
|
||||
.send((id, Err(e)))
|
||||
.await
|
||||
.expect("jobs done tx closed on error at resume_tasks");
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
let mut msgs_stream = pin!((
|
||||
commands_rx.map(StreamMessage::Commands),
|
||||
remote_controllers_rx.map(StreamMessage::NewRemoteController),
|
||||
stream::once(job.run(dispatcher, job_ctx)).map(StreamMessage::Done),
|
||||
)
|
||||
.merge());
|
||||
|
||||
while let Some(msg) = msgs_stream.next().await {
|
||||
match msg {
|
||||
StreamMessage::NewRemoteController(remote_controller) => {
|
||||
remote_controllers.push(remote_controller);
|
||||
}
|
||||
StreamMessage::Commands(command) => {
|
||||
remote_controllers.retain(|controller| !controller.is_done());
|
||||
|
||||
match command {
|
||||
Command::Pause => {
|
||||
remote_controllers
|
||||
.iter()
|
||||
.map(TaskRemoteController::pause)
|
||||
.collect::<Vec<_>>()
|
||||
.join()
|
||||
.await
|
||||
.into_iter()
|
||||
.for_each(|res| {
|
||||
if let Err(e) = res {
|
||||
assert!(matches!(e, TaskSystemError::TaskNotFound(_)));
|
||||
|
||||
warn!("Tried to pause a task that was already completed");
|
||||
}
|
||||
});
|
||||
}
|
||||
Command::Resume => {
|
||||
remote_controllers
|
||||
.iter()
|
||||
.map(TaskRemoteController::resume)
|
||||
.collect::<Vec<_>>()
|
||||
.join()
|
||||
.await
|
||||
.into_iter()
|
||||
.for_each(|res| {
|
||||
if let Err(e) = res {
|
||||
assert!(matches!(e, TaskSystemError::TaskNotFound(_)));
|
||||
|
||||
warn!("Tried to pause a task that was already completed");
|
||||
}
|
||||
});
|
||||
}
|
||||
Command::Cancel => {
|
||||
remote_controllers
|
||||
.iter()
|
||||
.map(TaskRemoteController::cancel)
|
||||
.collect::<Vec<_>>()
|
||||
.join()
|
||||
.await;
|
||||
|
||||
return done_tx
|
||||
.send((id, Ok(ReturnStatus::Canceled)))
|
||||
.await
|
||||
.expect("jobs done tx closed");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
StreamMessage::Done(res) => {
|
||||
#[cfg(debug_assertions)]
|
||||
{
|
||||
// Just a sanity check to make sure we don't have any pending tasks left
|
||||
remote_controllers.retain(|controller| !controller.is_done());
|
||||
assert!(remote_controllers.is_empty());
|
||||
// Using #[cfg(debug_assertions)] to don't pay this retain cost in release builds
|
||||
}
|
||||
|
||||
return done_tx.send((id, res)).await.expect("jobs done tx closed");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct JobTaskDispatcher {
|
||||
dispatcher: BaseTaskDispatcher<Error>,
|
||||
remote_controllers_tx: chan::Sender<TaskRemoteController>,
|
||||
}
|
||||
|
||||
impl TaskDispatcher<Error> for JobTaskDispatcher {
|
||||
async fn dispatch_boxed(&self, boxed_task: Box<dyn Task<Error>>) -> TaskHandle<Error> {
|
||||
let handle = self.dispatcher.dispatch_boxed(boxed_task).await;
|
||||
|
||||
self.remote_controllers_tx
|
||||
.send(handle.remote_controller())
|
||||
.await
|
||||
.expect("remote controllers tx closed");
|
||||
|
||||
handle
|
||||
}
|
||||
|
||||
async fn dispatch_many_boxed(
|
||||
&self,
|
||||
boxed_tasks: impl IntoIterator<Item = Box<dyn Task<Error>>> + Send,
|
||||
) -> Vec<TaskHandle<Error>> {
|
||||
let handles = self.dispatcher.dispatch_many_boxed(boxed_tasks).await;
|
||||
|
||||
for handle in &handles {
|
||||
self.remote_controllers_tx
|
||||
.send(handle.remote_controller())
|
||||
.await
|
||||
.expect("remote controllers tx closed");
|
||||
}
|
||||
|
||||
handles
|
||||
.iter()
|
||||
.map(|handle| self.remote_controllers_tx.send(handle.remote_controller()))
|
||||
.collect::<Vec<_>>()
|
||||
.try_join()
|
||||
.await
|
||||
.expect("remote controllers tx closed");
|
||||
|
||||
handles
|
||||
}
|
||||
}
|
||||
|
||||
impl JobTaskDispatcher {
|
||||
fn new(dispatcher: BaseTaskDispatcher<Error>) -> (Self, chan::Receiver<TaskRemoteController>) {
|
||||
let (remote_controllers_tx, remote_controllers_rx) = chan::unbounded();
|
||||
|
||||
(
|
||||
Self {
|
||||
dispatcher,
|
||||
remote_controllers_tx,
|
||||
},
|
||||
remote_controllers_rx,
|
||||
)
|
||||
}
|
||||
}
|
313
core/crates/heavy-lifting/src/job_system/mod.rs
Normal file
313
core/crates/heavy-lifting/src/job_system/mod.rs
Normal file
|
@ -0,0 +1,313 @@
|
|||
use crate::Error;
|
||||
|
||||
use sd_prisma::prisma::location;
|
||||
use sd_task_system::BaseTaskDispatcher;
|
||||
use sd_utils::error::FileIOError;
|
||||
|
||||
use std::{cell::RefCell, collections::hash_map::HashMap, path::Path, sync::Arc};
|
||||
|
||||
use async_channel as chan;
|
||||
use futures::Stream;
|
||||
use futures_concurrency::future::{Join, TryJoin};
|
||||
use tokio::{fs, spawn, sync::oneshot, task::JoinHandle};
|
||||
use tracing::{error, info, trace, warn};
|
||||
use uuid::Uuid;
|
||||
|
||||
mod error;
|
||||
pub mod job;
|
||||
pub mod report;
|
||||
mod runner;
|
||||
mod store;
|
||||
pub mod utils;
|
||||
|
||||
use error::JobSystemError;
|
||||
use job::{IntoJob, Job, JobContext, JobName, JobOutput};
|
||||
use runner::{run, JobSystemRunner, RunnerMessage};
|
||||
use store::{load_jobs, StoredJobEntry};
|
||||
|
||||
pub use store::{SerializableJob, SerializedTasks};
|
||||
|
||||
const PENDING_JOBS_FILE: &str = "pending_jobs.bin";
|
||||
|
||||
pub type JobId = Uuid;
|
||||
|
||||
/// External control command that can be sent to a running job.
#[derive(Debug, Clone, Copy)]
pub enum Command {
	Pause,
	Resume,
	Cancel,
}
|
||||
|
||||
pub struct JobSystem<Ctx: JobContext> {
|
||||
msgs_tx: chan::Sender<RunnerMessage<Ctx>>,
|
||||
job_outputs_rx: chan::Receiver<(JobId, Result<JobOutput, JobSystemError>)>,
|
||||
runner_handle: RefCell<Option<JoinHandle<()>>>,
|
||||
}
|
||||
|
||||
impl<Ctx: JobContext> JobSystem<Ctx> {
|
||||
pub async fn new(
|
||||
base_dispatcher: BaseTaskDispatcher<Error>,
|
||||
data_directory: impl AsRef<Path> + Send,
|
||||
previously_existing_contexts: &HashMap<Uuid, Ctx>,
|
||||
) -> Result<Self, JobSystemError> {
|
||||
let (job_outputs_tx, job_outputs_rx) = chan::unbounded();
|
||||
let (job_return_status_tx, job_return_status_rx) = chan::bounded(16);
|
||||
let (msgs_tx, msgs_rx) = chan::bounded(8);
|
||||
|
||||
let store_jobs_file = Arc::new(data_directory.as_ref().join(PENDING_JOBS_FILE));
|
||||
|
||||
let runner_handle = RefCell::new(Some(spawn({
|
||||
let store_jobs_file = Arc::clone(&store_jobs_file);
|
||||
async move {
|
||||
trace!("Job System Runner starting...");
|
||||
while let Err(e) = spawn({
|
||||
let store_jobs_file = Arc::clone(&store_jobs_file);
|
||||
let base_dispatcher = base_dispatcher.clone();
|
||||
let job_return_status_tx = job_return_status_tx.clone();
|
||||
let job_return_status_rx = job_return_status_rx.clone();
|
||||
let job_outputs_tx = job_outputs_tx.clone();
|
||||
let msgs_rx = msgs_rx.clone();
|
||||
|
||||
async move {
|
||||
run(
|
||||
JobSystemRunner::new(
|
||||
base_dispatcher,
|
||||
job_return_status_tx,
|
||||
job_outputs_tx,
|
||||
),
|
||||
store_jobs_file.as_ref(),
|
||||
msgs_rx,
|
||||
job_return_status_rx,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
})
|
||||
.await
|
||||
{
|
||||
if e.is_panic() {
|
||||
error!("Job system panicked: {e:#?}");
|
||||
} else {
|
||||
trace!("JobSystemRunner received shutdown signal and will exit...");
|
||||
break;
|
||||
}
|
||||
trace!("Restarting JobSystemRunner processing task...");
|
||||
}
|
||||
|
||||
info!("JobSystemRunner gracefully shutdown");
|
||||
}
|
||||
})));
|
||||
|
||||
load_stored_job_entries(
|
||||
store_jobs_file.as_ref(),
|
||||
previously_existing_contexts,
|
||||
&msgs_tx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(Self {
|
||||
msgs_tx,
|
||||
job_outputs_rx,
|
||||
runner_handle,
|
||||
})
|
||||
}
|
||||
|
||||
/// Checks if *any* of the desired jobs is running for the desired location
|
||||
/// # Panics
|
||||
/// Panics only happen if internal channels are unexpectedly closed
|
||||
pub async fn check_running_jobs(
|
||||
&self,
|
||||
job_names: Vec<JobName>,
|
||||
location_id: location::id::Type,
|
||||
) -> bool {
|
||||
let (ack_tx, ack_rx) = oneshot::channel();
|
||||
|
||||
self.msgs_tx
|
||||
.send(RunnerMessage::CheckIfJobAreRunning {
|
||||
job_names,
|
||||
location_id,
|
||||
ack_tx,
|
||||
})
|
||||
.await
|
||||
.expect("runner msgs channel unexpectedly closed on check running job request");
|
||||
|
||||
ack_rx
|
||||
.await
|
||||
.expect("ack channel closed before receiving check running job response")
|
||||
}
|
||||
|
||||
/// Shutdown the job system
|
||||
/// # Panics
|
||||
/// Panics only happen if internal channels are unexpectedly closed
|
||||
pub async fn shutdown(&self) {
|
||||
if let Some(handle) = self
|
||||
.runner_handle
|
||||
.try_borrow_mut()
|
||||
.ok()
|
||||
.and_then(|mut maybe_handle| maybe_handle.take())
|
||||
{
|
||||
self.msgs_tx
|
||||
.send(RunnerMessage::Shutdown)
|
||||
.await
|
||||
.expect("runner msgs channel unexpectedly closed on shutdown request");
|
||||
|
||||
if let Err(e) = handle.await {
|
||||
if e.is_panic() {
|
||||
error!("JobSystem panicked: {e:#?}");
|
||||
}
|
||||
}
|
||||
info!("JobSystem gracefully shutdown");
|
||||
} else {
|
||||
warn!("JobSystem already shutdown");
|
||||
}
|
||||
}
|
||||
|
||||
/// Dispatch a new job to the system
|
||||
/// # Panics
|
||||
/// Panics only happen if internal channels are unexpectedly closed
|
||||
pub async fn dispatch<J: Job + SerializableJob>(
|
||||
&mut self,
|
||||
job: impl IntoJob<J, Ctx> + Send,
|
||||
location_id: location::id::Type,
|
||||
job_ctx: Ctx,
|
||||
) -> Result<JobId, JobSystemError> {
|
||||
let dyn_job = job.into_job();
|
||||
let id = dyn_job.id();
|
||||
|
||||
let (ack_tx, ack_rx) = oneshot::channel();
|
||||
self.msgs_tx
|
||||
.send(RunnerMessage::NewJob {
|
||||
id,
|
||||
location_id,
|
||||
dyn_job,
|
||||
job_ctx,
|
||||
ack_tx,
|
||||
})
|
||||
.await
|
||||
.expect("runner msgs channel unexpectedly closed on new job request");
|
||||
|
||||
ack_rx
|
||||
.await
|
||||
.expect("ack channel closed before receiving new job request")
|
||||
.map(|()| id)
|
||||
}
|
||||
|
||||
pub fn receive_job_outputs(
|
||||
&self,
|
||||
) -> impl Stream<Item = (JobId, Result<JobOutput, JobSystemError>)> {
|
||||
self.job_outputs_rx.clone()
|
||||
}
|
||||
|
||||
async fn send_command(&self, id: JobId, command: Command) -> Result<(), JobSystemError> {
|
||||
let (ack_tx, ack_rx) = oneshot::channel();
|
||||
self.msgs_tx
|
||||
.send(RunnerMessage::Command {
|
||||
id,
|
||||
command,
|
||||
ack_tx,
|
||||
})
|
||||
.await
|
||||
.unwrap_or_else(|_| {
|
||||
panic!("runner msgs channel unexpectedly closed on {command:?} request")
|
||||
});
|
||||
|
||||
ack_rx
|
||||
.await
|
||||
.unwrap_or_else(|_| panic!("ack channel closed before receiving {command:?} response"))
|
||||
}
|
||||
|
||||
pub async fn pause(&self, id: JobId) -> Result<(), JobSystemError> {
|
||||
self.send_command(id, Command::Pause).await
|
||||
}
|
||||
|
||||
pub async fn resume(&self, id: JobId) -> Result<(), JobSystemError> {
|
||||
self.send_command(id, Command::Resume).await
|
||||
}
|
||||
|
||||
pub async fn cancel(&self, id: JobId) -> Result<(), JobSystemError> {
|
||||
self.send_command(id, Command::Cancel).await
|
||||
}
|
||||
}
|
||||
|
||||
/// SAFETY: Due to usage of refcell we lost `Sync` impl, but we only use it to have a shutdown method
|
||||
/// receiving `&self` which is called once, and we also use `try_borrow_mut` so we never panic
|
||||
unsafe impl<Ctx: JobContext> Sync for JobSystem<Ctx> {}
|
||||
|
||||
async fn load_stored_job_entries<Ctx: JobContext>(
|
||||
store_jobs_file: impl AsRef<Path> + Send,
|
||||
previously_existing_job_contexts: &HashMap<Uuid, Ctx>,
|
||||
msgs_tx: &chan::Sender<RunnerMessage<Ctx>>,
|
||||
) -> Result<(), JobSystemError> {
|
||||
let store_jobs_file = store_jobs_file.as_ref();
|
||||
|
||||
let stores_jobs_by_db = rmp_serde::from_slice::<HashMap<Uuid, Vec<StoredJobEntry>>>(
|
||||
&fs::read(store_jobs_file).await.map_err(|e| {
|
||||
JobSystemError::StoredJobs(FileIOError::from((
|
||||
store_jobs_file,
|
||||
e,
|
||||
"Failed to load jobs from disk",
|
||||
)))
|
||||
})?,
|
||||
)?;
|
||||
|
||||
stores_jobs_by_db
|
||||
.into_iter()
|
||||
.filter_map(|(ctx_id, entries)| {
|
||||
previously_existing_job_contexts.get(&ctx_id).map_or_else(
|
||||
|| {
|
||||
warn!("Found stored jobs for a database that doesn't exist anymore: <ctx_id='{ctx_id}'>");
|
||||
None
|
||||
},
|
||||
|ctx| Some((entries, ctx.clone())),
|
||||
)
|
||||
})
|
||||
.map(|(entries, ctx)| async move {
|
||||
load_jobs(entries, &ctx)
|
||||
.await
|
||||
.map(|stored_jobs| (stored_jobs, ctx))
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join()
|
||||
.await
|
||||
.into_iter()
|
||||
.filter_map(|res| {
|
||||
res.map_err(|e| error!("Failed to load stored jobs: {e:#?}"))
|
||||
.ok()
|
||||
})
|
||||
.flat_map(|(stored_jobs, job_ctx)| {
|
||||
stored_jobs
|
||||
.into_iter()
|
||||
.map(move |(location_id, dyn_job, serialized_tasks)| {
|
||||
let job_ctx = job_ctx.clone();
|
||||
async move {
|
||||
let (ack_tx, ack_rx) = oneshot::channel();
|
||||
|
||||
msgs_tx
|
||||
.send(RunnerMessage::ResumeStoredJob {
|
||||
id: dyn_job.id(),
|
||||
location_id,
|
||||
dyn_job,
|
||||
job_ctx,
|
||||
serialized_tasks,
|
||||
ack_tx,
|
||||
})
|
||||
.await
|
||||
.expect("runner msgs channel unexpectedly closed on stored job resume");
|
||||
|
||||
ack_rx.await.expect(
|
||||
"ack channel closed before receiving stored job resume response",
|
||||
)
|
||||
}
|
||||
})
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.try_join()
|
||||
.await?;
|
||||
|
||||
fs::remove_file(store_jobs_file).await.map_err(|e| {
|
||||
JobSystemError::StoredJobs(FileIOError::from((
|
||||
store_jobs_file,
|
||||
e,
|
||||
"Failed to clean stored jobs file",
|
||||
)))
|
||||
})
|
||||
}
|
359
core/crates/heavy-lifting/src/job_system/report.rs
Normal file
359
core/crates/heavy-lifting/src/job_system/report.rs
Normal file
|
@ -0,0 +1,359 @@
|
|||
use sd_prisma::prisma::{job, PrismaClient};
|
||||
use sd_utils::db::{maybe_missing, MissingFieldError};
|
||||
|
||||
use std::{collections::HashMap, fmt, str::FromStr};
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use prisma_client_rust::QueryError;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use specta::Type;
|
||||
use strum::ParseError;
|
||||
use tracing::error;
|
||||
|
||||
use super::{job::JobName, JobId};
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub enum ReportError {
|
||||
#[error("failed to create job report in database: {0}")]
|
||||
Create(QueryError),
|
||||
#[error("failed to update job report in database: {0}")]
|
||||
Update(QueryError),
|
||||
#[error("invalid job status integer: {0}")]
|
||||
InvalidJobStatusInt(i32),
|
||||
#[error("job not found in database: <id='{0}'>")]
|
||||
MissingReport(JobId),
|
||||
#[error("serialization error: {0}")]
|
||||
Serialization(#[from] rmp_serde::encode::Error),
|
||||
#[error("deserialization error: {0}")]
|
||||
Deserialization(#[from] rmp_serde::decode::Error),
|
||||
#[error(transparent)]
|
||||
MissingField(#[from] MissingFieldError),
|
||||
#[error("failed to parse job name from database: {0}")]
|
||||
JobNameParse(#[from] ParseError),
|
||||
}
|
||||
|
||||
impl From<ReportError> for rspc::Error {
|
||||
fn from(e: ReportError) -> Self {
|
||||
match e {
|
||||
ReportError::Create(_)
|
||||
| ReportError::Update(_)
|
||||
| ReportError::InvalidJobStatusInt(_) => {
|
||||
Self::with_cause(rspc::ErrorCode::BadRequest, e.to_string(), e)
|
||||
}
|
||||
|
||||
ReportError::MissingReport(_) => {
|
||||
Self::with_cause(rspc::ErrorCode::NotFound, e.to_string(), e)
|
||||
}
|
||||
ReportError::Serialization(_)
|
||||
| ReportError::Deserialization(_)
|
||||
| ReportError::MissingField(_)
|
||||
| ReportError::JobNameParse(_) => {
|
||||
Self::with_cause(rspc::ErrorCode::InternalServerError, e.to_string(), e)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A single metadata entry attached to a job report; either data provided
/// when the job was enqueued or data produced by the job.
#[derive(Debug, Serialize, Deserialize, Type, Clone)]
pub enum ReportMetadata {
	Input(ReportInputMetadata),
	Output(ReportOutputMetadata),
}
|
||||
|
||||
/// Metadata describing the input/parameters a job was started with.
#[derive(Debug, Serialize, Deserialize, Type, Clone)]
pub enum ReportInputMetadata {
	// Placeholder variant until concrete input kinds are defined.
	Placeholder,
	// TODO: Add more types
}
|
||||
|
||||
/// Metadata describing what a job produced.
#[derive(Debug, Serialize, Deserialize, Type, Clone)]
pub enum ReportOutputMetadata {
	// Free-form metrics keyed by name (JSON values).
	Metrics(HashMap<String, serde_json::Value>),
	// TODO: Add more types
}
|
||||
|
||||
/// In-memory representation of a job's progress/report row, mirrored to the
/// `job` table and exposed to the frontend via specta.
#[derive(Debug, Serialize, Type, Clone)]
pub struct Report {
	pub id: JobId,
	pub name: JobName,
	// Action string in the form "action_name" or "action_name-<discriminator>";
	// see `get_action_name_and_group_key`.
	pub action: Option<String>,

	pub metadata: Vec<ReportMetadata>,
	// A fatal error that aborted the job, if any.
	pub critical_error: Option<String>,
	// Errors that did not abort the job.
	pub non_critical_errors: Vec<String>,

	// `None` until the report has actually been inserted into the database.
	pub created_at: Option<DateTime<Utc>>,
	pub started_at: Option<DateTime<Utc>>,
	pub completed_at: Option<DateTime<Utc>>,

	// Set when this job is a child of another job.
	pub parent_id: Option<JobId>,

	pub status: Status,
	pub task_count: i32,
	pub completed_task_count: i32,

	// Transient progress fields; not read back from the database
	// (see `TryFrom<job::Data>` which resets them to empty).
	pub phase: String,
	pub message: String,
	pub estimated_completion: DateTime<Utc>,
}
|
||||
|
||||
impl fmt::Display for Report {
	/// Human-readable one-liner identifying the job and its current status.
	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
		let Self {
			name, id, status, ..
		} = self;

		write!(f, "Job <name='{name}', uuid='{id}'> {status:#?}")
	}
}
|
||||
|
||||
// convert database struct into a resource struct
|
||||
impl TryFrom<job::Data> for Report {
	type Error = ReportError;

	/// Rebuilds a [`Report`] from its database row.
	///
	/// Corrupted binary ids/statuses panic (`expect("corrupted database")`),
	/// while missing required columns and unknown job names surface as
	/// `ReportError`s. Undecodable metadata / non-critical-error blobs are
	/// logged and replaced with empty vectors rather than failing the load.
	fn try_from(data: job::Data) -> Result<Self, Self::Error> {
		Ok(Self {
			id: JobId::from_slice(&data.id).expect("corrupted database"),
			name: JobName::from_str(&maybe_missing(data.name, "job.name")?)?,
			action: data.action,

			// Metadata is stored as a MessagePack blob; decode failures are
			// tolerated (logged, empty vec) so one bad blob can't block resume.
			metadata: data
				.metadata
				.map(|m| {
					rmp_serde::from_slice(&m).unwrap_or_else(|e| {
						error!("Failed to deserialize job metadata: {e:#?}");
						vec![]
					})
				})
				.unwrap_or_default(),
			critical_error: data.critical_error,
			// NOTE(review): non-critical errors are decoded here with
			// serde_json but written by `update` with rmp_serde — verify the
			// intended on-disk format is consistent.
			non_critical_errors: data.non_critical_errors.map_or_else(
				Default::default,
				|non_critical_errors| {
					serde_json::from_slice(&non_critical_errors).unwrap_or_else(|e| {
						error!("Failed to deserialize job non-critical errors: {e:#?}");
						vec![]
					})
				},
			),
			created_at: data.date_created.map(DateTime::into),
			started_at: data.date_started.map(DateTime::into),
			completed_at: data.date_completed.map(DateTime::into),
			parent_id: data
				.parent_id
				.map(|id| JobId::from_slice(&id).expect("corrupted database")),
			status: Status::try_from(maybe_missing(data.status, "job.status")?)
				.expect("corrupted database"),
			task_count: data.task_count.unwrap_or(0),
			completed_task_count: data.completed_task_count.unwrap_or(0),
			// Transient progress fields always start fresh on load.
			phase: String::new(),
			message: String::new(),
			estimated_completion: data
				.date_estimated_completion
				.map_or_else(Utc::now, DateTime::into),
		})
	}
}
|
||||
|
||||
impl Report {
	/// Creates a fresh, queued report for a new job; nothing is persisted
	/// until [`Report::create`] is called.
	#[must_use]
	pub fn new(uuid: JobId, name: JobName) -> Self {
		Self {
			id: uuid,
			name,
			action: None,
			created_at: None,
			started_at: None,
			completed_at: None,
			status: Status::Queued,
			critical_error: None,
			non_critical_errors: vec![],
			task_count: 0,
			metadata: vec![],
			parent_id: None,
			completed_task_count: 0,
			phase: String::new(),
			message: String::new(),
			estimated_completion: Utc::now(),
		}
	}

	/// Derives a display name and a grouping key from the `action` string.
	///
	/// Returns `(action_name, Some(group_key))` when an action is set, where
	/// the group key ties child jobs to their parent; otherwise falls back to
	/// `(job id, None)`.
	#[must_use]
	pub fn get_action_name_and_group_key(&self) -> (String, Option<String>) {
		// actions are formatted like "added_location" or "added_location-1"
		let Some(action_name) = self
			.action
			.as_ref()
			.and_then(|action| action.split('-').next().map(str::to_string))
		else {
			return (self.id.to_string(), None);
		};
		// create a unique group_key, EG: "added_location-<location_id>"
		let group_key = self.parent_id.map_or_else(
			|| format!("{action_name}-{}", self.id),
			|parent_id| format!("{action_name}-{parent_id}"),
		);

		(action_name, Some(group_key))
	}

	/// Inserts this report as a new row in the `job` table.
	///
	/// On success, `created_at` is set to the insertion time.
	///
	/// # Errors
	/// Returns [`ReportError::Create`] on database failure and
	/// [`ReportError::Serialization`] if metadata can't be encoded.
	pub async fn create(&mut self, db: &PrismaClient) -> Result<(), ReportError> {
		let now = Utc::now();

		db.job()
			.create(
				self.id.as_bytes().to_vec(),
				sd_utils::chain_optional_iter(
					[
						job::name::set(Some(self.name.to_string())),
						job::action::set(self.action.clone()),
						job::date_created::set(Some(now.into())),
						job::metadata::set(Some(rmp_serde::to_vec(&self.metadata)?)),
						job::status::set(Some(self.status as i32)),
						job::date_started::set(self.started_at.map(Into::into)),
						// NOTE(review): counts are pinned to 1/0 on creation
						// rather than taken from `self` — presumably updated
						// later via `update`; confirm this is intentional.
						job::task_count::set(Some(1)),
						job::completed_task_count::set(Some(0)),
					],
					// Link to the parent job row, if this is a child job.
					[self
						.parent_id
						.map(|id| job::parent::connect(job::id::equals(id.as_bytes().to_vec())))],
				),
			)
			.exec()
			.await
			.map_err(ReportError::Create)?;

		// Only setting created_at after we successfully created the job in DB
		self.created_at = Some(now);

		Ok(())
	}

	/// Writes the current mutable state (status, errors, metadata, counters,
	/// timestamps) back to this report's existing database row.
	///
	/// # Errors
	/// Returns [`ReportError::Update`] on database failure and
	/// [`ReportError::Serialization`] if metadata/errors can't be encoded.
	pub async fn update(&mut self, db: &PrismaClient) -> Result<(), ReportError> {
		db.job()
			.update(
				job::id::equals(self.id.as_bytes().to_vec()),
				vec![
					job::status::set(Some(self.status as i32)),
					job::critical_error::set(self.critical_error.clone()),
					job::non_critical_errors::set(Some(rmp_serde::to_vec(
						&self.non_critical_errors,
					)?)),
					job::metadata::set(Some(rmp_serde::to_vec(&self.metadata)?)),
					job::task_count::set(Some(self.task_count)),
					job::completed_task_count::set(Some(self.completed_task_count)),
					job::date_started::set(self.started_at.map(Into::into)),
					job::date_completed::set(self.completed_at.map(Into::into)),
				],
			)
			.exec()
			.await
			.map_err(ReportError::Update)?;

		Ok(())
	}
}
|
||||
|
||||
/// Lifecycle status of a job; the discriminants are the integers persisted
/// in the `job.status` column (see `TryFrom<i32>` below).
#[repr(i32)]
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Type, Eq, PartialEq)]
pub enum Status {
	Queued = 0,
	Running = 1,
	Completed = 2,
	Canceled = 3,
	Failed = 4,
	Paused = 5,
	CompletedWithErrors = 6,
}
|
||||
|
||||
impl Status {
|
||||
#[must_use]
|
||||
pub const fn is_finished(self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
Self::Completed
|
||||
| Self::Canceled | Self::Paused
|
||||
| Self::Failed | Self::CompletedWithErrors
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<i32> for Status {
|
||||
type Error = ReportError;
|
||||
|
||||
fn try_from(value: i32) -> Result<Self, Self::Error> {
|
||||
let s = match value {
|
||||
0 => Self::Queued,
|
||||
1 => Self::Running,
|
||||
2 => Self::Completed,
|
||||
3 => Self::Canceled,
|
||||
4 => Self::Failed,
|
||||
5 => Self::Paused,
|
||||
6 => Self::CompletedWithErrors,
|
||||
_ => return Err(Self::Error::InvalidJobStatusInt(value)),
|
||||
};
|
||||
|
||||
Ok(s)
|
||||
}
|
||||
}
|
||||
|
||||
/// Builder for [`Report`]; collects the caller-supplied fields before
/// `build` fills in the queued/empty defaults.
pub struct ReportBuilder {
	pub id: JobId,
	pub name: JobName,
	pub action: Option<String>,
	pub metadata: Vec<ReportMetadata>,
	pub parent_id: Option<JobId>,
}
|
||||
|
||||
impl ReportBuilder {
|
||||
#[must_use]
|
||||
pub fn build(self) -> Report {
|
||||
Report {
|
||||
id: self.id,
|
||||
name: self.name,
|
||||
action: self.action,
|
||||
created_at: None,
|
||||
started_at: None,
|
||||
completed_at: None,
|
||||
status: Status::Queued,
|
||||
critical_error: None,
|
||||
task_count: 0,
|
||||
non_critical_errors: vec![],
|
||||
metadata: self.metadata,
|
||||
parent_id: self.parent_id,
|
||||
completed_task_count: 0,
|
||||
phase: String::new(),
|
||||
message: String::new(),
|
||||
estimated_completion: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn new(id: JobId, name: JobName) -> Self {
|
||||
Self {
|
||||
id,
|
||||
name,
|
||||
action: None,
|
||||
metadata: vec![],
|
||||
parent_id: None,
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn with_action(mut self, action: impl Into<String>) -> Self {
|
||||
self.action = Some(action.into());
|
||||
self
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn with_metadata(mut self, metadata: ReportInputMetadata) -> Self {
|
||||
self.metadata.push(ReportMetadata::Input(metadata));
|
||||
self
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn with_parent_id(mut self, parent_id: JobId) -> Self {
|
||||
self.parent_id = Some(parent_id);
|
||||
self
|
||||
}
|
||||
}
|
535
core/crates/heavy-lifting/src/job_system/runner.rs
Normal file
535
core/crates/heavy-lifting/src/job_system/runner.rs
Normal file
|
@ -0,0 +1,535 @@
|
|||
use crate::Error;
|
||||
|
||||
use sd_prisma::prisma::location;
|
||||
use sd_task_system::BaseTaskDispatcher;
|
||||
use sd_utils::error::FileIOError;
|
||||
|
||||
use std::{
|
||||
collections::{hash_map::Entry, HashMap, HashSet},
|
||||
mem,
|
||||
path::Path,
|
||||
pin::pin,
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
use async_channel as chan;
|
||||
use chrono::Utc;
|
||||
use futures::StreamExt;
|
||||
use futures_concurrency::{future::TryJoin, stream::Merge};
|
||||
use tokio::{
|
||||
fs,
|
||||
sync::oneshot,
|
||||
time::{interval_at, Instant},
|
||||
};
|
||||
use tokio_stream::wrappers::IntervalStream;
|
||||
use tracing::{debug, error, info, warn};
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::{
|
||||
job::{DynJob, JobContext, JobHandle, JobName, JobOutput, ReturnStatus},
|
||||
report,
|
||||
store::{StoredJob, StoredJobEntry},
|
||||
Command, JobId, JobSystemError, SerializedTasks,
|
||||
};
|
||||
|
||||
const JOBS_INITIAL_CAPACITY: usize = 32;
|
||||
const FIVE_MINUTES: Duration = Duration::from_secs(5 * 60);
|
||||
|
||||
/// Messages accepted by the job system runner loop ([`run`]); each request
/// carries a oneshot `ack_tx` through which the result is reported back.
pub(super) enum RunnerMessage<Ctx: JobContext> {
	/// Dispatch a brand-new job.
	NewJob {
		id: JobId,
		location_id: location::id::Type,
		dyn_job: Box<dyn DynJob<Ctx>>,
		job_ctx: Ctx,
		ack_tx: oneshot::Sender<Result<(), JobSystemError>>,
	},
	/// Resume a job previously serialized to disk, optionally with its
	/// serialized pending tasks.
	ResumeStoredJob {
		id: JobId,
		location_id: location::id::Type,
		dyn_job: Box<dyn DynJob<Ctx>>,
		job_ctx: Ctx,
		serialized_tasks: Option<SerializedTasks>,
		ack_tx: oneshot::Sender<Result<(), JobSystemError>>,
	},
	/// Forward a pause/resume/cancel command to a running job.
	Command {
		id: JobId,
		command: Command,
		ack_tx: oneshot::Sender<Result<(), JobSystemError>>,
	},
	/// Query whether any of the given job kinds is running on a location.
	CheckIfJobAreRunning {
		job_names: Vec<JobName>,
		location_id: location::id::Type,
		ack_tx: oneshot::Sender<bool>,
	},
	/// Gracefully drain and shut down the runner.
	Shutdown,
}
|
||||
|
||||
/// State owned by the job system's runner loop.
///
/// Invariant: for every running job id, `handles`, `job_hashes`
/// (hash -> id), `job_hashes_by_id` (id -> hash), `running_jobs_by_job_id`
/// and `running_jobs_set` all hold a matching entry; they are inserted in
/// `new_job` and removed together in `process_return_status`.
pub(super) struct JobSystemRunner<Ctx: JobContext> {
	base_dispatcher: BaseTaskDispatcher<Error>,
	handles: HashMap<JobId, JobHandle<Ctx>>,
	job_hashes: HashMap<u64, JobId>,
	job_hashes_by_id: HashMap<JobId, u64>,
	running_jobs_by_job_id: HashMap<JobId, (JobName, location::id::Type)>,
	running_jobs_set: HashSet<(JobName, location::id::Type)>,
	// Jobs serialized during shutdown, grouped by job-context (library) id.
	jobs_to_store_by_ctx_id: HashMap<Uuid, Vec<StoredJobEntry>>,
	// Sender handed to each dispatched job so it can report its outcome.
	job_return_status_tx: chan::Sender<(JobId, Result<ReturnStatus, Error>)>,
	// Channel where final job outputs are published to the wider system.
	job_outputs_tx: chan::Sender<(JobId, Result<JobOutput, JobSystemError>)>,
}
|
||||
|
||||
impl<Ctx: JobContext> JobSystemRunner<Ctx> {
	/// Builds a runner with empty bookkeeping maps pre-sized to
	/// `JOBS_INITIAL_CAPACITY`.
	pub(super) fn new(
		base_dispatcher: BaseTaskDispatcher<Error>,
		job_return_status_tx: chan::Sender<(JobId, Result<ReturnStatus, Error>)>,
		job_outputs_tx: chan::Sender<(JobId, Result<JobOutput, JobSystemError>)>,
	) -> Self {
		Self {
			base_dispatcher,
			handles: HashMap::with_capacity(JOBS_INITIAL_CAPACITY),
			job_hashes: HashMap::with_capacity(JOBS_INITIAL_CAPACITY),
			job_hashes_by_id: HashMap::with_capacity(JOBS_INITIAL_CAPACITY),
			running_jobs_by_job_id: HashMap::with_capacity(JOBS_INITIAL_CAPACITY),
			running_jobs_set: HashSet::with_capacity(JOBS_INITIAL_CAPACITY),
			jobs_to_store_by_ctx_id: HashMap::new(),
			job_return_status_tx,
			job_outputs_tx,
		}
	}

	/// Registers and dispatches a job (or resumes it when
	/// `maybe_existing_tasks` is `Some`), persisting its report and the
	/// reports of any queued child jobs.
	///
	/// # Errors
	/// Returns [`JobSystemError::AlreadyRunning`] when a job with the same
	/// hash is already active, or a report error if database writes fail.
	async fn new_job(
		&mut self,
		id: JobId,
		location_id: location::id::Type,
		dyn_job: Box<dyn DynJob<Ctx>>,
		job_ctx: Ctx,
		maybe_existing_tasks: Option<SerializedTasks>,
	) -> Result<(), JobSystemError> {
		let Self {
			base_dispatcher,
			handles,
			job_hashes,
			job_hashes_by_id,
			job_return_status_tx,
			running_jobs_by_job_id,
			running_jobs_set,
			..
		} = self;

		let db = job_ctx.db();
		let job_name = dyn_job.job_name();

		// Deduplicate by job hash: the same logical job can't run twice.
		let job_hash = dyn_job.hash();
		if let Some(&already_running_id) = job_hashes.get(&job_hash) {
			return Err(JobSystemError::AlreadyRunning {
				new_id: id,
				already_running_id,
				job_name,
			});
		}

		// Record the job in all bookkeeping maps (see struct invariant).
		running_jobs_by_job_id.insert(id, (job_name, location_id));
		running_jobs_set.insert((job_name, location_id));

		job_hashes.insert(job_hash, id);
		job_hashes_by_id.insert(id, job_hash);

		let start_time = Utc::now();

		let mut handle = if maybe_existing_tasks.is_some() {
			dyn_job.resume(
				base_dispatcher.clone(),
				job_ctx.clone(),
				maybe_existing_tasks,
				job_return_status_tx.clone(),
			)
		} else {
			dyn_job.dispatch(
				base_dispatcher.clone(),
				job_ctx.clone(),
				job_return_status_tx.clone(),
			)
		};

		handle.report.status = report::Status::Running;
		if handle.report.started_at.is_none() {
			handle.report.started_at = Some(start_time);
		}

		// If the report doesn't have a created_at date, it's a new report
		if handle.report.created_at.is_none() {
			handle.report.create(db).await?;
		} else {
			// Otherwise it can be a job being resumed or a children job that was already been created
			handle.report.update(db).await?;
		}

		// Registering children jobs
		handle
			.next_jobs
			.iter_mut()
			.map(|dyn_job| dyn_job.report_mut())
			.map(|next_job_report| async {
				if next_job_report.created_at.is_none() {
					next_job_report.create(job_ctx.db()).await
				} else {
					Ok(())
				}
			})
			.collect::<Vec<_>>()
			.try_join()
			.await?;

		handles.insert(id, handle);

		Ok(())
	}

	/// Forwards a command (pause/resume/cancel) to the job's handle.
	///
	/// # Errors
	/// Returns [`JobSystemError::NotFound`] if the job isn't running.
	async fn process_command(&mut self, id: JobId, command: Command) -> Result<(), JobSystemError> {
		if let Some(handle) = self.handles.get_mut(&id) {
			handle.send_command(command).await?;
			Ok(())
		} else {
			Err(JobSystemError::NotFound(id))
		}
	}

	/// `true` when no job is registered in any of the core maps.
	fn is_empty(&self) -> bool {
		self.handles.is_empty() && self.job_hashes.is_empty() && self.job_hashes_by_id.is_empty()
	}

	/// Checks whether any of `job_names` is currently running on the given
	/// location.
	fn check_if_job_are_running(
		&self,
		job_names: Vec<JobName>,
		location_id: location::id::Type,
	) -> bool {
		job_names
			.into_iter()
			.any(|job_name| self.running_jobs_set.contains(&(job_name, location_id)))
	}

	/// Handles a job's terminal status: removes it from all bookkeeping
	/// maps, then either completes it (dispatching its next queued job),
	/// stores it for later resume (on shutdown), or records
	/// cancellation/failure. Final outcomes are published on
	/// `job_outputs_tx`; shutdown-serialization paths return early without
	/// publishing.
	async fn process_return_status(&mut self, job_id: JobId, status: Result<ReturnStatus, Error>) {
		let Self {
			handles,
			job_hashes,
			job_hashes_by_id,
			job_outputs_tx,
			job_return_status_tx,
			base_dispatcher,
			jobs_to_store_by_ctx_id,
			running_jobs_by_job_id,
			running_jobs_set,
			..
		} = self;

		// All of these removals must succeed together (struct invariant);
		// a miss here means the bookkeeping got out of sync — a bug.
		let job_hash = job_hashes_by_id.remove(&job_id).expect("it must be here");
		let (job_name, location_id) = running_jobs_by_job_id
			.remove(&job_id)
			.expect("a JobName and location_id must've been inserted in the map with the job id");
		assert!(running_jobs_set.remove(&(job_name, location_id)));

		assert!(job_hashes.remove(&job_hash).is_some());
		let mut handle = handles.remove(&job_id).expect("it must be here");

		let res = match status {
			Ok(ReturnStatus::Completed(job_return)) => {
				// Kick off the next queued job (if any) before finalizing.
				try_dispatch_next_job(
					&mut handle,
					base_dispatcher.clone(),
					(job_hashes, job_hashes_by_id),
					handles,
					job_return_status_tx.clone(),
				);

				handle.complete_job(job_return).await
			}

			Ok(ReturnStatus::Shutdown(Ok(Some(serialized_job)))) => {
				let name = handle.report.name;

				// Serialize every queued next job too; if any fails, the
				// error is logged and the whole entry is dropped.
				let Ok(next_jobs) = handle
					.next_jobs
					.into_iter()
					.map(|next_job| async move {
						let next_id = next_job.id();
						let next_name = next_job.job_name();
						next_job
							.serialize()
							.await
							.map(|maybe_serialized_job| {
								maybe_serialized_job.map(|serialized_job| StoredJob {
									id: next_id,
									name: next_name,
									serialized_job,
								})
							})
							.map_err(|e| {
								error!(
									"Failed to serialize next job: \
									<parent_id='{job_id}', parent_name='{name}', \
									next_id='{next_id}', next_name='{next_name}'>: {e:#?}"
								);
							})
					})
					.collect::<Vec<_>>()
					.try_join()
					.await
				else {
					return;
				};

				// Queue the serialized jobs for `save_jobs` at shutdown.
				jobs_to_store_by_ctx_id
					.entry(handle.job_ctx.id())
					.or_default()
					.push(StoredJobEntry {
						location_id,
						root_job: StoredJob {
							id: job_id,
							name,
							serialized_job,
						},
						next_jobs: next_jobs.into_iter().flatten().collect(),
					});

				return;
			}

			Ok(ReturnStatus::Shutdown(Ok(None))) => {
				debug!(
					"Job was shutdown but didn't returned any serialized data, \
					probably it isn't resumable job: <id='{job_id}'>"
				);
				return;
			}

			Ok(ReturnStatus::Shutdown(Err(e))) => {
				error!("Failed to serialize job: {e:#?}");
				return;
			}

			Ok(ReturnStatus::Canceled) => handle
				.cancel_job()
				.await
				.and_then(|()| Err(JobSystemError::Canceled(job_id))),

			Err(e) => handle.failed_job(&e).await.and_then(|()| Err(e.into())),
		};

		job_outputs_tx
			.send((job_id, res))
			.await
			.expect("job outputs channel unexpectedly closed on job completion");
	}

	/// Shrinks the bookkeeping maps back toward `JOBS_INITIAL_CAPACITY`
	/// after load spikes, so idle periods don't pin the peak allocation.
	fn clean_memory(&mut self) {
		if self.handles.capacity() > JOBS_INITIAL_CAPACITY
			&& self.handles.len() < JOBS_INITIAL_CAPACITY
		{
			self.handles.shrink_to(JOBS_INITIAL_CAPACITY);
		}

		if self.job_hashes.capacity() > JOBS_INITIAL_CAPACITY
			&& self.job_hashes.len() < JOBS_INITIAL_CAPACITY
		{
			self.job_hashes.shrink_to(JOBS_INITIAL_CAPACITY);
		}

		if self.job_hashes_by_id.capacity() > JOBS_INITIAL_CAPACITY
			&& self.job_hashes_by_id.len() < JOBS_INITIAL_CAPACITY
		{
			self.job_hashes_by_id.shrink_to(JOBS_INITIAL_CAPACITY);
		}

		if self.running_jobs_by_job_id.capacity() > JOBS_INITIAL_CAPACITY
			&& self.running_jobs_by_job_id.len() < JOBS_INITIAL_CAPACITY
		{
			self.running_jobs_by_job_id.shrink_to(JOBS_INITIAL_CAPACITY);
		}

		if self.running_jobs_set.capacity() > JOBS_INITIAL_CAPACITY
			&& self.running_jobs_set.len() < JOBS_INITIAL_CAPACITY
		{
			self.running_jobs_set.shrink_to(JOBS_INITIAL_CAPACITY);
		}
	}

	/// Persists all shutdown-serialized jobs to `store_jobs_file` as
	/// MessagePack. Must only be called once every job has finished
	/// (asserted). Writes nothing when there is nothing to store.
	///
	/// # Errors
	/// Returns [`JobSystemError::StoredJobs`] on file I/O failure or a
	/// serialization error on encoding failure.
	async fn save_jobs(
		self,
		store_jobs_file: impl AsRef<Path> + Send,
	) -> Result<(), JobSystemError> {
		let store_jobs_file = store_jobs_file.as_ref();

		let Self {
			handles,
			job_hashes,
			job_hashes_by_id,
			jobs_to_store_by_ctx_id,
			..
		} = self;

		assert!(
			handles.is_empty() && job_hashes.is_empty() && job_hashes_by_id.is_empty(),
			"All jobs must be completed before saving"
		);

		if jobs_to_store_by_ctx_id.is_empty() {
			info!("No jobs to store in disk for job system shutdown!");
			return Ok(());
		}

		fs::write(
			store_jobs_file,
			rmp_serde::to_vec_named(&jobs_to_store_by_ctx_id)?,
		)
		.await
		.map_err(|e| JobSystemError::StoredJobs(FileIOError::from((store_jobs_file, e))))
	}
}
|
||||
|
||||
/// Pops the finished job's first queued next-job and dispatches it,
/// transferring the remaining queue onto the new handle.
///
/// If a job with the same hash is already registered, the next job is
/// dropped with a warning instead of being dispatched.
fn try_dispatch_next_job<Ctx: JobContext>(
	handle: &mut JobHandle<Ctx>,
	base_dispatcher: BaseTaskDispatcher<Error>,
	(job_hashes, job_hashes_by_id): (&mut HashMap<u64, JobId>, &mut HashMap<JobId, u64>),
	handles: &mut HashMap<JobId, JobHandle<Ctx>>,
	job_return_status_tx: chan::Sender<(JobId, Result<ReturnStatus, Error>)>,
) {
	if let Some(next) = handle.next_jobs.pop_front() {
		let next_id = next.id();
		let next_hash = next.hash();
		// Only dispatch when no job with this hash is already registered.
		if let Entry::Vacant(e) = job_hashes.entry(next_hash) {
			e.insert(next_id);
			job_hashes_by_id.insert(next_id, next_hash);
			let mut next_handle = next.dispatch(
				base_dispatcher,
				handle.job_ctx.clone(),
				job_return_status_tx,
			);

			assert!(
				next_handle.next_jobs.is_empty(),
				"Only the root job will have next jobs, the rest will be empty and \
				we will swap with remaining ones from the previous job"
			);

			// Hand the rest of the queue over to the newly dispatched job.
			next_handle.next_jobs = mem::take(&mut handle.next_jobs);

			handles.insert(next_id, next_handle);
		} else {
			warn!("Unexpectedly found a job with the same hash as the next job: <id='{next_id}', name='{}'>", next.job_name());
		}
	}
}
|
||||
|
||||
pub(super) async fn run<Ctx: JobContext>(
|
||||
mut runner: JobSystemRunner<Ctx>,
|
||||
store_jobs_file: impl AsRef<Path> + Send,
|
||||
msgs_rx: chan::Receiver<RunnerMessage<Ctx>>,
|
||||
job_return_status_rx: chan::Receiver<(JobId, Result<ReturnStatus, Error>)>,
|
||||
) {
|
||||
enum StreamMessage<Ctx: JobContext> {
|
||||
ReturnStatus((JobId, Result<ReturnStatus, Error>)),
|
||||
RunnerMessage(RunnerMessage<Ctx>),
|
||||
CleanMemoryTick,
|
||||
}
|
||||
|
||||
let memory_cleanup_interval = interval_at(Instant::now() + FIVE_MINUTES, FIVE_MINUTES);
|
||||
|
||||
let job_return_status_rx_to_shutdown = job_return_status_rx.clone();
|
||||
|
||||
let mut msg_stream = pin!((
|
||||
msgs_rx.map(StreamMessage::RunnerMessage),
|
||||
job_return_status_rx.map(StreamMessage::ReturnStatus),
|
||||
IntervalStream::new(memory_cleanup_interval).map(|_| StreamMessage::CleanMemoryTick),
|
||||
)
|
||||
.merge());
|
||||
|
||||
while let Some(msg) = msg_stream.next().await {
|
||||
match msg {
|
||||
// Job return status messages
|
||||
StreamMessage::ReturnStatus((job_id, status)) => {
|
||||
runner.process_return_status(job_id, status).await;
|
||||
}
|
||||
|
||||
// Runner messages
|
||||
StreamMessage::RunnerMessage(RunnerMessage::NewJob {
|
||||
id,
|
||||
location_id,
|
||||
dyn_job,
|
||||
job_ctx,
|
||||
ack_tx,
|
||||
}) => {
|
||||
ack_tx
|
||||
.send(
|
||||
runner
|
||||
.new_job(id, location_id, dyn_job, job_ctx, None)
|
||||
.await,
|
||||
)
|
||||
.expect("ack channel closed before sending new job response");
|
||||
}
|
||||
|
||||
StreamMessage::RunnerMessage(RunnerMessage::ResumeStoredJob {
|
||||
id,
|
||||
location_id,
|
||||
dyn_job,
|
||||
job_ctx,
|
||||
serialized_tasks,
|
||||
ack_tx,
|
||||
}) => {
|
||||
ack_tx
|
||||
.send(
|
||||
runner
|
||||
.new_job(id, location_id, dyn_job, job_ctx, serialized_tasks)
|
||||
.await,
|
||||
)
|
||||
.expect("ack channel closed before sending resume job response");
|
||||
}
|
||||
|
||||
StreamMessage::RunnerMessage(RunnerMessage::Command {
|
||||
id,
|
||||
command,
|
||||
ack_tx,
|
||||
}) => {
|
||||
ack_tx
|
||||
.send(runner.process_command(id, command).await)
|
||||
.unwrap_or_else(|_| {
|
||||
panic!("ack channel closed before sending {command:?} response")
|
||||
});
|
||||
}
|
||||
|
||||
StreamMessage::RunnerMessage(RunnerMessage::Shutdown) => {
|
||||
// Consuming all pending return status messages
|
||||
loop {
|
||||
while let Ok((job_id, status)) = job_return_status_rx_to_shutdown.try_recv() {
|
||||
runner.process_return_status(job_id, status).await;
|
||||
}
|
||||
|
||||
if runner.is_empty() {
|
||||
break;
|
||||
}
|
||||
|
||||
debug!("Waiting for all jobs to complete before shutting down...");
|
||||
}
|
||||
|
||||
// Now the runner can shutdown
|
||||
if let Err(e) = runner.save_jobs(store_jobs_file).await {
|
||||
error!("Failed to save jobs before shutting down: {e:#?}");
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
StreamMessage::RunnerMessage(RunnerMessage::CheckIfJobAreRunning {
|
||||
job_names,
|
||||
location_id,
|
||||
ack_tx,
|
||||
}) => {
|
||||
ack_tx
|
||||
.send(runner.check_if_job_are_running(job_names, location_id))
|
||||
.expect("ack channel closed before sending resume job response");
|
||||
}
|
||||
|
||||
// Memory cleanup tick
|
||||
StreamMessage::CleanMemoryTick => {
|
||||
runner.clean_memory();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
219
core/crates/heavy-lifting/src/job_system/store.rs
Normal file
219
core/crates/heavy-lifting/src/job_system/store.rs
Normal file
|
@ -0,0 +1,219 @@
|
|||
use crate::indexer::IndexerJob;
|
||||
|
||||
use sd_prisma::prisma::{job, location};
|
||||
use sd_utils::uuid_to_bytes;
|
||||
|
||||
use std::{
|
||||
collections::{HashMap, VecDeque},
|
||||
future::Future,
|
||||
iter,
|
||||
marker::PhantomData,
|
||||
};
|
||||
|
||||
use futures_concurrency::future::TryJoin;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::{
|
||||
job::{DynJob, Job, JobContext, JobHolder, JobName},
|
||||
report::{Report, ReportError},
|
||||
JobId, JobSystemError,
|
||||
};
|
||||
|
||||
/// Opaque byte blob holding a job's serialized pending tasks, produced at
/// shutdown and fed back on resume.
#[derive(Debug, Serialize, Deserialize)]
pub struct SerializedTasks(pub Vec<u8>);
|
||||
|
||||
/// Optional (de)serialization support for jobs that can survive a restart.
///
/// The default implementations return `Ok(None)`, meaning "not resumable":
/// such a job is simply not stored at shutdown and not restored at startup.
pub trait SerializableJob: 'static
where
	Self: Sized,
{
	/// Serializes the job's state; `Ok(None)` opts out of persistence.
	fn serialize(
		self,
	) -> impl Future<Output = Result<Option<Vec<u8>>, rmp_serde::encode::Error>> + Send {
		async move { Ok(None) }
	}

	/// Rebuilds the job (and optionally its serialized tasks) from bytes
	/// written by [`SerializableJob::serialize`]; `Ok(None)` means the job
	/// couldn't be restored.
	#[allow(unused_variables)]
	fn deserialize(
		serialized_job: &[u8],
		ctx: &impl JobContext,
	) -> impl Future<
		Output = Result<Option<(Self, Option<SerializedTasks>)>, rmp_serde::decode::Error>,
	> + Send {
		async move { Ok(None) }
	}
}
|
||||
|
||||
/// On-disk representation of a single serialized job: its id, which job
/// type it is (used to pick the deserializer), and its state bytes.
#[derive(Debug, Serialize, Deserialize)]
pub struct StoredJob {
	pub(super) id: JobId,
	pub(super) name: JobName,
	pub(super) serialized_job: Vec<u8>,
}
|
||||
|
||||
/// On-disk entry for a job chain: the root job plus the queued jobs that
/// were meant to run after it, tied to the location they operate on.
#[derive(Debug, Serialize, Deserialize)]
pub struct StoredJobEntry {
	pub(super) location_id: location::id::Type,
	pub(super) root_job: StoredJob,
	pub(super) next_jobs: Vec<StoredJob>,
}
|
||||
|
||||
/// Reconstructs jobs from stored entries for resuming after a restart.
///
/// Steps: (1) fetch all reports (root and next jobs) in one query, keyed by
/// job id; (2) deserialize each root job concurrently; (3) deserialize each
/// root's next-jobs concurrently and attach them to the root. Jobs whose
/// deserializer returns `None` are silently skipped (`flatten`).
///
/// # Errors
/// Fails if reports can't be loaded or parsed, if a stored job has no
/// matching report, or if deserialization errors out.
pub async fn load_jobs<Ctx: JobContext>(
	entries: Vec<StoredJobEntry>,
	job_ctx: &Ctx,
) -> Result<
	Vec<(
		location::id::Type,
		Box<dyn DynJob<Ctx>>,
		Option<SerializedTasks>,
	)>,
	JobSystemError,
> {
	// Single query for every report we might need: root jobs + next jobs.
	let mut reports = job_ctx
		.db()
		.job()
		.find_many(vec![job::id::in_vec(
			entries
				.iter()
				.flat_map(
					|StoredJobEntry {
					     root_job: StoredJob { id, .. },
					     next_jobs,
					     ..
					 }| { iter::once(*id).chain(next_jobs.iter().map(|StoredJob { id, .. }| *id)) },
				)
				.map(uuid_to_bytes)
				.collect::<Vec<_>>(),
		)])
		.exec()
		.await
		.map_err(JobSystemError::LoadReportsForResume)?
		.into_iter()
		.map(Report::try_from)
		.map(|report_res| report_res.map(|report| (report.id, report)))
		.collect::<Result<HashMap<_, _>, _>>()?;

	entries
		.into_iter()
		.map(
			|StoredJobEntry {
			     location_id,
			     root_job,
			     next_jobs,
			 }| {
				// Pair each root job with its report before spawning the
				// deserialization future; a missing report is fatal.
				let report = reports
					.remove(&root_job.id)
					.ok_or(ReportError::MissingReport(root_job.id))?;

				Ok(async move {
					load_job(root_job, report, job_ctx)
						.await
						.map(|maybe_loaded_job| {
							maybe_loaded_job
								.map(|(dyn_job, tasks)| (location_id, dyn_job, tasks, next_jobs))
						})
				})
			},
		)
		.collect::<Result<Vec<_>, JobSystemError>>()?
		.try_join()
		.await?
		.into_iter()
		.flatten() // drop root jobs whose deserializer returned None
		.map(|(location_id, mut dyn_job, tasks, next_jobs)| {
			// Same report-pairing step for the queued next jobs.
			let next_jobs_and_reports = next_jobs
				.into_iter()
				.map(|next_job| {
					let next_job_id = next_job.id;
					reports
						.remove(&next_job.id)
						.map(|report| (next_job, report))
						.ok_or(ReportError::MissingReport(next_job_id))
				})
				.collect::<Result<Vec<_>, _>>()?;

			Ok(async move {
				next_jobs_and_reports
					.into_iter()
					.map(|(next_job, report)| async move {
						load_job(next_job, report, job_ctx)
							.await
							.map(|maybe_loaded_next_job| {
								maybe_loaded_next_job.map(|(next_dyn_job, next_tasks)| {
									// Next jobs never ran, so they can't have
									// tasks or their own next-job queue.
									assert!(
										next_tasks.is_none(),
										"Next jobs must not have tasks as they haven't run yet"
									);
									assert!(
										next_dyn_job.next_jobs().is_empty(),
										"Next jobs must not have next jobs"
									);
									next_dyn_job
								})
							})
					})
					.collect::<Vec<_>>()
					.try_join()
					.await
					.map(|maybe_next_dyn_jobs| {
						// Reattach restored next jobs to the root job.
						dyn_job.set_next_jobs(maybe_next_dyn_jobs.into_iter().flatten().collect());
						(location_id, dyn_job, tasks)
					})
			})
		})
		.collect::<Result<Vec<_>, JobSystemError>>()?
		.try_join()
		.await
}
|
||||
|
||||
// Dispatches a `StoredJob` to the right `SerializableJob::deserialize`
// implementation by matching its `JobName` against each listed job type's
// `NAME`, then boxes the result into a `JobHolder` as a `dyn DynJob`.
// Implemented as a macro so the job-type list stays a simple literal list
// (each type needs a distinct static dispatch).
macro_rules! match_deserialize_job {
	($stored_job:ident, $report:ident, $job_ctx:ident, $ctx_type:ty, [$($job_type:ty),+ $(,)?]) => {{
		let StoredJob {
			id,
			name,
			serialized_job,
		} = $stored_job;


		match name {
			$(<$job_type as Job>::NAME => <$job_type as SerializableJob>::deserialize(
				&serialized_job,
				$job_ctx,
			).await
			.map(|maybe_job| maybe_job.map(|(job, tasks)| -> (
				Box<dyn DynJob<$ctx_type>>,
				Option<SerializedTasks>
			) {
				(
					Box::new(JobHolder {
						id,
						job,
						report: $report,
						next_jobs: VecDeque::new(),
						_ctx: PhantomData,
					}),
					tasks,
				)
			}
			))
			.map_err(Into::into),)+
		}
	}};
}
|
||||
|
||||
/// Deserializes a single stored job into a boxed [`DynJob`] plus its
/// optional serialized tasks, dispatching on the job's name via
/// `match_deserialize_job!`. Returns `Ok(None)` when the job type declined
/// to deserialize.
async fn load_job<Ctx: JobContext>(
	stored_job: StoredJob,
	report: Report,
	job_ctx: &Ctx,
) -> Result<Option<(Box<dyn DynJob<Ctx>>, Option<SerializedTasks>)>, JobSystemError> {
	match_deserialize_job!(
		stored_job,
		report,
		job_ctx,
		Ctx,
		[
			IndexerJob,
			// TODO: Add more jobs here
			// e.g.: FileIdentifierJob, MediaProcessorJob, etc.,
		]
	)
}
|
16
core/crates/heavy-lifting/src/job_system/utils.rs
Normal file
16
core/crates/heavy-lifting/src/job_system/utils.rs
Normal file
|
@ -0,0 +1,16 @@
|
|||
use crate::Error;
|
||||
|
||||
use sd_task_system::TaskHandle;
|
||||
|
||||
use futures_concurrency::future::Join;
|
||||
|
||||
pub async fn cancel_pending_tasks(
|
||||
pending_tasks: impl IntoIterator<Item = &TaskHandle<Error>> + Send,
|
||||
) {
|
||||
pending_tasks
|
||||
.into_iter()
|
||||
.map(TaskHandle::cancel)
|
||||
.collect::<Vec<_>>()
|
||||
.join()
|
||||
.await;
|
||||
}
|
71
core/crates/heavy-lifting/src/lib.rs
Normal file
71
core/crates/heavy-lifting/src/lib.rs
Normal file
|
@ -0,0 +1,71 @@
|
|||
#![warn(
|
||||
clippy::all,
|
||||
clippy::pedantic,
|
||||
clippy::correctness,
|
||||
clippy::perf,
|
||||
clippy::style,
|
||||
clippy::suspicious,
|
||||
clippy::complexity,
|
||||
clippy::nursery,
|
||||
clippy::unwrap_used,
|
||||
unused_qualifications,
|
||||
rust_2018_idioms,
|
||||
trivial_casts,
|
||||
trivial_numeric_casts,
|
||||
unused_allocation,
|
||||
clippy::unnecessary_cast,
|
||||
clippy::cast_lossless,
|
||||
clippy::cast_possible_truncation,
|
||||
clippy::cast_possible_wrap,
|
||||
clippy::cast_precision_loss,
|
||||
clippy::cast_sign_loss,
|
||||
clippy::dbg_macro,
|
||||
clippy::deprecated_cfg_attr,
|
||||
clippy::separated_literal_suffix,
|
||||
deprecated
|
||||
)]
|
||||
#![forbid(deprecated_in_future)]
|
||||
#![allow(clippy::missing_errors_doc, clippy::module_name_repetitions)]
|
||||
|
||||
use sd_task_system::TaskSystemError;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use specta::Type;
|
||||
use thiserror::Error;
|
||||
|
||||
pub mod indexer;
|
||||
pub mod job_system;
|
||||
|
||||
use indexer::{IndexerError, NonCriticalIndexerError};
|
||||
|
||||
pub use job_system::{
|
||||
job::{IntoJob, JobBuilder, JobContext, JobName, JobOutput, JobOutputData, ProgressUpdate},
|
||||
JobId, JobSystem,
|
||||
};
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum Error {
|
||||
#[error(transparent)]
|
||||
Indexer(#[from] IndexerError),
|
||||
|
||||
#[error(transparent)]
|
||||
TaskSystem(#[from] TaskSystemError),
|
||||
}
|
||||
|
||||
impl From<Error> for rspc::Error {
|
||||
fn from(e: Error) -> Self {
|
||||
match e {
|
||||
Error::Indexer(e) => e.into(),
|
||||
Error::TaskSystem(e) => {
|
||||
Self::with_cause(rspc::ErrorCode::InternalServerError, e.to_string(), e)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type)]
|
||||
pub enum NonCriticalJobError {
|
||||
// TODO: Add variants as needed
|
||||
#[error(transparent)]
|
||||
Indexer(#[from] NonCriticalIndexerError),
|
||||
}
|
30
core/crates/indexer-rules/Cargo.toml
Normal file
30
core/crates/indexer-rules/Cargo.toml
Normal file
|
@ -0,0 +1,30 @@
|
|||
[package]
|
||||
name = "sd-core-indexer-rules"
|
||||
version = "0.1.0"
|
||||
authors = ["Ericson Soares <ericson@spacedrive.com>"]
|
||||
license = { workspace = true }
|
||||
repository = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
# Spacedrive Sub-crates
|
||||
sd-prisma = { path = "../../../crates/prisma" }
|
||||
sd-utils = { path = "../../../crates/utils" }
|
||||
|
||||
chrono = { workspace = true }
|
||||
futures-concurrency = { workspace = true }
|
||||
globset = { workspace = true, features = ["serde1"] }
|
||||
prisma-client-rust = { workspace = true }
|
||||
rmp-serde = { workspace = true }
|
||||
rspc = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
specta = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
tokio = { workspace = true, features = ["fs"] }
|
||||
tracing = { workspace = true }
|
||||
uuid = { workspace = true, features = ["v4", "serde"] }
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = { workspace = true }
|
|
@ -1,30 +1,60 @@
|
|||
use crate::library::Library;
|
||||
#![warn(
|
||||
clippy::all,
|
||||
clippy::pedantic,
|
||||
clippy::correctness,
|
||||
clippy::perf,
|
||||
clippy::style,
|
||||
clippy::suspicious,
|
||||
clippy::complexity,
|
||||
clippy::nursery,
|
||||
clippy::unwrap_used,
|
||||
unused_qualifications,
|
||||
rust_2018_idioms,
|
||||
trivial_casts,
|
||||
trivial_numeric_casts,
|
||||
unused_allocation,
|
||||
clippy::unnecessary_cast,
|
||||
clippy::cast_lossless,
|
||||
clippy::cast_possible_truncation,
|
||||
clippy::cast_possible_wrap,
|
||||
clippy::cast_precision_loss,
|
||||
clippy::cast_sign_loss,
|
||||
clippy::dbg_macro,
|
||||
clippy::deprecated_cfg_attr,
|
||||
clippy::separated_literal_suffix,
|
||||
deprecated
|
||||
)]
|
||||
#![forbid(deprecated_in_future)]
|
||||
#![allow(clippy::missing_errors_doc)]
|
||||
|
||||
use sd_prisma::prisma::indexer_rule;
|
||||
use sd_prisma::prisma::{indexer_rule, PrismaClient};
|
||||
use sd_utils::{
|
||||
db::{maybe_missing, MissingFieldError},
|
||||
error::{FileIOError, NonUtf8PathError},
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use std::{
|
||||
collections::{HashMap, HashSet},
|
||||
marker::PhantomData,
|
||||
fs::Metadata,
|
||||
path::Path,
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use futures::future::try_join_all;
|
||||
use futures_concurrency::future::TryJoin;
|
||||
use globset::{Glob, GlobSet, GlobSetBuilder};
|
||||
use rmp_serde::{decode, encode};
|
||||
use rspc::ErrorCode;
|
||||
use serde::{de, ser, Deserialize, Serialize};
|
||||
|
||||
use specta::Type;
|
||||
use thiserror::Error;
|
||||
use tokio::fs;
|
||||
use tokio::{fs, sync::RwLock};
|
||||
use tracing::debug;
|
||||
use uuid::Uuid;
|
||||
|
||||
pub mod seed;
|
||||
mod serde_impl;
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum IndexerRuleError {
|
||||
|
@ -57,10 +87,10 @@ impl From<IndexerRuleError> for rspc::Error {
|
|||
IndexerRuleError::InvalidRuleKindInt(_)
|
||||
| IndexerRuleError::Glob(_)
|
||||
| IndexerRuleError::NonUtf8Path(_) => {
|
||||
rspc::Error::with_cause(ErrorCode::BadRequest, err.to_string(), err)
|
||||
Self::with_cause(ErrorCode::BadRequest, err.to_string(), err)
|
||||
}
|
||||
|
||||
_ => rspc::Error::with_cause(ErrorCode::InternalServerError, err.to_string(), err),
|
||||
_ => Self::with_cause(ErrorCode::InternalServerError, err.to_string(), err),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -83,8 +113,10 @@ pub struct IndexerRuleCreateArgs {
|
|||
impl IndexerRuleCreateArgs {
|
||||
pub async fn create(
|
||||
self,
|
||||
library: &Library,
|
||||
db: &PrismaClient,
|
||||
) -> Result<Option<indexer_rule::Data>, IndexerRuleError> {
|
||||
use indexer_rule::{date_created, date_modified, name, rules_per_kind};
|
||||
|
||||
debug!(
|
||||
"{} a new indexer rule (name = {}, params = {:?})",
|
||||
if self.dry_run {
|
||||
|
@ -127,12 +159,8 @@ impl IndexerRuleCreateArgs {
|
|||
|
||||
let date_created = Utc::now();
|
||||
|
||||
use indexer_rule::*;
|
||||
|
||||
Ok(Some(
|
||||
library
|
||||
.db
|
||||
.indexer_rule()
|
||||
db.indexer_rule()
|
||||
.create(
|
||||
sd_utils::uuid_to_bytes(generate_pub_id()),
|
||||
vec![
|
||||
|
@ -159,6 +187,7 @@ pub enum RuleKind {
|
|||
}
|
||||
|
||||
impl RuleKind {
|
||||
#[must_use]
|
||||
pub const fn variant_count() -> usize {
|
||||
// TODO: Use https://doc.rust-lang.org/std/mem/fn.variant_count.html if it ever gets stabilized
|
||||
4
|
||||
|
@ -168,9 +197,10 @@ impl RuleKind {
|
|||
/// `ParametersPerKind` is a mapping from `RuleKind` to the parameters required for each kind of rule.
|
||||
/// In case of doubt about globs, consult <https://docs.rs/globset/latest/globset/#syntax>
|
||||
///
|
||||
/// We store directly globs in the database, serialized using rmp_serde.
|
||||
/// We store directly globs in the database, serialized using [rmp_serde](https://docs.rs/rmp-serde).
|
||||
///
|
||||
/// In case of `ParametersPerKind::AcceptIfChildrenDirectoriesArePresent` or `ParametersPerKind::RejectIfChildrenDirectoriesArePresent`
|
||||
/// In case of `ParametersPerKind::AcceptIfChildrenDirectoriesArePresent` or
|
||||
/// `ParametersPerKind::RejectIfChildrenDirectoriesArePresent`
|
||||
/// first we change the data structure to a vector, then we serialize it.
|
||||
#[derive(Debug)]
|
||||
pub enum RulePerKind {
|
||||
|
@ -219,232 +249,67 @@ impl RulePerKind {
|
|||
}
|
||||
}
|
||||
|
||||
/// We're implementing `Serialize` by hand as `GlobSet`s aren't serializable, so we ignore them on
|
||||
/// serialization
|
||||
impl Serialize for RulePerKind {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: ser::Serializer,
|
||||
{
|
||||
match *self {
|
||||
RulePerKind::AcceptFilesByGlob(ref globs, ref _glob_set) => serializer
|
||||
.serialize_newtype_variant("ParametersPerKind", 0, "AcceptFilesByGlob", globs),
|
||||
RulePerKind::RejectFilesByGlob(ref globs, ref _glob_set) => serializer
|
||||
.serialize_newtype_variant("ParametersPerKind", 1, "RejectFilesByGlob", globs),
|
||||
RulePerKind::AcceptIfChildrenDirectoriesArePresent(ref children) => serializer
|
||||
.serialize_newtype_variant(
|
||||
"ParametersPerKind",
|
||||
2,
|
||||
"AcceptIfChildrenDirectoriesArePresent",
|
||||
children,
|
||||
),
|
||||
RulePerKind::RejectIfChildrenDirectoriesArePresent(ref children) => serializer
|
||||
.serialize_newtype_variant(
|
||||
"ParametersPerKind",
|
||||
3,
|
||||
"RejectIfChildrenDirectoriesArePresent",
|
||||
children,
|
||||
),
|
||||
}
|
||||
}
|
||||
pub trait MetadataForIndexerRules: Send + Sync + 'static {
|
||||
fn is_dir(&self) -> bool;
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for RulePerKind {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: de::Deserializer<'de>,
|
||||
{
|
||||
const VARIANTS: &[&str] = &[
|
||||
"AcceptFilesByGlob",
|
||||
"RejectFilesByGlob",
|
||||
"AcceptIfChildrenDirectoriesArePresent",
|
||||
"RejectIfChildrenDirectoriesArePresent",
|
||||
];
|
||||
|
||||
enum Fields {
|
||||
AcceptFilesByGlob,
|
||||
RejectFilesByGlob,
|
||||
AcceptIfChildrenDirectoriesArePresent,
|
||||
RejectIfChildrenDirectoriesArePresent,
|
||||
}
|
||||
|
||||
struct FieldsVisitor;
|
||||
|
||||
impl<'de> de::Visitor<'de> for FieldsVisitor {
|
||||
type Value = Fields;
|
||||
|
||||
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
formatter.write_str(
|
||||
"`AcceptFilesByGlob` \
|
||||
or `RejectFilesByGlob` \
|
||||
or `AcceptIfChildrenDirectoriesArePresent` \
|
||||
or `RejectIfChildrenDirectoriesArePresent`",
|
||||
)
|
||||
}
|
||||
|
||||
fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
|
||||
where
|
||||
E: de::Error,
|
||||
{
|
||||
match value {
|
||||
0 => Ok(Fields::AcceptFilesByGlob),
|
||||
1 => Ok(Fields::RejectFilesByGlob),
|
||||
2 => Ok(Fields::AcceptIfChildrenDirectoriesArePresent),
|
||||
3 => Ok(Fields::RejectIfChildrenDirectoriesArePresent),
|
||||
_ => Err(de::Error::invalid_value(
|
||||
de::Unexpected::Unsigned(value),
|
||||
&"variant index 0 <= i < 3",
|
||||
)),
|
||||
}
|
||||
}
|
||||
fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
|
||||
where
|
||||
E: de::Error,
|
||||
{
|
||||
match value {
|
||||
"AcceptFilesByGlob" => Ok(Fields::AcceptFilesByGlob),
|
||||
"RejectFilesByGlob" => Ok(Fields::RejectFilesByGlob),
|
||||
"AcceptIfChildrenDirectoriesArePresent" => {
|
||||
Ok(Fields::AcceptIfChildrenDirectoriesArePresent)
|
||||
}
|
||||
"RejectIfChildrenDirectoriesArePresent" => {
|
||||
Ok(Fields::RejectIfChildrenDirectoriesArePresent)
|
||||
}
|
||||
_ => Err(de::Error::unknown_variant(value, VARIANTS)),
|
||||
}
|
||||
}
|
||||
fn visit_bytes<E>(self, bytes: &[u8]) -> Result<Self::Value, E>
|
||||
where
|
||||
E: de::Error,
|
||||
{
|
||||
match bytes {
|
||||
b"AcceptFilesByGlob" => Ok(Fields::AcceptFilesByGlob),
|
||||
b"RejectFilesByGlob" => Ok(Fields::RejectFilesByGlob),
|
||||
b"AcceptIfChildrenDirectoriesArePresent" => {
|
||||
Ok(Fields::AcceptIfChildrenDirectoriesArePresent)
|
||||
}
|
||||
b"RejectIfChildrenDirectoriesArePresent" => {
|
||||
Ok(Fields::RejectIfChildrenDirectoriesArePresent)
|
||||
}
|
||||
_ => Err(de::Error::unknown_variant(
|
||||
&String::from_utf8_lossy(bytes),
|
||||
VARIANTS,
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for Fields {
|
||||
#[inline]
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: de::Deserializer<'de>,
|
||||
{
|
||||
deserializer.deserialize_identifier(FieldsVisitor)
|
||||
}
|
||||
}
|
||||
|
||||
struct ParametersPerKindVisitor<'de> {
|
||||
marker: PhantomData<RulePerKind>,
|
||||
lifetime: PhantomData<&'de ()>,
|
||||
}
|
||||
|
||||
impl<'de> de::Visitor<'de> for ParametersPerKindVisitor<'de> {
|
||||
type Value = RulePerKind;
|
||||
|
||||
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
formatter.write_str("enum ParametersPerKind")
|
||||
}
|
||||
|
||||
fn visit_enum<PPK>(self, data: PPK) -> Result<Self::Value, PPK::Error>
|
||||
where
|
||||
PPK: de::EnumAccess<'de>,
|
||||
{
|
||||
use de::Error;
|
||||
|
||||
de::EnumAccess::variant(data).and_then(|value| match value {
|
||||
(Fields::AcceptFilesByGlob, accept_files_by_glob) => {
|
||||
de::VariantAccess::newtype_variant::<Vec<Glob>>(accept_files_by_glob)
|
||||
.and_then(|globs| {
|
||||
globs
|
||||
.iter()
|
||||
.fold(&mut GlobSetBuilder::new(), |builder, glob| {
|
||||
builder.add(glob.to_owned())
|
||||
})
|
||||
.build()
|
||||
.map_or_else(
|
||||
|e| Err(PPK::Error::custom(e)),
|
||||
|glob_set| {
|
||||
Ok(Self::Value::AcceptFilesByGlob(globs, glob_set))
|
||||
},
|
||||
)
|
||||
})
|
||||
}
|
||||
(Fields::RejectFilesByGlob, reject_files_by_glob) => {
|
||||
de::VariantAccess::newtype_variant::<Vec<Glob>>(reject_files_by_glob)
|
||||
.and_then(|globs| {
|
||||
globs
|
||||
.iter()
|
||||
.fold(&mut GlobSetBuilder::new(), |builder, glob| {
|
||||
builder.add(glob.to_owned())
|
||||
})
|
||||
.build()
|
||||
.map_or_else(
|
||||
|e| Err(PPK::Error::custom(e)),
|
||||
|glob_set| {
|
||||
Ok(Self::Value::RejectFilesByGlob(globs, glob_set))
|
||||
},
|
||||
)
|
||||
})
|
||||
}
|
||||
(
|
||||
Fields::AcceptIfChildrenDirectoriesArePresent,
|
||||
accept_if_children_directories_are_present,
|
||||
) => de::VariantAccess::newtype_variant::<HashSet<String>>(
|
||||
accept_if_children_directories_are_present,
|
||||
)
|
||||
.map(Self::Value::AcceptIfChildrenDirectoriesArePresent),
|
||||
(
|
||||
Fields::RejectIfChildrenDirectoriesArePresent,
|
||||
reject_if_children_directories_are_present,
|
||||
) => de::VariantAccess::newtype_variant::<HashSet<String>>(
|
||||
reject_if_children_directories_are_present,
|
||||
)
|
||||
.map(Self::Value::RejectIfChildrenDirectoriesArePresent),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
deserializer.deserialize_enum(
|
||||
"ParametersPerKind",
|
||||
VARIANTS,
|
||||
ParametersPerKindVisitor {
|
||||
marker: PhantomData::<RulePerKind>,
|
||||
lifetime: PhantomData,
|
||||
},
|
||||
)
|
||||
impl MetadataForIndexerRules for Metadata {
|
||||
fn is_dir(&self) -> bool {
|
||||
self.is_dir()
|
||||
}
|
||||
}
|
||||
|
||||
impl RulePerKind {
|
||||
async fn apply(&self, source: impl AsRef<Path>) -> Result<(RuleKind, bool), IndexerRuleError> {
|
||||
#[deprecated]
|
||||
async fn apply(
|
||||
&self,
|
||||
source: impl AsRef<Path> + Send,
|
||||
) -> Result<(RuleKind, bool), IndexerRuleError> {
|
||||
match self {
|
||||
RulePerKind::AcceptIfChildrenDirectoriesArePresent(children) => {
|
||||
Self::AcceptIfChildrenDirectoriesArePresent(children) => {
|
||||
accept_dir_for_its_children(source, children)
|
||||
.await
|
||||
.map(|accepted| (RuleKind::AcceptIfChildrenDirectoriesArePresent, accepted))
|
||||
}
|
||||
RulePerKind::RejectIfChildrenDirectoriesArePresent(children) => {
|
||||
Self::RejectIfChildrenDirectoriesArePresent(children) => {
|
||||
reject_dir_for_its_children(source, children)
|
||||
.await
|
||||
.map(|rejected| (RuleKind::RejectIfChildrenDirectoriesArePresent, rejected))
|
||||
}
|
||||
|
||||
RulePerKind::AcceptFilesByGlob(_globs, accept_glob_set) => Ok((
|
||||
Self::AcceptFilesByGlob(_globs, accept_glob_set) => Ok((
|
||||
RuleKind::AcceptFilesByGlob,
|
||||
accept_by_glob(source, accept_glob_set),
|
||||
)),
|
||||
RulePerKind::RejectFilesByGlob(_globs, reject_glob_set) => Ok((
|
||||
Self::RejectFilesByGlob(_globs, reject_glob_set) => Ok((
|
||||
RuleKind::RejectFilesByGlob,
|
||||
reject_by_glob(source, reject_glob_set),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
async fn apply_with_metadata(
|
||||
&self,
|
||||
source: impl AsRef<Path> + Send,
|
||||
metadata: &impl MetadataForIndexerRules,
|
||||
) -> Result<(RuleKind, bool), IndexerRuleError> {
|
||||
match self {
|
||||
Self::AcceptIfChildrenDirectoriesArePresent(children) => {
|
||||
accept_dir_for_its_children_with_metadata(source, metadata, children)
|
||||
.await
|
||||
.map(|accepted| (RuleKind::AcceptIfChildrenDirectoriesArePresent, accepted))
|
||||
}
|
||||
Self::RejectIfChildrenDirectoriesArePresent(children) => {
|
||||
reject_dir_for_its_children_with_metadata(source, metadata, children)
|
||||
.await
|
||||
.map(|rejected| (RuleKind::RejectIfChildrenDirectoriesArePresent, rejected))
|
||||
}
|
||||
|
||||
Self::AcceptFilesByGlob(_globs, accept_glob_set) => Ok((
|
||||
RuleKind::AcceptFilesByGlob,
|
||||
accept_by_glob(source, accept_glob_set),
|
||||
)),
|
||||
Self::RejectFilesByGlob(_globs, reject_glob_set) => Ok((
|
||||
RuleKind::RejectFilesByGlob,
|
||||
reject_by_glob(source, reject_glob_set),
|
||||
)),
|
||||
|
@ -463,18 +328,50 @@ pub struct IndexerRule {
|
|||
}
|
||||
|
||||
impl IndexerRule {
|
||||
#[deprecated]
|
||||
pub async fn apply(
|
||||
&self,
|
||||
source: impl AsRef<Path>,
|
||||
source: impl AsRef<Path> + Send,
|
||||
) -> Result<Vec<(RuleKind, bool)>, IndexerRuleError> {
|
||||
try_join_all(self.rules.iter().map(|rule| rule.apply(source.as_ref()))).await
|
||||
self.rules
|
||||
.iter()
|
||||
.map(|rule| rule.apply(source.as_ref()))
|
||||
.collect::<Vec<_>>()
|
||||
.try_join()
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn apply_with_metadata(
|
||||
&self,
|
||||
source: impl AsRef<Path> + Send,
|
||||
metadata: &impl MetadataForIndexerRules,
|
||||
) -> Result<Vec<(RuleKind, bool)>, IndexerRuleError> {
|
||||
async fn inner(
|
||||
rules: &[RulePerKind],
|
||||
source: &Path,
|
||||
metadata: &impl MetadataForIndexerRules,
|
||||
) -> Result<Vec<(RuleKind, bool)>, IndexerRuleError> {
|
||||
rules
|
||||
.iter()
|
||||
.map(|rule| rule.apply_with_metadata(source, metadata))
|
||||
.collect::<Vec<_>>()
|
||||
.try_join()
|
||||
.await
|
||||
}
|
||||
|
||||
inner(&self.rules, source.as_ref(), metadata).await
|
||||
}
|
||||
|
||||
#[deprecated]
|
||||
pub async fn apply_all(
|
||||
rules: &[IndexerRule],
|
||||
source: impl AsRef<Path>,
|
||||
rules: &[Self],
|
||||
source: impl AsRef<Path> + Send,
|
||||
) -> Result<HashMap<RuleKind, Vec<bool>>, IndexerRuleError> {
|
||||
try_join_all(rules.iter().map(|rule| rule.apply(source.as_ref())))
|
||||
rules
|
||||
.iter()
|
||||
.map(|rule| rule.apply(source.as_ref()))
|
||||
.collect::<Vec<_>>()
|
||||
.try_join()
|
||||
.await
|
||||
.map(|results| {
|
||||
results.into_iter().flatten().fold(
|
||||
|
@ -488,6 +385,59 @@ impl IndexerRule {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct IndexerRuler {
|
||||
// TODO(fogodev): Use this RwLock later to acquire new rules while applying rules, like from a .gitignore file
|
||||
rules: Arc<RwLock<Vec<IndexerRule>>>,
|
||||
}
|
||||
|
||||
impl IndexerRuler {
|
||||
#[must_use]
|
||||
pub fn new(rules: Vec<IndexerRule>) -> Self {
|
||||
Self {
|
||||
rules: Arc::new(RwLock::new(rules)),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn serialize(&self) -> Result<Vec<u8>, rmp_serde::encode::Error> {
|
||||
rmp_serde::to_vec_named(&*self.rules.read().await)
|
||||
}
|
||||
|
||||
pub fn deserialize(data: &[u8]) -> Result<Self, rmp_serde::decode::Error> {
|
||||
rmp_serde::from_slice(data).map(Self::new)
|
||||
}
|
||||
|
||||
pub async fn apply_all(
|
||||
&self,
|
||||
source: impl AsRef<Path> + Send,
|
||||
metadata: &impl MetadataForIndexerRules,
|
||||
) -> Result<HashMap<RuleKind, Vec<bool>>, IndexerRuleError> {
|
||||
async fn inner(
|
||||
rules: &[IndexerRule],
|
||||
source: &Path,
|
||||
metadata: &impl MetadataForIndexerRules,
|
||||
) -> Result<HashMap<RuleKind, Vec<bool>>, IndexerRuleError> {
|
||||
rules
|
||||
.iter()
|
||||
.map(|rule| rule.apply_with_metadata(source, metadata))
|
||||
.collect::<Vec<_>>()
|
||||
.try_join()
|
||||
.await
|
||||
.map(|results| {
|
||||
results.into_iter().flatten().fold(
|
||||
HashMap::<_, Vec<_>>::with_capacity(RuleKind::variant_count()),
|
||||
|mut map, (kind, result)| {
|
||||
map.entry(kind).or_default().push(result);
|
||||
map
|
||||
},
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
inner(&self.rules.read().await, source.as_ref(), metadata).await
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&indexer_rule::Data> for IndexerRule {
|
||||
type Error = IndexerRuleError;
|
||||
|
||||
|
@ -522,8 +472,9 @@ fn reject_by_glob(source: impl AsRef<Path>, reject_glob_set: &GlobSet) -> bool {
|
|||
!accept_by_glob(source.as_ref(), reject_glob_set)
|
||||
}
|
||||
|
||||
#[deprecated]
|
||||
async fn accept_dir_for_its_children(
|
||||
source: impl AsRef<Path>,
|
||||
source: impl AsRef<Path> + Send,
|
||||
children: &HashSet<String>,
|
||||
) -> Result<bool, IndexerRuleError> {
|
||||
let source = source.as_ref();
|
||||
|
@ -566,8 +517,50 @@ async fn accept_dir_for_its_children(
|
|||
Ok(false)
|
||||
}
|
||||
|
||||
async fn accept_dir_for_its_children_with_metadata(
|
||||
source: impl AsRef<Path> + Send,
|
||||
metadata: &impl MetadataForIndexerRules,
|
||||
children: &HashSet<String>,
|
||||
) -> Result<bool, IndexerRuleError> {
|
||||
let source = source.as_ref();
|
||||
|
||||
// FIXME(fogodev): Just check for io::ErrorKind::NotADirectory error instead (feature = "io_error_more", issue = "86442")
|
||||
if !metadata.is_dir() {
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
let mut read_dir = fs::read_dir(source)
|
||||
.await // TODO: Check NotADirectory error here when available
|
||||
.map_err(|e| IndexerRuleError::AcceptByItsChildrenFileIO(FileIOError::from((source, e))))?;
|
||||
while let Some(entry) = read_dir
|
||||
.next_entry()
|
||||
.await
|
||||
.map_err(|e| IndexerRuleError::AcceptByItsChildrenFileIO(FileIOError::from((source, e))))?
|
||||
{
|
||||
let entry_name = entry
|
||||
.file_name()
|
||||
.to_str()
|
||||
.ok_or_else(|| NonUtf8PathError(entry.path().into()))?
|
||||
.to_string();
|
||||
|
||||
if entry
|
||||
.metadata()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
IndexerRuleError::AcceptByItsChildrenFileIO(FileIOError::from((source, e)))
|
||||
})?
|
||||
.is_dir() && children.contains(&entry_name)
|
||||
{
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
#[deprecated]
|
||||
async fn reject_dir_for_its_children(
|
||||
source: impl AsRef<Path>,
|
||||
source: impl AsRef<Path> + Send,
|
||||
children: &HashSet<String>,
|
||||
) -> Result<bool, IndexerRuleError> {
|
||||
let source = source.as_ref();
|
||||
|
@ -608,6 +601,46 @@ async fn reject_dir_for_its_children(
|
|||
Ok(true)
|
||||
}
|
||||
|
||||
async fn reject_dir_for_its_children_with_metadata(
|
||||
source: impl AsRef<Path> + Send,
|
||||
metadata: &impl MetadataForIndexerRules,
|
||||
children: &HashSet<String>,
|
||||
) -> Result<bool, IndexerRuleError> {
|
||||
let source = source.as_ref();
|
||||
|
||||
// FIXME(fogodev): Just check for io::ErrorKind::NotADirectory error instead (feature = "io_error_more", issue = "86442")
|
||||
if !metadata.is_dir() {
|
||||
return Ok(true);
|
||||
}
|
||||
|
||||
let mut read_dir = fs::read_dir(source)
|
||||
.await // TODO: Check NotADirectory error here when available
|
||||
.map_err(|e| IndexerRuleError::RejectByItsChildrenFileIO(FileIOError::from((source, e))))?;
|
||||
while let Some(entry) = read_dir
|
||||
.next_entry()
|
||||
.await
|
||||
.map_err(|e| IndexerRuleError::RejectByItsChildrenFileIO(FileIOError::from((source, e))))?
|
||||
{
|
||||
if entry
|
||||
.metadata()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
IndexerRuleError::RejectByItsChildrenFileIO(FileIOError::from((source, e)))
|
||||
})?
|
||||
.is_dir() && children.contains(
|
||||
entry
|
||||
.file_name()
|
||||
.to_str()
|
||||
.ok_or_else(|| NonUtf8PathError(entry.path().into()))?,
|
||||
) {
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn generate_pub_id() -> Uuid {
|
||||
loop {
|
||||
let pub_id = Uuid::new_v4();
|
||||
|
@ -624,6 +657,7 @@ mod tests {
|
|||
use tempfile::tempdir;
|
||||
|
||||
impl IndexerRule {
|
||||
#[must_use]
|
||||
pub fn new(name: String, default: bool, rules: Vec<RulePerKind>) -> Self {
|
||||
Self {
|
||||
id: None,
|
||||
|
@ -636,7 +670,7 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
async fn check_rule(indexer_rule: &IndexerRule, path: impl AsRef<Path>) -> bool {
|
||||
async fn check_rule(indexer_rule: &IndexerRule, path: impl AsRef<Path> + Send) -> bool {
|
||||
indexer_rule
|
||||
.apply(path)
|
||||
.await
|
||||
|
@ -697,6 +731,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[allow(clippy::similar_names)]
|
||||
async fn test_only_photos() {
|
||||
let text = Path::new("file.txt");
|
||||
let png = Path::new("photo1.png");
|
||||
|
@ -748,7 +783,7 @@ mod tests {
|
|||
fs::create_dir(project2.join(".git")).await.unwrap();
|
||||
fs::create_dir(project2.join("books")).await.unwrap();
|
||||
|
||||
let childrens = [".git".to_string()].into_iter().collect::<HashSet<_>>();
|
||||
let childrens = HashSet::from([".git".to_string()]);
|
||||
|
||||
let rule = IndexerRule::new(
|
||||
"git projects".to_string(),
|
||||
|
@ -779,7 +814,7 @@ mod tests {
|
|||
fs::create_dir(project2.join(".git")).await.unwrap();
|
||||
fs::create_dir(project2.join("books")).await.unwrap();
|
||||
|
||||
let childrens = [".git".to_string()].into_iter().collect::<HashSet<_>>();
|
||||
let childrens = HashSet::from([".git".to_string()]);
|
||||
|
||||
let rule = IndexerRule::new(
|
||||
"git projects".to_string(),
|
||||
|
@ -798,21 +833,23 @@ mod tests {
|
|||
fn eq(&self, other: &Self) -> bool {
|
||||
match (self, other) {
|
||||
(
|
||||
RulePerKind::AcceptFilesByGlob(self_globs, _),
|
||||
RulePerKind::AcceptFilesByGlob(other_globs, _),
|
||||
Self::AcceptFilesByGlob(self_globs, _),
|
||||
Self::AcceptFilesByGlob(other_globs, _),
|
||||
)
|
||||
| (
|
||||
Self::RejectFilesByGlob(self_globs, _),
|
||||
Self::RejectFilesByGlob(other_globs, _),
|
||||
) => self_globs == other_globs,
|
||||
|
||||
(
|
||||
RulePerKind::RejectFilesByGlob(self_globs, _),
|
||||
RulePerKind::RejectFilesByGlob(other_globs, _),
|
||||
) => self_globs == other_globs,
|
||||
(
|
||||
RulePerKind::AcceptIfChildrenDirectoriesArePresent(self_childrens),
|
||||
RulePerKind::AcceptIfChildrenDirectoriesArePresent(other_childrens),
|
||||
) => self_childrens == other_childrens,
|
||||
(
|
||||
RulePerKind::RejectIfChildrenDirectoriesArePresent(self_childrens),
|
||||
RulePerKind::RejectIfChildrenDirectoriesArePresent(other_childrens),
|
||||
Self::AcceptIfChildrenDirectoriesArePresent(self_childrens),
|
||||
Self::AcceptIfChildrenDirectoriesArePresent(other_childrens),
|
||||
)
|
||||
| (
|
||||
Self::RejectIfChildrenDirectoriesArePresent(self_childrens),
|
||||
Self::RejectIfChildrenDirectoriesArePresent(other_childrens),
|
||||
) => self_childrens == other_childrens,
|
||||
|
||||
_ => false,
|
||||
}
|
||||
}
|
|
@ -1,14 +1,11 @@
|
|||
use crate::{
|
||||
library::Library,
|
||||
location::indexer::rules::{IndexerRule, IndexerRuleError, RulePerKind},
|
||||
};
|
||||
|
||||
use sd_prisma::prisma::indexer_rule;
|
||||
use sd_prisma::prisma::{indexer_rule, PrismaClient};
|
||||
|
||||
use chrono::Utc;
|
||||
use thiserror::Error;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::{IndexerRule, IndexerRuleError, RulePerKind};
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum SeederError {
|
||||
#[error("Failed to run indexer rules seeder: {0}")]
|
||||
|
@ -37,7 +34,9 @@ impl From<SystemIndexerRule> for IndexerRule {
|
|||
}
|
||||
|
||||
/// Seeds system indexer rules into a new or existing library,
|
||||
pub async fn new_or_existing_library(library: &Library) -> Result<(), SeederError> {
|
||||
pub async fn new_or_existing_library(db: &PrismaClient) -> Result<(), SeederError> {
|
||||
use indexer_rule::{date_created, date_modified, default, name, rules_per_kind};
|
||||
|
||||
// DO NOT REORDER THIS ARRAY!
|
||||
for (i, rule) in [no_os_protected(), no_hidden(), no_git(), only_images()]
|
||||
.into_iter()
|
||||
|
@ -46,8 +45,6 @@ pub async fn new_or_existing_library(library: &Library) -> Result<(), SeederErro
|
|||
let pub_id = sd_utils::uuid_to_bytes(Uuid::from_u128(i as u128));
|
||||
let rules = rmp_serde::to_vec_named(&rule.rules).map_err(IndexerRuleError::from)?;
|
||||
|
||||
use indexer_rule::*;
|
||||
|
||||
let data = vec![
|
||||
name::set(Some(rule.name.to_string())),
|
||||
rules_per_kind::set(Some(rules.clone())),
|
||||
|
@ -56,9 +53,7 @@ pub async fn new_or_existing_library(library: &Library) -> Result<(), SeederErro
|
|||
date_modified::set(Some(Utc::now().into())),
|
||||
];
|
||||
|
||||
library
|
||||
.db
|
||||
.indexer_rule()
|
||||
db.indexer_rule()
|
||||
.upsert(
|
||||
indexer_rule::pub_id::equals(pub_id.clone()),
|
||||
indexer_rule::create(pub_id.clone(), data.clone()),
|
||||
|
@ -71,6 +66,8 @@ pub async fn new_or_existing_library(library: &Library) -> Result<(), SeederErro
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
#[allow(clippy::missing_panics_doc)]
|
||||
pub fn no_os_protected() -> SystemIndexerRule {
|
||||
SystemIndexerRule {
|
||||
// TODO: On windows, beside the listed files, any file with the FILE_ATTRIBUTE_SYSTEM should be considered a system file
|
||||
|
@ -105,7 +102,7 @@ pub fn no_os_protected() -> SystemIndexerRule {
|
|||
"C:/Users/*/NTUSER.DAT*",
|
||||
"C:/Users/*/ntuser.dat*",
|
||||
"C:/Users/*/{ntuser.ini,ntuser.dat,NTUSER.DAT}",
|
||||
// User special folders (most of these the user dont even have permission to access)
|
||||
// User special folders (most of these the user don't even have permission to access)
|
||||
"C:/Users/*/{Cookies,AppData,NetHood,Recent,PrintHood,SendTo,Templates,Start Menu,Application Data,Local Settings,My Documents}",
|
||||
// System special folders
|
||||
"C:/{$Recycle.Bin,$WinREAgent,Documents and Settings,Program Files,Program Files (x86),ProgramData,Recovery,PerfLogs,Windows,Windows.old}",
|
||||
|
@ -177,6 +174,8 @@ pub fn no_os_protected() -> SystemIndexerRule {
|
|||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
#[allow(clippy::missing_panics_doc)]
|
||||
pub fn no_hidden() -> SystemIndexerRule {
|
||||
SystemIndexerRule {
|
||||
name: "No Hidden",
|
||||
|
@ -186,6 +185,8 @@ pub fn no_hidden() -> SystemIndexerRule {
|
|||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
#[allow(clippy::missing_panics_doc)]
|
||||
fn no_git() -> SystemIndexerRule {
|
||||
SystemIndexerRule {
|
||||
name: "No Git",
|
||||
|
@ -197,6 +198,8 @@ fn no_git() -> SystemIndexerRule {
|
|||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
#[allow(clippy::missing_panics_doc)]
|
||||
fn only_images() -> SystemIndexerRule {
|
||||
SystemIndexerRule {
|
||||
name: "Only Images",
|
214
core/crates/indexer-rules/src/serde_impl.rs
Normal file
214
core/crates/indexer-rules/src/serde_impl.rs
Normal file
|
@ -0,0 +1,214 @@
|
|||
use std::{collections::HashSet, marker::PhantomData};
|
||||
|
||||
use globset::{Glob, GlobSetBuilder};
|
||||
use serde::{de, ser, Deserialize, Serialize};
|
||||
|
||||
use super::RulePerKind;
|
||||
|
||||
/// We're implementing `Serialize` by hand as `GlobSet`s aren't serializable, so we ignore them on
|
||||
/// serialization
|
||||
impl Serialize for RulePerKind {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: ser::Serializer,
|
||||
{
|
||||
match *self {
|
||||
Self::AcceptFilesByGlob(ref globs, ref _glob_set) => serializer
|
||||
.serialize_newtype_variant("ParametersPerKind", 0, "AcceptFilesByGlob", globs),
|
||||
Self::RejectFilesByGlob(ref globs, ref _glob_set) => serializer
|
||||
.serialize_newtype_variant("ParametersPerKind", 1, "RejectFilesByGlob", globs),
|
||||
Self::AcceptIfChildrenDirectoriesArePresent(ref children) => serializer
|
||||
.serialize_newtype_variant(
|
||||
"ParametersPerKind",
|
||||
2,
|
||||
"AcceptIfChildrenDirectoriesArePresent",
|
||||
children,
|
||||
),
|
||||
Self::RejectIfChildrenDirectoriesArePresent(ref children) => serializer
|
||||
.serialize_newtype_variant(
|
||||
"ParametersPerKind",
|
||||
3,
|
||||
"RejectIfChildrenDirectoriesArePresent",
|
||||
children,
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for RulePerKind {
|
||||
#[allow(clippy::too_many_lines)]
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: de::Deserializer<'de>,
|
||||
{
|
||||
const VARIANTS: &[&str] = &[
|
||||
"AcceptFilesByGlob",
|
||||
"RejectFilesByGlob",
|
||||
"AcceptIfChildrenDirectoriesArePresent",
|
||||
"RejectIfChildrenDirectoriesArePresent",
|
||||
];
|
||||
|
||||
enum Fields {
|
||||
AcceptFilesByGlob,
|
||||
RejectFilesByGlob,
|
||||
AcceptIfChildrenDirectoriesArePresent,
|
||||
RejectIfChildrenDirectoriesArePresent,
|
||||
}
|
||||
|
||||
struct FieldsVisitor;
|
||||
|
||||
impl<'de> de::Visitor<'de> for FieldsVisitor {
|
||||
type Value = Fields;
|
||||
|
||||
fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
formatter.write_str(
|
||||
"`AcceptFilesByGlob` \
|
||||
or `RejectFilesByGlob` \
|
||||
or `AcceptIfChildrenDirectoriesArePresent` \
|
||||
or `RejectIfChildrenDirectoriesArePresent`",
|
||||
)
|
||||
}
|
||||
|
||||
fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
|
||||
where
|
||||
E: de::Error,
|
||||
{
|
||||
match value {
|
||||
0 => Ok(Fields::AcceptFilesByGlob),
|
||||
1 => Ok(Fields::RejectFilesByGlob),
|
||||
2 => Ok(Fields::AcceptIfChildrenDirectoriesArePresent),
|
||||
3 => Ok(Fields::RejectIfChildrenDirectoriesArePresent),
|
||||
_ => Err(de::Error::invalid_value(
|
||||
de::Unexpected::Unsigned(value),
|
||||
&"variant index 0 <= i < 3",
|
||||
)),
|
||||
}
|
||||
}
|
||||
fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
|
||||
where
|
||||
E: de::Error,
|
||||
{
|
||||
match value {
|
||||
"AcceptFilesByGlob" => Ok(Fields::AcceptFilesByGlob),
|
||||
"RejectFilesByGlob" => Ok(Fields::RejectFilesByGlob),
|
||||
"AcceptIfChildrenDirectoriesArePresent" => {
|
||||
Ok(Fields::AcceptIfChildrenDirectoriesArePresent)
|
||||
}
|
||||
"RejectIfChildrenDirectoriesArePresent" => {
|
||||
Ok(Fields::RejectIfChildrenDirectoriesArePresent)
|
||||
}
|
||||
_ => Err(de::Error::unknown_variant(value, VARIANTS)),
|
||||
}
|
||||
}
|
||||
fn visit_bytes<E>(self, bytes: &[u8]) -> Result<Self::Value, E>
|
||||
where
|
||||
E: de::Error,
|
||||
{
|
||||
match bytes {
|
||||
b"AcceptFilesByGlob" => Ok(Fields::AcceptFilesByGlob),
|
||||
b"RejectFilesByGlob" => Ok(Fields::RejectFilesByGlob),
|
||||
b"AcceptIfChildrenDirectoriesArePresent" => {
|
||||
Ok(Fields::AcceptIfChildrenDirectoriesArePresent)
|
||||
}
|
||||
b"RejectIfChildrenDirectoriesArePresent" => {
|
||||
Ok(Fields::RejectIfChildrenDirectoriesArePresent)
|
||||
}
|
||||
_ => Err(de::Error::unknown_variant(
|
||||
&String::from_utf8_lossy(bytes),
|
||||
VARIANTS,
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for Fields {
|
||||
#[inline]
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: de::Deserializer<'de>,
|
||||
{
|
||||
deserializer.deserialize_identifier(FieldsVisitor)
|
||||
}
|
||||
}
|
||||
|
||||
struct ParametersPerKindVisitor<'de> {
|
||||
marker: PhantomData<RulePerKind>,
|
||||
lifetime: PhantomData<&'de ()>,
|
||||
}
|
||||
|
||||
impl<'de> de::Visitor<'de> for ParametersPerKindVisitor<'de> {
|
||||
type Value = RulePerKind;
|
||||
|
||||
fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
formatter.write_str("enum ParametersPerKind")
|
||||
}
|
||||
|
||||
fn visit_enum<PPK>(self, data: PPK) -> Result<Self::Value, PPK::Error>
|
||||
where
|
||||
PPK: de::EnumAccess<'de>,
|
||||
{
|
||||
use de::Error;
|
||||
|
||||
de::EnumAccess::variant(data).and_then(|value| match value {
|
||||
(Fields::AcceptFilesByGlob, accept_files_by_glob) => {
|
||||
de::VariantAccess::newtype_variant::<Vec<Glob>>(accept_files_by_glob)
|
||||
.and_then(|globs| {
|
||||
globs
|
||||
.iter()
|
||||
.fold(&mut GlobSetBuilder::new(), |builder, glob| {
|
||||
builder.add(glob.to_owned())
|
||||
})
|
||||
.build()
|
||||
.map_or_else(
|
||||
|e| Err(PPK::Error::custom(e)),
|
||||
|glob_set| {
|
||||
Ok(Self::Value::AcceptFilesByGlob(globs, glob_set))
|
||||
},
|
||||
)
|
||||
})
|
||||
}
|
||||
(Fields::RejectFilesByGlob, reject_files_by_glob) => {
|
||||
de::VariantAccess::newtype_variant::<Vec<Glob>>(reject_files_by_glob)
|
||||
.and_then(|globs| {
|
||||
globs
|
||||
.iter()
|
||||
.fold(&mut GlobSetBuilder::new(), |builder, glob| {
|
||||
builder.add(glob.to_owned())
|
||||
})
|
||||
.build()
|
||||
.map_or_else(
|
||||
|e| Err(PPK::Error::custom(e)),
|
||||
|glob_set| {
|
||||
Ok(Self::Value::RejectFilesByGlob(globs, glob_set))
|
||||
},
|
||||
)
|
||||
})
|
||||
}
|
||||
(
|
||||
Fields::AcceptIfChildrenDirectoriesArePresent,
|
||||
accept_if_children_directories_are_present,
|
||||
) => de::VariantAccess::newtype_variant::<HashSet<String>>(
|
||||
accept_if_children_directories_are_present,
|
||||
)
|
||||
.map(Self::Value::AcceptIfChildrenDirectoriesArePresent),
|
||||
(
|
||||
Fields::RejectIfChildrenDirectoriesArePresent,
|
||||
reject_if_children_directories_are_present,
|
||||
) => de::VariantAccess::newtype_variant::<HashSet<String>>(
|
||||
reject_if_children_directories_are_present,
|
||||
)
|
||||
.map(Self::Value::RejectIfChildrenDirectoriesArePresent),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
deserializer.deserialize_enum(
|
||||
"ParametersPerKind",
|
||||
VARIANTS,
|
||||
ParametersPerKindVisitor {
|
||||
marker: PhantomData::<Self>,
|
||||
lifetime: PhantomData,
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
16
core/crates/prisma-helpers/Cargo.toml
Normal file
16
core/crates/prisma-helpers/Cargo.toml
Normal file
|
@ -0,0 +1,16 @@
|
|||
[package]
|
||||
name = "sd-core-prisma-helpers"
|
||||
version = "0.1.0"
|
||||
authors = ["Ericson Soares <ericson@spacedrive.com>"]
|
||||
license = { workspace = true }
|
||||
repository = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
# Spacedrive Sub-crates
|
||||
sd-prisma = { path = "../../../crates/prisma" }
|
||||
|
||||
prisma-client-rust = { workspace = true }
|
||||
serde = { workspace = true }
|
226
core/crates/prisma-helpers/src/lib.rs
Normal file
226
core/crates/prisma-helpers/src/lib.rs
Normal file
|
@ -0,0 +1,226 @@
|
|||
#![warn(
|
||||
clippy::all,
|
||||
clippy::pedantic,
|
||||
clippy::correctness,
|
||||
clippy::perf,
|
||||
clippy::style,
|
||||
clippy::suspicious,
|
||||
clippy::complexity,
|
||||
clippy::nursery,
|
||||
clippy::unwrap_used,
|
||||
unused_qualifications,
|
||||
rust_2018_idioms,
|
||||
trivial_casts,
|
||||
trivial_numeric_casts,
|
||||
unused_allocation,
|
||||
clippy::unnecessary_cast,
|
||||
clippy::cast_lossless,
|
||||
clippy::cast_possible_truncation,
|
||||
clippy::cast_possible_wrap,
|
||||
clippy::cast_precision_loss,
|
||||
clippy::cast_sign_loss,
|
||||
clippy::dbg_macro,
|
||||
clippy::deprecated_cfg_attr,
|
||||
clippy::separated_literal_suffix,
|
||||
deprecated
|
||||
)]
|
||||
#![forbid(deprecated_in_future)]
|
||||
#![allow(clippy::missing_errors_doc, clippy::module_name_repetitions)]
|
||||
|
||||
use sd_prisma::prisma::{file_path, job, label, location, object};
|
||||
|
||||
// File Path selectables!
|
||||
file_path::select!(file_path_pub_and_cas_ids { id pub_id cas_id });
|
||||
file_path::select!(file_path_just_pub_id_materialized_path {
|
||||
pub_id
|
||||
materialized_path
|
||||
});
|
||||
file_path::select!(file_path_for_file_identifier {
|
||||
id
|
||||
pub_id
|
||||
materialized_path
|
||||
date_created
|
||||
is_dir
|
||||
name
|
||||
extension
|
||||
object_id
|
||||
});
|
||||
file_path::select!(file_path_for_object_validator {
|
||||
pub_id
|
||||
materialized_path
|
||||
is_dir
|
||||
name
|
||||
extension
|
||||
integrity_checksum
|
||||
});
|
||||
file_path::select!(file_path_for_media_processor {
|
||||
id
|
||||
materialized_path
|
||||
is_dir
|
||||
name
|
||||
extension
|
||||
cas_id
|
||||
object_id
|
||||
});
|
||||
file_path::select!(file_path_to_isolate {
|
||||
location_id
|
||||
materialized_path
|
||||
is_dir
|
||||
name
|
||||
extension
|
||||
});
|
||||
file_path::select!(file_path_to_isolate_with_pub_id {
|
||||
pub_id
|
||||
location_id
|
||||
materialized_path
|
||||
is_dir
|
||||
name
|
||||
extension
|
||||
});
|
||||
file_path::select!(file_path_to_isolate_with_id {
|
||||
id
|
||||
location_id
|
||||
materialized_path
|
||||
is_dir
|
||||
name
|
||||
extension
|
||||
});
|
||||
file_path::select!(file_path_walker {
|
||||
pub_id
|
||||
location_id
|
||||
object_id
|
||||
materialized_path
|
||||
is_dir
|
||||
name
|
||||
extension
|
||||
date_modified
|
||||
inode
|
||||
size_in_bytes_bytes
|
||||
hidden
|
||||
});
|
||||
file_path::select!(file_path_to_handle_custom_uri {
|
||||
pub_id
|
||||
materialized_path
|
||||
is_dir
|
||||
name
|
||||
extension
|
||||
location: select {
|
||||
id
|
||||
path
|
||||
instance: select {
|
||||
identity
|
||||
remote_identity
|
||||
}
|
||||
}
|
||||
});
|
||||
file_path::select!(file_path_to_handle_p2p_serve_file {
|
||||
materialized_path
|
||||
name
|
||||
extension
|
||||
is_dir // For isolated file path
|
||||
location: select {
|
||||
id
|
||||
path
|
||||
}
|
||||
});
|
||||
file_path::select!(file_path_to_full_path {
|
||||
id
|
||||
materialized_path
|
||||
is_dir
|
||||
name
|
||||
extension
|
||||
location: select {
|
||||
id
|
||||
path
|
||||
}
|
||||
});
|
||||
|
||||
// File Path includes!
|
||||
file_path::include!(file_path_with_object { object });
|
||||
|
||||
// Object selectables!
|
||||
object::select!(object_for_file_identifier {
|
||||
pub_id
|
||||
file_paths: select { pub_id cas_id extension is_dir materialized_path name }
|
||||
});
|
||||
|
||||
// Object includes!
|
||||
object::include!(object_with_file_paths { file_paths });
|
||||
|
||||
// Job selectables!
|
||||
job::select!(job_without_data {
|
||||
id
|
||||
name
|
||||
action
|
||||
status
|
||||
parent_id
|
||||
errors_text
|
||||
metadata
|
||||
date_created
|
||||
date_started
|
||||
date_completed
|
||||
task_count
|
||||
completed_task_count
|
||||
date_estimated_completion
|
||||
});
|
||||
|
||||
// Location includes!
|
||||
location::include!(location_with_indexer_rules {
|
||||
indexer_rules: select { indexer_rule }
|
||||
});
|
||||
|
||||
impl From<location_with_indexer_rules::Data> for location::Data {
|
||||
fn from(data: location_with_indexer_rules::Data) -> Self {
|
||||
Self {
|
||||
id: data.id,
|
||||
pub_id: data.pub_id,
|
||||
path: data.path,
|
||||
instance_id: data.instance_id,
|
||||
name: data.name,
|
||||
total_capacity: data.total_capacity,
|
||||
available_capacity: data.available_capacity,
|
||||
is_archived: data.is_archived,
|
||||
size_in_bytes: data.size_in_bytes,
|
||||
generate_preview_media: data.generate_preview_media,
|
||||
sync_preview_media: data.sync_preview_media,
|
||||
hidden: data.hidden,
|
||||
date_created: data.date_created,
|
||||
file_paths: None,
|
||||
indexer_rules: None,
|
||||
instance: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&location_with_indexer_rules::Data> for location::Data {
|
||||
fn from(data: &location_with_indexer_rules::Data) -> Self {
|
||||
Self {
|
||||
id: data.id,
|
||||
pub_id: data.pub_id.clone(),
|
||||
path: data.path.clone(),
|
||||
instance_id: data.instance_id,
|
||||
name: data.name.clone(),
|
||||
total_capacity: data.total_capacity,
|
||||
available_capacity: data.available_capacity,
|
||||
size_in_bytes: data.size_in_bytes.clone(),
|
||||
is_archived: data.is_archived,
|
||||
generate_preview_media: data.generate_preview_media,
|
||||
sync_preview_media: data.sync_preview_media,
|
||||
hidden: data.hidden,
|
||||
date_created: data.date_created,
|
||||
file_paths: None,
|
||||
indexer_rules: None,
|
||||
instance: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Label includes!
|
||||
label::include!((take: i64) => label_with_objects {
|
||||
label_objects(vec![]).take(take): select {
|
||||
object: select {
|
||||
id
|
||||
file_paths(vec![]).take(1)
|
||||
}
|
||||
}
|
||||
});
|
|
@ -7,6 +7,7 @@ edition = "2021"
|
|||
default = []
|
||||
|
||||
[dependencies]
|
||||
# Spacedrive Sub-crates
|
||||
sd-prisma = { path = "../../../crates/prisma" }
|
||||
sd-sync = { path = "../../../crates/sync" }
|
||||
sd-utils = { path = "../../../crates/utils" }
|
||||
|
|
|
@ -7,6 +7,7 @@ use sd_utils::uuid_to_bytes;
|
|||
use std::{
|
||||
cmp::Ordering,
|
||||
collections::HashMap,
|
||||
fmt,
|
||||
ops::Deref,
|
||||
sync::{
|
||||
atomic::{self, AtomicBool},
|
||||
|
@ -25,6 +26,12 @@ pub struct Manager {
|
|||
pub shared: Arc<SharedState>,
|
||||
}
|
||||
|
||||
impl fmt::Debug for Manager {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("SyncManager").finish()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(serde::Serialize, serde::Deserialize, Debug, PartialEq, Eq)]
|
||||
pub struct GetOpsArgs {
|
||||
pub clocks: Vec<(Uuid, NTP64)>,
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
mod mock_instance;
|
||||
|
||||
use sd_core_sync::*;
|
||||
|
||||
use sd_prisma::{prisma, prisma_sync};
|
||||
use sd_sync::*;
|
||||
use sd_utils::uuid_to_bytes;
|
||||
|
|
|
@ -403,10 +403,13 @@ model Job {
|
|||
// Enum: sd_core::job::job_manager:JobStatus
|
||||
status Int? // 0 = Queued
|
||||
|
||||
// List of errors, separated by "\n\n" in case of failed jobs or completed with errors
|
||||
errors_text String?
|
||||
|
||||
data Bytes? // Serialized data to be used on pause/resume
|
||||
// List of errors, separated by "\n\n" in case of failed jobs or completed with errors
|
||||
errors_text String? // Deprecated, use `critical_error` or `non_critical_errors` instead
|
||||
critical_error String? // Serialized error field with info about the failed job after completion
|
||||
non_critical_errors Bytes? // Serialized non-critical errors field with info about the completed job with errors after completion
|
||||
|
||||
data Bytes? // Deprecated
|
||||
metadata Bytes? // Serialized metadata field with info about the job after completion
|
||||
|
||||
parent_id Bytes?
|
||||
|
|
|
@ -10,8 +10,9 @@ use crate::{
|
|||
},
|
||||
};
|
||||
|
||||
use sd_core_file_path_helper::IsolatedFilePathData;
|
||||
|
||||
use sd_file_ext::extensions::ImageExtension;
|
||||
use sd_file_path_helper::IsolatedFilePathData;
|
||||
use sd_media_metadata::MediaMetadata;
|
||||
use sd_utils::error::FileIOError;
|
||||
|
||||
|
@ -37,9 +38,10 @@ const UNTITLED_FILE_STR: &str = "Untitled";
|
|||
const UNTITLED_TEXT_FILE_STR: &str = "Untitled.txt";
|
||||
|
||||
#[derive(Type, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
enum EphemeralFileCreateContextTypes {
|
||||
empty,
|
||||
text,
|
||||
Empty,
|
||||
Text,
|
||||
}
|
||||
|
||||
pub(crate) fn mount() -> AlphaRouter<Ctx> {
|
||||
|
@ -103,10 +105,10 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
|
|||
context,
|
||||
}: CreateEphemeralFileArgs| async move {
|
||||
match context {
|
||||
EphemeralFileCreateContextTypes::empty => {
|
||||
EphemeralFileCreateContextTypes::Empty => {
|
||||
path.push(name.as_deref().unwrap_or(UNTITLED_FILE_STR));
|
||||
}
|
||||
EphemeralFileCreateContextTypes::text => {
|
||||
EphemeralFileCreateContextTypes::Text => {
|
||||
path.push(name.as_deref().unwrap_or(UNTITLED_TEXT_FILE_STR));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use crate::{
|
||||
api::{locations::object_with_file_paths, utils::library},
|
||||
api::utils::library,
|
||||
invalidate_query,
|
||||
library::Library,
|
||||
location::{get_location_path_from_location_id, LocationError},
|
||||
|
@ -14,11 +14,13 @@ use crate::{
|
|||
old_job::Job,
|
||||
};
|
||||
|
||||
use sd_core_file_path_helper::{FilePathError, IsolatedFilePathData};
|
||||
use sd_core_prisma_helpers::{
|
||||
file_path_to_isolate, file_path_to_isolate_with_id, object_with_file_paths,
|
||||
};
|
||||
|
||||
use sd_cache::{CacheNode, Model, NormalisedResult, Reference};
|
||||
use sd_file_ext::kind::ObjectKind;
|
||||
use sd_file_path_helper::{
|
||||
file_path_to_isolate, file_path_to_isolate_with_id, FilePathError, IsolatedFilePathData,
|
||||
};
|
||||
use sd_images::ConvertibleExtension;
|
||||
use sd_media_metadata::MediaMetadata;
|
||||
use sd_prisma::{
|
||||
|
@ -50,9 +52,10 @@ const UNTITLED_FILE_STR: &str = "Untitled";
|
|||
const UNTITLED_TEXT_FILE_STR: &str = "Untitled.txt";
|
||||
|
||||
#[derive(Type, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
enum FileCreateContextTypes {
|
||||
empty,
|
||||
text,
|
||||
Empty,
|
||||
Text,
|
||||
}
|
||||
|
||||
pub(crate) fn mount() -> AlphaRouter<Ctx> {
|
||||
|
@ -329,10 +332,10 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
|
|||
}
|
||||
|
||||
match context {
|
||||
FileCreateContextTypes::empty => {
|
||||
FileCreateContextTypes::Empty => {
|
||||
path.push(name.as_deref().unwrap_or(UNTITLED_FILE_STR))
|
||||
}
|
||||
FileCreateContextTypes::text => {
|
||||
FileCreateContextTypes::Text => {
|
||||
path.push(name.as_deref().unwrap_or(UNTITLED_TEXT_FILE_STR))
|
||||
}
|
||||
}
|
||||
|
@ -645,7 +648,7 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
|
|||
Ok(())
|
||||
})
|
||||
})
|
||||
.procedure("getConvertableImageExtensions", {
|
||||
.procedure("getConvertibleImageExtensions", {
|
||||
R.query(|_, _: ()| async move { Ok(sd_images::all_compatible_extensions()) })
|
||||
})
|
||||
.procedure("eraseFiles", {
|
||||
|
|
|
@ -6,9 +6,11 @@ use crate::{
|
|||
old_file_identifier::old_file_identifier_job::OldFileIdentifierJobInit,
|
||||
validation::old_validator_job::OldObjectValidatorJobInit,
|
||||
},
|
||||
old_job::{job_without_data, Job, JobReport, JobStatus, OldJobs},
|
||||
old_job::{Job, JobReport, JobStatus, OldJobs},
|
||||
};
|
||||
|
||||
use sd_core_prisma_helpers::job_without_data;
|
||||
|
||||
use sd_prisma::prisma::{job, location, SortOrder};
|
||||
|
||||
use std::{
|
||||
|
|
|
@ -2,6 +2,8 @@ use crate::{
|
|||
invalidate_query, library::Library, object::media::old_thumbnail::get_indexed_thumb_key,
|
||||
};
|
||||
|
||||
use sd_core_prisma_helpers::label_with_objects;
|
||||
|
||||
use sd_prisma::{
|
||||
prisma::{label, label_on_object, object, SortOrder},
|
||||
prisma_sync,
|
||||
|
@ -14,15 +16,6 @@ use rspc::alpha::AlphaRouter;
|
|||
|
||||
use super::{locations::ExplorerItem, utils::library, Ctx, R};
|
||||
|
||||
label::include!((take: i64) => label_with_objects {
|
||||
label_objects(vec![]).take(take): select {
|
||||
object: select {
|
||||
id
|
||||
file_paths(vec![]).take(1)
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
pub(crate) fn mount() -> AlphaRouter<Ctx> {
|
||||
R.router()
|
||||
.procedure("list", {
|
||||
|
|
|
@ -1,12 +1,9 @@
|
|||
use crate::{
|
||||
invalidate_query,
|
||||
location::{
|
||||
delete_location, find_location,
|
||||
indexer::{rules::IndexerRuleCreateArgs, OldIndexerJobInit},
|
||||
light_scan_location, location_with_indexer_rules,
|
||||
non_indexed::NonIndexedPathItem,
|
||||
relink_location, scan_location, scan_location_sub_path, LocationCreateArgs, LocationError,
|
||||
LocationUpdateArgs,
|
||||
delete_location, find_location, indexer::OldIndexerJobInit, light_scan_location,
|
||||
non_indexed::NonIndexedPathItem, relink_location, scan_location, scan_location_sub_path,
|
||||
LocationCreateArgs, LocationError, LocationUpdateArgs,
|
||||
},
|
||||
object::old_file_identifier::old_file_identifier_job::OldFileIdentifierJobInit,
|
||||
old_job::StatefulJob,
|
||||
|
@ -14,11 +11,14 @@ use crate::{
|
|||
util::AbortOnDrop,
|
||||
};
|
||||
|
||||
use sd_cache::{CacheNode, Model, Normalise, NormalisedResult, NormalisedResults, Reference};
|
||||
use sd_prisma::prisma::{
|
||||
file_path, indexer_rule, indexer_rules_in_location, location, object, SortOrder,
|
||||
use sd_core_indexer_rules::IndexerRuleCreateArgs;
|
||||
use sd_core_prisma_helpers::{
|
||||
file_path_with_object, label_with_objects, location_with_indexer_rules, object_with_file_paths,
|
||||
};
|
||||
|
||||
use sd_cache::{CacheNode, Model, Normalise, NormalisedResult, NormalisedResults, Reference};
|
||||
use sd_prisma::prisma::{file_path, indexer_rule, indexer_rules_in_location, location, SortOrder};
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use chrono::{DateTime, FixedOffset, Utc};
|
||||
|
@ -28,7 +28,7 @@ use serde::{Deserialize, Serialize};
|
|||
use specta::Type;
|
||||
use tracing::{debug, error};
|
||||
|
||||
use super::{labels::label_with_objects, utils::library, Ctx, R};
|
||||
use super::{utils::library, Ctx, R};
|
||||
|
||||
// it includes the shard hex formatted as ([["f02", "cab34a76fbf3469f"]])
|
||||
// Will be None if no thumbnail exists
|
||||
|
@ -197,9 +197,6 @@ impl ExplorerItem {
|
|||
}
|
||||
}
|
||||
|
||||
file_path::include!(file_path_with_object { object });
|
||||
object::include!(object_with_file_paths { file_paths });
|
||||
|
||||
pub(crate) fn mount() -> AlphaRouter<Ctx> {
|
||||
R.router()
|
||||
.procedure("list", {
|
||||
|
@ -518,7 +515,7 @@ fn mount_indexer_rule_routes() -> AlphaRouter<Ctx> {
|
|||
.procedure("create", {
|
||||
R.with2(library())
|
||||
.mutation(|(_, library), args: IndexerRuleCreateArgs| async move {
|
||||
if args.create(&library).await?.is_some() {
|
||||
if args.create(&library.db).await?.is_some() {
|
||||
invalidate_query!(library, "locations.indexer_rules.list");
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
use crate::location::LocationError;
|
||||
|
||||
use sd_file_path_helper::{check_file_path_exists, IsolatedFilePathData};
|
||||
use sd_core_file_path_helper::{check_file_path_exists, IsolatedFilePathData};
|
||||
|
||||
use sd_prisma::prisma::{self, file_path};
|
||||
|
||||
use chrono::{DateTime, FixedOffset, Utc};
|
||||
|
|
|
@ -1,14 +1,13 @@
|
|||
use crate::{
|
||||
api::{
|
||||
locations::{file_path_with_object, object_with_file_paths, ExplorerItem},
|
||||
utils::library,
|
||||
},
|
||||
api::{locations::ExplorerItem, utils::library},
|
||||
library::Library,
|
||||
location::{non_indexed, LocationError},
|
||||
object::media::old_thumbnail::get_indexed_thumb_key,
|
||||
util::{unsafe_streamed_query, BatchedStream},
|
||||
};
|
||||
|
||||
use sd_core_prisma_helpers::{file_path_with_object, object_with_file_paths};
|
||||
|
||||
use sd_cache::{CacheNode, Model, Normalise, Reference};
|
||||
use sd_prisma::prisma::{self, PrismaClient};
|
||||
|
||||
|
|
|
@ -56,10 +56,10 @@ pub async fn run_actor(
|
|||
}
|
||||
|
||||
debug!(
|
||||
"Sending {} messages ({} to {}) to ingester",
|
||||
"Sending {} messages ({:?} to {:?}) to ingester",
|
||||
ops.len(),
|
||||
ops.first().unwrap().timestamp.as_u64(),
|
||||
ops.last().unwrap().timestamp.as_u64(),
|
||||
ops.first().map(|operation| operation.timestamp.as_u64()),
|
||||
ops.last().map(|operation| operation.timestamp.as_u64()),
|
||||
);
|
||||
|
||||
err_break!(
|
||||
|
|
|
@ -1,12 +1,10 @@
|
|||
use crate::{library::Libraries, Node};
|
||||
|
||||
use super::{err_break, CompressedCRDTOperations};
|
||||
use sd_cloud_api::RequestConfigProvider;
|
||||
use sd_p2p::RemoteIdentity;
|
||||
use sd_prisma::prisma::{cloud_crdt_operation, instance, PrismaClient, SortOrder};
|
||||
use sd_sync::CRDTOperation;
|
||||
use sd_utils::uuid_to_bytes;
|
||||
use tracing::{debug, info};
|
||||
|
||||
use std::{
|
||||
collections::{hash_map::Entry, HashMap},
|
||||
|
@ -21,8 +19,11 @@ use base64::prelude::*;
|
|||
use chrono::Utc;
|
||||
use serde_json::to_vec;
|
||||
use tokio::{sync::Notify, time::sleep};
|
||||
use tracing::{debug, info};
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::{err_break, CompressedCRDTOperations};
|
||||
|
||||
// Responsible for downloading sync operations from the cloud to be processed by the ingester
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
|
@ -42,7 +43,7 @@ pub async fn run_actor(
|
|||
active_notify.notify_waiters();
|
||||
|
||||
loop {
|
||||
// We need to know the lastest operations we should be retrieving
|
||||
// We need to know the latest operations we should be retrieving
|
||||
let mut cloud_timestamps = {
|
||||
let timestamps = sync.timestamps.read().await;
|
||||
|
||||
|
@ -181,10 +182,14 @@ pub async fn run_actor(
|
|||
let operations = compressed_operations.into_ops();
|
||||
|
||||
debug!(
|
||||
"Processing collection. Instance {}, Start {}, End {}",
|
||||
"Processing collection. Instance {}, Start {:?}, End {:?}",
|
||||
&collection.instance_uuid,
|
||||
operations.first().unwrap().timestamp.as_u64(),
|
||||
operations.last().unwrap().timestamp.as_u64(),
|
||||
operations
|
||||
.first()
|
||||
.map(|operation| operation.timestamp.as_u64()),
|
||||
operations
|
||||
.last()
|
||||
.map(|operation| operation.timestamp.as_u64()),
|
||||
);
|
||||
|
||||
err_break!(write_cloud_ops_to_db(operations, &db).await);
|
||||
|
@ -233,6 +238,7 @@ fn crdt_op_db(op: &CRDTOperation) -> cloud_crdt_operation::Create {
|
|||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn upsert_instance(
|
||||
library_id: Uuid,
|
||||
db: &PrismaClient,
|
||||
|
|
|
@ -1,9 +1,6 @@
|
|||
use super::CompressedCRDTOperations;
|
||||
use sd_core_sync::{SyncMessage, NTP64};
|
||||
|
||||
use sd_cloud_api::RequestConfigProvider;
|
||||
use sd_core_sync::{SyncMessage, NTP64};
|
||||
use tracing::debug;
|
||||
use uuid::Uuid;
|
||||
|
||||
use std::{
|
||||
sync::{
|
||||
|
@ -14,10 +11,10 @@ use std::{
|
|||
};
|
||||
|
||||
use tokio::{sync::Notify, time::sleep};
|
||||
use tracing::debug;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::err_break;
|
||||
|
||||
// Responsible for sending its instance's sync operations to the cloud.
|
||||
use super::{err_break, CompressedCRDTOperations};
|
||||
|
||||
pub async fn run_actor(
|
||||
library_id: Uuid,
|
||||
|
|
|
@ -7,10 +7,10 @@ use crate::{
|
|||
Node,
|
||||
};
|
||||
|
||||
use http_body::combinators::UnsyncBoxBody;
|
||||
use hyper::{header, upgrade::OnUpgrade};
|
||||
use sd_core_file_path_helper::IsolatedFilePathData;
|
||||
use sd_core_prisma_helpers::file_path_to_handle_custom_uri;
|
||||
|
||||
use sd_file_ext::text::is_text;
|
||||
use sd_file_path_helper::{file_path_to_handle_custom_uri, IsolatedFilePathData};
|
||||
use sd_p2p::{RemoteIdentity, P2P};
|
||||
use sd_prisma::prisma::{file_path, location};
|
||||
use sd_utils::db::maybe_missing;
|
||||
|
@ -34,6 +34,8 @@ use axum::{
|
|||
routing::get,
|
||||
Router,
|
||||
};
|
||||
use http_body::combinators::UnsyncBoxBody;
|
||||
use hyper::{header, upgrade::OnUpgrade};
|
||||
use mini_moka::sync::Cache;
|
||||
use tokio::{
|
||||
fs::{self, File},
|
||||
|
@ -353,7 +355,7 @@ pub fn with_state(node: Arc<Node>) -> LocalState {
|
|||
if let CoreEvent::InvalidateOperation(e) = event {
|
||||
match e {
|
||||
InvalidateOperationEvent::Single(event) => {
|
||||
// TODO: This is inefficent as any change will invalidate who cache. We need the new invalidation system!!!
|
||||
// TODO: This is inefficient as any change will invalidate who cache. We need the new invalidation system!!!
|
||||
// TODO: It's also error prone and a fine-grained resource based invalidation system would avoid that.
|
||||
if event.key == "search.objects" || event.key == "search.paths" {
|
||||
file_metadata_cache.invalidate_all();
|
||||
|
|
|
@ -405,9 +405,7 @@ impl LibraryConfig {
|
|||
.await?
|
||||
.into_iter()
|
||||
.filter_map(|i| {
|
||||
let Some(identity) = i.identity else {
|
||||
return None;
|
||||
};
|
||||
let identity = i.identity?;
|
||||
|
||||
let (remote_identity, identity) = if identity[0] == b'I' {
|
||||
// We have an `IdentityOrRemoteIdentity::Identity`
|
||||
|
|
|
@ -2,7 +2,9 @@ use crate::{
|
|||
api::CoreEvent, cloud, object::media::old_thumbnail::get_indexed_thumbnail_path, sync, Node,
|
||||
};
|
||||
|
||||
use sd_file_path_helper::{file_path_to_full_path, IsolatedFilePathData};
|
||||
use sd_core_file_path_helper::IsolatedFilePathData;
|
||||
use sd_core_prisma_helpers::file_path_to_full_path;
|
||||
|
||||
use sd_p2p::Identity;
|
||||
use sd_prisma::prisma::{file_path, location, PrismaClient};
|
||||
use sd_utils::{db::maybe_missing, error::FileIOError};
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
use crate::{
|
||||
library::LibraryConfigError,
|
||||
location::{indexer, LocationManagerError},
|
||||
};
|
||||
use crate::{library::LibraryConfigError, location::LocationManagerError};
|
||||
|
||||
use sd_core_indexer_rules::seed::SeederError;
|
||||
|
||||
use sd_p2p::IdentityErr;
|
||||
use sd_utils::{
|
||||
|
@ -23,7 +22,7 @@ pub enum LibraryManagerError {
|
|||
#[error("failed to parse uuid: {0}")]
|
||||
Uuid(#[from] uuid::Error),
|
||||
#[error("failed to run indexer rules seeder: {0}")]
|
||||
IndexerRulesSeeder(#[from] indexer::rules::seed::SeederError),
|
||||
IndexerRulesSeeder(#[from] SeederError),
|
||||
// #[error("failed to initialize the key manager: {0}")]
|
||||
// KeyManager(#[from] sd_crypto::Error),
|
||||
#[error("error migrating the library: {0}")]
|
||||
|
|
|
@ -1,10 +1,7 @@
|
|||
use crate::{
|
||||
api::{utils::InvalidateOperationEvent, CoreEvent},
|
||||
cloud, invalidate_query,
|
||||
location::{
|
||||
indexer,
|
||||
metadata::{LocationMetadataError, SpacedriveLocationMetadataFile},
|
||||
},
|
||||
location::metadata::{LocationMetadataError, SpacedriveLocationMetadataFile},
|
||||
object::tag,
|
||||
p2p, sync,
|
||||
util::{mpscrr, MaybeUndefined},
|
||||
|
@ -160,6 +157,7 @@ impl Libraries {
|
|||
.await
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(crate) async fn create_with_uuid(
|
||||
self: &Arc<Self>,
|
||||
id: Uuid,
|
||||
|
@ -230,7 +228,7 @@ impl Libraries {
|
|||
|
||||
if should_seed {
|
||||
tag::seed::new_library(&library).await?;
|
||||
indexer::rules::seed::new_or_existing_library(&library).await?;
|
||||
sd_core_indexer_rules::seed::new_or_existing_library(&library.db).await?;
|
||||
debug!("Seeded library '{id:?}'");
|
||||
}
|
||||
|
||||
|
@ -452,7 +450,7 @@ impl Libraries {
|
|||
instance::node_id::set(node_config.id.as_bytes().to_vec()),
|
||||
instance::metadata::set(Some(
|
||||
serde_json::to_vec(&node.p2p.peer_metadata())
|
||||
.expect("invalid peer metdata"),
|
||||
.expect("invalid peer metadata"),
|
||||
)),
|
||||
],
|
||||
)
|
||||
|
@ -525,7 +523,7 @@ impl Libraries {
|
|||
|
||||
if should_seed {
|
||||
// library.orphan_remover.invoke().await;
|
||||
indexer::rules::seed::new_or_existing_library(&library).await?;
|
||||
sd_core_indexer_rules::seed::new_or_existing_library(&library.db).await?;
|
||||
}
|
||||
|
||||
for location in library
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
use sd_file_path_helper::FilePathError;
|
||||
use sd_core_file_path_helper::FilePathError;
|
||||
|
||||
use sd_prisma::prisma::location;
|
||||
use sd_utils::{
|
||||
db::MissingFieldError,
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
use crate::library::Library;
|
||||
|
||||
use sd_file_path_helper::{
|
||||
file_path_pub_and_cas_ids, FilePathError, IsolatedFilePathData, IsolatedFilePathDataParts,
|
||||
};
|
||||
use sd_core_file_path_helper::{FilePathError, IsolatedFilePathData, IsolatedFilePathDataParts};
|
||||
use sd_core_indexer_rules::IndexerRuleError;
|
||||
use sd_core_prisma_helpers::file_path_pub_and_cas_ids;
|
||||
|
||||
use sd_prisma::{
|
||||
prisma::{file_path, location, object as prisma_object, PrismaClient},
|
||||
prisma::{file_path, location, PrismaClient},
|
||||
prisma_sync,
|
||||
};
|
||||
use sd_sync::*;
|
||||
|
@ -26,10 +27,8 @@ use super::location_with_indexer_rules;
|
|||
pub mod old_indexer_job;
|
||||
mod old_shallow;
|
||||
mod old_walk;
|
||||
pub mod rules;
|
||||
|
||||
use old_walk::WalkedEntry;
|
||||
use rules::IndexerRuleError;
|
||||
|
||||
pub use old_indexer_job::OldIndexerJobInit;
|
||||
pub use old_shallow::*;
|
||||
|
@ -84,13 +83,12 @@ impl From<IndexerError> for rspc::Error {
|
|||
|
||||
async fn execute_indexer_save_step(
|
||||
location: &location_with_indexer_rules::Data,
|
||||
save_step: &OldIndexerJobSaveStep,
|
||||
OldIndexerJobSaveStep { walked, .. }: &OldIndexerJobSaveStep,
|
||||
library: &Library,
|
||||
) -> Result<i64, IndexerError> {
|
||||
let Library { sync, db, .. } = library;
|
||||
|
||||
let (sync_stuff, paths): (Vec<_>, Vec<_>) = save_step
|
||||
.walked
|
||||
let (sync_stuff, paths): (Vec<_>, Vec<_>) = walked
|
||||
.iter()
|
||||
.map(|entry| {
|
||||
let IsolatedFilePathDataParts {
|
||||
|
@ -181,8 +179,8 @@ async fn execute_indexer_update_step(
|
|||
let pub_id = sd_utils::uuid_to_bytes(entry.pub_id);
|
||||
|
||||
let should_unlink_object = if let Some(object_id) = entry.maybe_object_id {
|
||||
db.object()
|
||||
.count(vec![prisma_object::id::equals(object_id)])
|
||||
db.file_path()
|
||||
.count(vec![file_path::object_id::equals(Some(object_id))])
|
||||
.exec()
|
||||
.await? > 1
|
||||
} else {
|
||||
|
@ -310,7 +308,7 @@ macro_rules! file_paths_db_fetcher_fn {
|
|||
.find_many(vec![::prisma_client_rust::operator::or(
|
||||
founds.collect::<Vec<_>>(),
|
||||
)])
|
||||
.select(::sd_file_path_helper::file_path_walker::select())
|
||||
.select(::sd_core_prisma_helpers::file_path_walker::select())
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
|
@ -332,7 +330,7 @@ macro_rules! to_remove_db_fetcher_fn {
|
|||
|parent_iso_file_path, unique_location_id_materialized_path_name_extension_params| async {
|
||||
let location_id: ::sd_prisma::prisma::location::id::Type = $location_id;
|
||||
let db: &::sd_prisma::prisma::PrismaClient = $db;
|
||||
let parent_iso_file_path: ::sd_file_path_helper::IsolatedFilePathData<
|
||||
let parent_iso_file_path: ::sd_core_file_path_helper::IsolatedFilePathData<
|
||||
'static,
|
||||
> = parent_iso_file_path;
|
||||
let unique_location_id_materialized_path_name_extension_params: ::std::vec::Vec<
|
||||
|
@ -396,7 +394,7 @@ macro_rules! to_remove_db_fetcher_fn {
|
|||
found
|
||||
.into_iter()
|
||||
.filter(|file_path| !founds_ids.contains(&file_path.id))
|
||||
.map(|file_path| ::sd_file_path_helper::file_path_pub_and_cas_ids::Data {
|
||||
.map(|file_path| ::sd_core_prisma_helpers::file_path_pub_and_cas_ids::Data {
|
||||
id: file_path.id,
|
||||
pub_id: file_path.pub_id,
|
||||
cas_id: file_path.cas_id,
|
||||
|
|
|
@ -9,10 +9,12 @@ use crate::{
|
|||
to_remove_db_fetcher_fn,
|
||||
};
|
||||
|
||||
use sd_file_path_helper::{
|
||||
use sd_core_file_path_helper::{
|
||||
ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location,
|
||||
IsolatedFilePathData,
|
||||
};
|
||||
use sd_core_indexer_rules::IndexerRule;
|
||||
|
||||
use sd_prisma::{
|
||||
prisma::{file_path, location},
|
||||
prisma_sync,
|
||||
|
@ -38,9 +40,8 @@ use tracing::{debug, info, warn};
|
|||
use super::{
|
||||
execute_indexer_save_step, execute_indexer_update_step, iso_file_path_factory,
|
||||
old_walk::{keep_walking, walk, ToWalkEntry, WalkResult},
|
||||
remove_non_existing_file_paths, reverse_update_directories_sizes,
|
||||
rules::IndexerRule,
|
||||
IndexerError, OldIndexerJobSaveStep, OldIndexerJobUpdateStep,
|
||||
remove_non_existing_file_paths, reverse_update_directories_sizes, IndexerError,
|
||||
OldIndexerJobSaveStep, OldIndexerJobUpdateStep,
|
||||
};
|
||||
|
||||
/// BATCH_SIZE is the number of files to index at each step, writing the chunk of files metadata in the database.
|
||||
|
|
|
@ -11,10 +11,12 @@ use crate::{
|
|||
to_remove_db_fetcher_fn, Node,
|
||||
};
|
||||
|
||||
use sd_file_path_helper::{
|
||||
use sd_core_file_path_helper::{
|
||||
check_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location,
|
||||
IsolatedFilePathData,
|
||||
};
|
||||
use sd_core_indexer_rules::IndexerRule;
|
||||
|
||||
use sd_utils::db::maybe_missing;
|
||||
|
||||
use std::{
|
||||
|
@ -29,8 +31,7 @@ use tracing::{debug, error};
|
|||
|
||||
use super::{
|
||||
execute_indexer_save_step, iso_file_path_factory, location_with_indexer_rules,
|
||||
old_walk::walk_single_dir, remove_non_existing_file_paths, rules::IndexerRule, IndexerError,
|
||||
OldIndexerJobSaveStep,
|
||||
old_walk::walk_single_dir, remove_non_existing_file_paths, IndexerError, OldIndexerJobSaveStep,
|
||||
};
|
||||
|
||||
/// BATCH_SIZE is the number of files to index at each step, writing the chunk of files metadata in the database.
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
use sd_file_path_helper::{
|
||||
file_path_pub_and_cas_ids, file_path_walker, FilePathMetadata, IsolatedFilePathData,
|
||||
};
|
||||
use sd_core_file_path_helper::{FilePathMetadata, IsolatedFilePathData};
|
||||
use sd_core_indexer_rules::{IndexerRule, RuleKind};
|
||||
use sd_core_prisma_helpers::{file_path_pub_and_cas_ids, file_path_walker};
|
||||
|
||||
use sd_prisma::prisma::file_path;
|
||||
use sd_utils::{db::inode_from_db, error::FileIOError};
|
||||
|
||||
|
@ -17,10 +18,7 @@ use tokio::fs;
|
|||
use tracing::trace;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::{
|
||||
rules::{IndexerRule, RuleKind},
|
||||
IndexerError,
|
||||
};
|
||||
use super::IndexerError;
|
||||
|
||||
const TO_WALK_QUEUE_INITIAL_CAPACITY: usize = 32;
|
||||
const WALKER_PATHS_BUFFER_INITIAL_CAPACITY: usize = 256;
|
||||
|
@ -299,7 +297,7 @@ where
|
|||
|
||||
indexed_paths.insert(WalkingEntry {
|
||||
iso_file_path: iso_file_path_factory(root, true)?,
|
||||
maybe_metadata: Some(FilePathMetadata::from_path(&root, &metadata).await?),
|
||||
maybe_metadata: Some(FilePathMetadata::from_path(root, &metadata)?),
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -605,7 +603,6 @@ where
|
|||
};
|
||||
|
||||
let Ok(metadata) = FilePathMetadata::from_path(¤t_path, &metadata)
|
||||
.await
|
||||
.map_err(|e| errors.push(e.into()))
|
||||
else {
|
||||
continue;
|
||||
|
@ -643,8 +640,7 @@ where
|
|||
continue;
|
||||
};
|
||||
|
||||
let Ok(metadata) = FilePathMetadata::from_path(&ancestor, &metadata)
|
||||
.await
|
||||
let Ok(metadata) = FilePathMetadata::from_path(ancestor, &metadata)
|
||||
.map_err(|e| errors.push(e.into()))
|
||||
else {
|
||||
continue;
|
||||
|
@ -696,10 +692,10 @@ where
|
|||
#[cfg(test)]
|
||||
#[allow(clippy::unwrap_used, clippy::panic)]
|
||||
mod tests {
|
||||
use super::super::rules::RulePerKind;
|
||||
use super::*;
|
||||
use chrono::Utc;
|
||||
use globset::{Glob, GlobSetBuilder};
|
||||
use sd_core_indexer_rules::RulePerKind;
|
||||
use tempfile::{tempdir, TempDir};
|
||||
// use tracing_test::traced_test;
|
||||
|
||||
|
@ -717,6 +713,21 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
fn new_indexer_rule(
|
||||
name: impl Into<String>,
|
||||
default: bool,
|
||||
rules: Vec<RulePerKind>,
|
||||
) -> IndexerRule {
|
||||
IndexerRule {
|
||||
id: None,
|
||||
name: name.into(),
|
||||
default,
|
||||
rules,
|
||||
date_created: Utc::now(),
|
||||
date_modified: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
async fn prepare_location() -> TempDir {
|
||||
let root = tempdir().unwrap();
|
||||
let root_path = root.path();
|
||||
|
@ -872,7 +883,7 @@ mod tests {
|
|||
.into_iter()
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
let only_photos_rule = &[IndexerRule::new(
|
||||
let only_photos_rule = &[new_indexer_rule(
|
||||
"only photos".to_string(),
|
||||
false,
|
||||
vec![RulePerKind::AcceptFilesByGlob(
|
||||
|
@ -950,7 +961,7 @@ mod tests {
|
|||
.into_iter()
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
let git_repos = &[IndexerRule::new(
|
||||
let git_repos = &[new_indexer_rule(
|
||||
"git repos".to_string(),
|
||||
false,
|
||||
vec![RulePerKind::AcceptIfChildrenDirectoriesArePresent(
|
||||
|
@ -1019,14 +1030,14 @@ mod tests {
|
|||
.collect::<HashSet<_>>();
|
||||
|
||||
let git_repos_no_deps_no_build_dirs = &[
|
||||
IndexerRule::new(
|
||||
new_indexer_rule(
|
||||
"git repos".to_string(),
|
||||
false,
|
||||
vec![RulePerKind::AcceptIfChildrenDirectoriesArePresent(
|
||||
[".git".to_string()].into_iter().collect(),
|
||||
)],
|
||||
),
|
||||
IndexerRule::new(
|
||||
new_indexer_rule(
|
||||
"reject node_modules".to_string(),
|
||||
false,
|
||||
vec![RulePerKind::RejectFilesByGlob(
|
||||
|
@ -1037,7 +1048,7 @@ mod tests {
|
|||
.unwrap(),
|
||||
)],
|
||||
),
|
||||
IndexerRule::new(
|
||||
new_indexer_rule(
|
||||
"reject rust build dir".to_string(),
|
||||
false,
|
||||
vec![RulePerKind::RejectFilesByGlob(
|
||||
|
|
|
@ -4,7 +4,8 @@ use crate::{
|
|||
Node,
|
||||
};
|
||||
|
||||
use sd_file_path_helper::FilePathError;
|
||||
use sd_core_file_path_helper::FilePathError;
|
||||
|
||||
use sd_prisma::prisma::location;
|
||||
use sd_utils::{db::MissingFieldError, error::FileIOError};
|
||||
|
||||
|
@ -18,11 +19,9 @@ use futures::executor::block_on;
|
|||
use thiserror::Error;
|
||||
use tokio::sync::{
|
||||
broadcast::{self, Receiver},
|
||||
oneshot, RwLock,
|
||||
mpsc, oneshot, RwLock,
|
||||
};
|
||||
use tracing::{debug, error};
|
||||
|
||||
use tokio::sync::mpsc;
|
||||
use uuid::Uuid;
|
||||
|
||||
mod watcher;
|
||||
|
|
|
@ -2,7 +2,10 @@
|
|||
|
||||
use crate::{invalidate_query, library::Library, location::manager::LocationManagerError, Node};
|
||||
|
||||
use sd_file_path_helper::{check_file_path_exists, get_inode, FilePathError, IsolatedFilePathData};
|
||||
use sd_core_file_path_helper::{
|
||||
check_file_path_exists, get_inode, FilePathError, IsolatedFilePathData,
|
||||
};
|
||||
|
||||
use sd_prisma::prisma::location;
|
||||
use sd_utils::error::FileIOError;
|
||||
|
||||
|
|
|
@ -11,7 +11,10 @@
|
|||
|
||||
use crate::{invalidate_query, library::Library, location::manager::LocationManagerError, Node};
|
||||
|
||||
use sd_file_path_helper::{check_file_path_exists, get_inode, FilePathError, IsolatedFilePathData};
|
||||
use sd_core_file_path_helper::{
|
||||
check_file_path_exists, get_inode, FilePathError, IsolatedFilePathData,
|
||||
};
|
||||
|
||||
use sd_prisma::prisma::location;
|
||||
use sd_utils::error::FileIOError;
|
||||
|
||||
|
|
|
@ -18,13 +18,15 @@ use crate::{
|
|||
Node,
|
||||
};
|
||||
|
||||
use sd_file_ext::{extensions::ImageExtension, kind::ObjectKind};
|
||||
use sd_file_path_helper::{
|
||||
check_file_path_exists, file_path_with_object, filter_existing_file_path_params,
|
||||
use sd_core_file_path_helper::{
|
||||
check_file_path_exists, filter_existing_file_path_params,
|
||||
isolated_file_path_data::extract_normalized_materialized_path_str,
|
||||
loose_find_existing_file_path_params, path_is_hidden, FilePathError, FilePathMetadata,
|
||||
IsolatedFilePathData, MetadataExt,
|
||||
};
|
||||
use sd_core_prisma_helpers::file_path_with_object;
|
||||
|
||||
use sd_file_ext::{extensions::ImageExtension, kind::ObjectKind};
|
||||
use sd_prisma::{
|
||||
prisma::{file_path, location, media_data, object},
|
||||
prisma_sync,
|
||||
|
@ -37,10 +39,10 @@ use sd_utils::{
|
|||
};
|
||||
|
||||
#[cfg(target_family = "unix")]
|
||||
use sd_file_path_helper::get_inode;
|
||||
use sd_core_file_path_helper::get_inode;
|
||||
|
||||
#[cfg(target_family = "windows")]
|
||||
use sd_file_path_helper::get_inode_from_path;
|
||||
use sd_core_file_path_helper::get_inode_from_path;
|
||||
|
||||
use std::{
|
||||
collections::{HashMap, HashSet},
|
||||
|
@ -120,7 +122,7 @@ pub(super) async fn create_dir(
|
|||
library,
|
||||
iso_file_path.to_parts(),
|
||||
None,
|
||||
FilePathMetadata::from_path(&path, metadata).await?,
|
||||
FilePathMetadata::from_path(path, metadata)?,
|
||||
)
|
||||
.await?;
|
||||
|
||||
|
@ -177,7 +179,7 @@ async fn inner_create_file(
|
|||
let iso_file_path_parts = iso_file_path.to_parts();
|
||||
let extension = iso_file_path_parts.extension.to_string();
|
||||
|
||||
let metadata = FilePathMetadata::from_path(&path, metadata).await?;
|
||||
let metadata = FilePathMetadata::from_path(path, metadata)?;
|
||||
|
||||
// First we check if already exist a file with this same inode number
|
||||
// if it does, we just update it
|
||||
|
|
|
@ -9,7 +9,8 @@
|
|||
|
||||
use crate::{invalidate_query, library::Library, location::manager::LocationManagerError, Node};
|
||||
|
||||
use sd_file_path_helper::{get_inode_from_path, FilePathError};
|
||||
use sd_core_file_path_helper::{get_inode_from_path, FilePathError};
|
||||
|
||||
use sd_prisma::prisma::location;
|
||||
use sd_utils::error::FileIOError;
|
||||
|
||||
|
|
|
@ -9,7 +9,11 @@ use crate::{
|
|||
Node,
|
||||
};
|
||||
|
||||
use sd_file_path_helper::{filter_existing_file_path_params, IsolatedFilePathData};
|
||||
use sd_core_file_path_helper::{
|
||||
filter_existing_file_path_params, IsolatedFilePathData, IsolatedFilePathDataParts,
|
||||
};
|
||||
use sd_core_prisma_helpers::location_with_indexer_rules;
|
||||
|
||||
use sd_prisma::{
|
||||
prisma::{file_path, indexer_rules_in_location, location, PrismaClient},
|
||||
prisma_sync,
|
||||
|
@ -21,8 +25,6 @@ use sd_utils::{
|
|||
msgpack, uuid_to_bytes,
|
||||
};
|
||||
|
||||
use sd_file_path_helper::IsolatedFilePathDataParts;
|
||||
|
||||
use std::{
|
||||
collections::HashSet,
|
||||
path::{Component, Path, PathBuf},
|
||||
|
@ -53,11 +55,6 @@ use metadata::SpacedriveLocationMetadataFile;
|
|||
|
||||
pub type LocationPubId = Uuid;
|
||||
|
||||
// Location includes!
|
||||
location::include!(location_with_indexer_rules {
|
||||
indexer_rules: select { indexer_rule }
|
||||
});
|
||||
|
||||
/// `LocationCreateArgs` is the argument received from the client using `rspc` to create a new location.
|
||||
/// It has the actual path and a vector of indexer rules ids, to create many-to-many relationships
|
||||
/// between the location and indexer rules.
|
||||
|
@ -867,52 +864,6 @@ pub async fn delete_directory(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
impl From<location_with_indexer_rules::Data> for location::Data {
|
||||
fn from(data: location_with_indexer_rules::Data) -> Self {
|
||||
Self {
|
||||
id: data.id,
|
||||
pub_id: data.pub_id,
|
||||
path: data.path,
|
||||
instance_id: data.instance_id,
|
||||
name: data.name,
|
||||
total_capacity: data.total_capacity,
|
||||
available_capacity: data.available_capacity,
|
||||
is_archived: data.is_archived,
|
||||
size_in_bytes: data.size_in_bytes,
|
||||
generate_preview_media: data.generate_preview_media,
|
||||
sync_preview_media: data.sync_preview_media,
|
||||
hidden: data.hidden,
|
||||
date_created: data.date_created,
|
||||
file_paths: None,
|
||||
indexer_rules: None,
|
||||
instance: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&location_with_indexer_rules::Data> for location::Data {
|
||||
fn from(data: &location_with_indexer_rules::Data) -> Self {
|
||||
Self {
|
||||
id: data.id,
|
||||
pub_id: data.pub_id.clone(),
|
||||
path: data.path.clone(),
|
||||
instance_id: data.instance_id,
|
||||
name: data.name.clone(),
|
||||
total_capacity: data.total_capacity,
|
||||
available_capacity: data.available_capacity,
|
||||
size_in_bytes: data.size_in_bytes.clone(),
|
||||
is_archived: data.is_archived,
|
||||
generate_preview_media: data.generate_preview_media,
|
||||
sync_preview_media: data.sync_preview_media,
|
||||
hidden: data.hidden,
|
||||
date_created: data.date_created,
|
||||
file_paths: None,
|
||||
indexer_rules: None,
|
||||
instance: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn check_nested_location(
|
||||
location_path: impl AsRef<Path>,
|
||||
db: &PrismaClient,
|
||||
|
@ -1049,8 +1000,8 @@ pub async fn create_file_path(
|
|||
..
|
||||
}: IsolatedFilePathDataParts<'_>,
|
||||
cas_id: Option<String>,
|
||||
metadata: sd_file_path_helper::FilePathMetadata,
|
||||
) -> Result<file_path::Data, sd_file_path_helper::FilePathError> {
|
||||
metadata: sd_core_file_path_helper::FilePathMetadata,
|
||||
) -> Result<file_path::Data, sd_core_file_path_helper::FilePathError> {
|
||||
use sd_utils::db::inode_to_db;
|
||||
|
||||
use sd_prisma::prisma;
|
||||
|
@ -1063,7 +1014,7 @@ pub async fn create_file_path(
|
|||
.select(location::select!({ id pub_id }))
|
||||
.exec()
|
||||
.await?
|
||||
.ok_or(sd_file_path_helper::FilePathError::LocationNotFound(
|
||||
.ok_or(sd_core_file_path_helper::FilePathError::LocationNotFound(
|
||||
location_id,
|
||||
))?;
|
||||
|
||||
|
|
|
@ -8,10 +8,13 @@ use crate::{
|
|||
Node,
|
||||
};
|
||||
|
||||
use futures::Stream;
|
||||
use itertools::Either;
|
||||
use sd_core_file_path_helper::{path_is_hidden, MetadataExt};
|
||||
use sd_core_indexer_rules::{
|
||||
seed::{no_hidden, no_os_protected},
|
||||
IndexerRule, RuleKind,
|
||||
};
|
||||
|
||||
use sd_file_ext::{extensions::Extension, kind::ObjectKind};
|
||||
use sd_file_path_helper::{path_is_hidden, MetadataExt};
|
||||
use sd_prisma::prisma::location;
|
||||
use sd_utils::{chain_optional_iter, error::FileIOError};
|
||||
|
||||
|
@ -23,6 +26,8 @@ use std::{
|
|||
};
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use futures::Stream;
|
||||
use itertools::Either;
|
||||
use rspc::ErrorCode;
|
||||
use serde::Serialize;
|
||||
use specta::Type;
|
||||
|
@ -31,13 +36,7 @@ use tokio::{io, sync::mpsc, task::JoinError};
|
|||
use tokio_stream::wrappers::ReceiverStream;
|
||||
use tracing::{error, span, warn, Level};
|
||||
|
||||
use super::{
|
||||
indexer::rules::{
|
||||
seed::{no_hidden, no_os_protected},
|
||||
IndexerRule, RuleKind,
|
||||
},
|
||||
normalize_path,
|
||||
};
|
||||
use super::normalize_path;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum NonIndexedLocationError {
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
use crate::location::LocationError;
|
||||
|
||||
use sd_file_path_helper::FilePathError;
|
||||
use sd_core_file_path_helper::FilePathError;
|
||||
|
||||
use sd_prisma::prisma::file_path;
|
||||
use sd_utils::{
|
||||
db::MissingFieldError,
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
use crate::location::LocationError;
|
||||
|
||||
use sd_file_path_helper::{file_path_with_object, IsolatedFilePathData};
|
||||
use sd_core_file_path_helper::IsolatedFilePathData;
|
||||
use sd_core_prisma_helpers::file_path_with_object;
|
||||
|
||||
use sd_prisma::prisma::{file_path, location, PrismaClient};
|
||||
use sd_utils::{
|
||||
db::maybe_missing,
|
||||
|
|
|
@ -7,7 +7,8 @@ use crate::{
|
|||
},
|
||||
};
|
||||
|
||||
use sd_file_path_helper::{join_location_relative_path, IsolatedFilePathData};
|
||||
use sd_core_file_path_helper::{join_location_relative_path, IsolatedFilePathData};
|
||||
|
||||
use sd_prisma::prisma::{file_path, location};
|
||||
use sd_utils::{db::maybe_missing, error::FileIOError};
|
||||
|
||||
|
|
|
@ -8,7 +8,8 @@ use crate::{
|
|||
},
|
||||
};
|
||||
|
||||
use sd_file_path_helper::push_location_relative_path;
|
||||
use sd_core_file_path_helper::push_location_relative_path;
|
||||
|
||||
use sd_prisma::prisma::{file_path, location};
|
||||
use sd_utils::error::FileIOError;
|
||||
|
||||
|
|
|
@ -8,7 +8,8 @@ use crate::{
|
|||
},
|
||||
};
|
||||
|
||||
use sd_file_path_helper::IsolatedFilePathData;
|
||||
use sd_core_file_path_helper::IsolatedFilePathData;
|
||||
|
||||
use sd_prisma::prisma::{file_path, location};
|
||||
use sd_utils::{db::maybe_missing, error::FileIOError};
|
||||
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
use crate::old_job::JobRunErrors;
|
||||
|
||||
use sd_core_file_path_helper::IsolatedFilePathData;
|
||||
use sd_core_prisma_helpers::file_path_for_media_processor;
|
||||
|
||||
use sd_file_ext::extensions::{Extension, ImageExtension, ALL_IMAGE_EXTENSIONS};
|
||||
use sd_file_path_helper::{file_path_for_media_processor, IsolatedFilePathData};
|
||||
use sd_media_metadata::ImageMetadata;
|
||||
use sd_prisma::prisma::{location, media_data, PrismaClient};
|
||||
use sd_utils::error::FileIOError;
|
||||
|
|
|
@ -11,11 +11,13 @@ use crate::{
|
|||
#[cfg(feature = "ai")]
|
||||
use crate::old_job::JobRunErrors;
|
||||
|
||||
use sd_file_ext::extensions::Extension;
|
||||
use sd_file_path_helper::{
|
||||
use sd_core_file_path_helper::{
|
||||
ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location,
|
||||
file_path_for_media_processor, IsolatedFilePathData,
|
||||
IsolatedFilePathData,
|
||||
};
|
||||
use sd_core_prisma_helpers::file_path_for_media_processor;
|
||||
|
||||
use sd_file_ext::extensions::Extension;
|
||||
use sd_prisma::prisma::{location, PrismaClient};
|
||||
use sd_utils::db::maybe_missing;
|
||||
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
use crate::old_job::{JobRunErrors, JobRunMetadata};
|
||||
|
||||
use sd_file_path_helper::{file_path_for_media_processor, FilePathError};
|
||||
use sd_core_file_path_helper::FilePathError;
|
||||
use sd_core_prisma_helpers::file_path_for_media_processor;
|
||||
|
||||
use sd_prisma::prisma::{location, PrismaClient};
|
||||
|
||||
use std::path::Path;
|
||||
|
|
|
@ -6,11 +6,13 @@ use crate::{
|
|||
Node,
|
||||
};
|
||||
|
||||
use sd_file_ext::extensions::Extension;
|
||||
use sd_file_path_helper::{
|
||||
use sd_core_file_path_helper::{
|
||||
ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location,
|
||||
file_path_for_media_processor, IsolatedFilePathData,
|
||||
IsolatedFilePathData,
|
||||
};
|
||||
use sd_core_prisma_helpers::file_path_for_media_processor;
|
||||
|
||||
use sd_file_ext::extensions::Extension;
|
||||
use sd_prisma::prisma::{location, PrismaClient};
|
||||
use sd_utils::db::maybe_missing;
|
||||
|
||||
|
|
|
@ -15,12 +15,6 @@ pub mod validation;
|
|||
// Some Objects are purely virtual, unless they have one or more associated Paths, which refer to a file found in a Location
|
||||
// Objects are what can be added to Spaces
|
||||
|
||||
// Object selectables!
|
||||
object::select!(object_for_file_identifier {
|
||||
pub_id
|
||||
file_paths: select { pub_id cas_id extension is_dir materialized_path name }
|
||||
});
|
||||
|
||||
// The response to provide the Explorer when looking at Objects
|
||||
#[derive(Debug, Serialize, Deserialize, Type)]
|
||||
pub struct ObjectsForExplorer {
|
||||
|
|
|
@ -1,11 +1,9 @@
|
|||
use crate::{
|
||||
library::Library,
|
||||
object::{cas::generate_cas_id, object_for_file_identifier},
|
||||
old_job::JobError,
|
||||
};
|
||||
use crate::{library::Library, object::cas::generate_cas_id, old_job::JobError};
|
||||
|
||||
use sd_core_file_path_helper::{FilePathError, IsolatedFilePathData};
|
||||
use sd_core_prisma_helpers::{file_path_for_file_identifier, object_for_file_identifier};
|
||||
|
||||
use sd_file_ext::{extensions::Extension, kind::ObjectKind};
|
||||
use sd_file_path_helper::{file_path_for_file_identifier, FilePathError, IsolatedFilePathData};
|
||||
use sd_prisma::{
|
||||
prisma::{file_path, location, object, PrismaClient},
|
||||
prisma_sync,
|
||||
|
|
|
@ -6,10 +6,12 @@ use crate::{
|
|||
},
|
||||
};
|
||||
|
||||
use sd_file_path_helper::{
|
||||
use sd_core_file_path_helper::{
|
||||
ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location,
|
||||
file_path_for_file_identifier, IsolatedFilePathData,
|
||||
IsolatedFilePathData,
|
||||
};
|
||||
use sd_core_prisma_helpers::file_path_for_file_identifier;
|
||||
|
||||
use sd_prisma::prisma::{file_path, location, PrismaClient, SortOrder};
|
||||
use sd_utils::db::maybe_missing;
|
||||
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
use crate::{invalidate_query, library::Library, old_job::JobError};
|
||||
|
||||
use sd_file_path_helper::{
|
||||
use sd_core_file_path_helper::{
|
||||
ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location,
|
||||
file_path_for_file_identifier, IsolatedFilePathData,
|
||||
IsolatedFilePathData,
|
||||
};
|
||||
use sd_core_prisma_helpers::file_path_for_file_identifier;
|
||||
|
||||
use sd_prisma::prisma::{file_path, location, PrismaClient, SortOrder};
|
||||
use sd_utils::db::maybe_missing;
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
use sd_file_path_helper::FilePathError;
|
||||
use sd_core_file_path_helper::FilePathError;
|
||||
use sd_utils::error::FileIOError;
|
||||
|
||||
use std::path::Path;
|
||||
|
|
|
@ -5,10 +5,12 @@ use crate::{
|
|||
},
|
||||
};
|
||||
|
||||
use sd_file_path_helper::{
|
||||
use sd_core_file_path_helper::{
|
||||
ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location,
|
||||
file_path_for_object_validator, IsolatedFilePathData,
|
||||
IsolatedFilePathData,
|
||||
};
|
||||
use sd_core_prisma_helpers::file_path_for_object_validator;
|
||||
|
||||
use sd_prisma::{
|
||||
prisma::{file_path, location},
|
||||
prisma_sync,
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
use crate::library::Library;
|
||||
|
||||
use sd_core_prisma_helpers::job_without_data;
|
||||
|
||||
use sd_prisma::prisma::job;
|
||||
use sd_utils::db::{maybe_missing, MissingFieldError};
|
||||
|
||||
|
@ -24,22 +26,6 @@ pub enum JobReportUpdate {
|
|||
Phase(String),
|
||||
}
|
||||
|
||||
job::select!(job_without_data {
|
||||
id
|
||||
name
|
||||
action
|
||||
status
|
||||
parent_id
|
||||
errors_text
|
||||
metadata
|
||||
date_created
|
||||
date_started
|
||||
date_completed
|
||||
task_count
|
||||
completed_task_count
|
||||
date_estimated_completion
|
||||
});
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Type, Clone)]
|
||||
pub struct JobReport {
|
||||
pub id: Uuid,
|
||||
|
|
|
@ -10,11 +10,15 @@ repository = { workspace = true }
|
|||
edition = { workspace = true }
|
||||
|
||||
[dependencies]
|
||||
sd-prisma = { path = "../prisma" }
|
||||
# Inner Core Sub-crates
|
||||
sd-core-file-path-helper = { path = "../../core/crates/file-path-helper" }
|
||||
sd-core-prisma-helpers = { path = "../../core/crates/prisma-helpers" }
|
||||
sd-core-sync = { path = "../../core/crates/sync" }
|
||||
|
||||
# Spacedrive Sub-crates
|
||||
sd-prisma = { path = "../prisma" }
|
||||
sd-sync = { path = "../sync" }
|
||||
sd-utils = { path = "../utils" }
|
||||
sd-file-path-helper = { path = "../file-path-helper" }
|
||||
|
||||
async-channel = { workspace = true }
|
||||
chrono = { workspace = true, features = ["serde"] }
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
use sd_file_path_helper::file_path_for_media_processor;
|
||||
use sd_core_prisma_helpers::file_path_for_media_processor;
|
||||
|
||||
use sd_prisma::prisma::{location, PrismaClient};
|
||||
use sd_utils::error::FileIOError;
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#![allow(non_camel_case_types)]
|
||||
use sd_core_file_path_helper::IsolatedFilePathData;
|
||||
use sd_core_prisma_helpers::file_path_for_media_processor;
|
||||
|
||||
use sd_file_path_helper::{file_path_for_media_processor, IsolatedFilePathData};
|
||||
use sd_prisma::{
|
||||
prisma::{file_path, label, label_on_object, object, PrismaClient},
|
||||
prisma_sync,
|
||||
|
|
|
@ -6,14 +6,18 @@ edition.workspace = true
|
|||
repository.workspace = true
|
||||
|
||||
[dependencies]
|
||||
# Spacedrive Sub-crates
|
||||
sd-p2p = { path = "../p2p" }
|
||||
reqwest = "0.11.22"
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
thiserror = "1.0.50"
|
||||
uuid.workspace = true
|
||||
|
||||
base64 = { workspace = true }
|
||||
rmpv = { workspace = true }
|
||||
rspc = { workspace = true }
|
||||
specta.workspace = true
|
||||
base64.workspace = true
|
||||
rmpv.workspace = true
|
||||
tracing.workspace = true
|
||||
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
specta = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
uuid = { workspace = true }
|
||||
|
||||
reqwest = "0.11.22"
|
||||
|
|
|
@ -7,9 +7,11 @@ edition.workspace = true
|
|||
repository.workspace = true
|
||||
|
||||
[dependencies]
|
||||
# Spacedrive Sub-crates
|
||||
sd-p2p = { path = "../p2p" }
|
||||
sd-p2p-proto = { path = "../p2p-proto" }
|
||||
thiserror.workspace = true
|
||||
tokio.workspace = true
|
||||
tracing.workspace = true
|
||||
uuid.workspace = true
|
||||
|
||||
thiserror = { workspace = true }
|
||||
tokio = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
uuid = { workspace = true }
|
||||
|
|
|
@ -7,7 +7,8 @@ edition.workspace = true
|
|||
repository.workspace = true
|
||||
|
||||
[dependencies]
|
||||
ed25519-dalek = "2.1.1"
|
||||
thiserror.workspace = true
|
||||
thiserror = { workspace = true }
|
||||
tokio = { workspace = true, features = ["io-util"] }
|
||||
uuid.workspace = true
|
||||
uuid = { workspace = true }
|
||||
|
||||
ed25519-dalek = "2.1.1"
|
||||
|
|
|
@ -7,5 +7,7 @@ edition.workspace = true
|
|||
repository.workspace = true
|
||||
|
||||
[dependencies]
|
||||
# Spacedrive Sub-crates
|
||||
sd-p2p = { path = "../p2p" }
|
||||
|
||||
tokio = { workspace = true, features = ["io-util"] }
|
||||
|
|
|
@ -6,6 +6,7 @@ repository = { workspace = true }
|
|||
edition = { workspace = true }
|
||||
|
||||
[dependencies]
|
||||
# Spacedrive Sub-crates
|
||||
sd-sync-generator = { path = "../sync-generator" }
|
||||
|
||||
prisma-client-rust-cli = { workspace = true }
|
||||
|
|
|
@ -4,6 +4,7 @@ version = "0.1.0"
|
|||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
# Spacedrive Sub-crates
|
||||
sd-cache = { path = "../cache" }
|
||||
sd-sync = { path = "../sync" }
|
||||
|
||||
|
|
|
@ -8,14 +8,17 @@ repository = { workspace = true }
|
|||
edition = { workspace = true }
|
||||
|
||||
[dependencies]
|
||||
serde_json = "1.0.85"
|
||||
serde = { version = "1.0.145", features = ["derive"] }
|
||||
# Spacedrive Sub-crates
|
||||
sd-sync = { path = ".." }
|
||||
|
||||
axum = { workspace = true }
|
||||
rspc = { workspace = true, features = ["axum"] }
|
||||
tokio = { workspace = true, features = ["full"] }
|
||||
prisma-client-rust = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
uuid = { workspace = true, features = ["v4"] }
|
||||
|
||||
dotenv = "0.15.0"
|
||||
tower-http = { version = "0.3.4", features = ["cors"] }
|
||||
sd-sync = { path = ".." }
|
||||
uuid = { workspace = true, features = ["v4"] }
|
||||
http = "0.2.8"
|
||||
|
|
|
@ -34,9 +34,11 @@ pin-project = "1.1.4"
|
|||
tokio = { workspace = true, features = ["macros", "test-util", "fs"] }
|
||||
tempfile = { workspace = true }
|
||||
rand = "0.8.5"
|
||||
tracing-test = { version = "^0.2.4", features = ["no-env-filter"] }
|
||||
tracing-test = { workspace.dev-dependencies = true, features = [
|
||||
"no-env-filter",
|
||||
] }
|
||||
thiserror = { workspace = true }
|
||||
lending-stream = "1.0.0"
|
||||
lending-stream = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
rmp-serde = { workspace = true }
|
||||
uuid = { workspace = true, features = ["serde"] }
|
||||
|
|
|
@ -94,8 +94,11 @@ mod task;
|
|||
mod worker;
|
||||
|
||||
pub use error::{RunError, SystemError as TaskSystemError};
|
||||
pub use system::{Dispatcher as TaskDispatcher, System as TaskSystem};
|
||||
pub use task::{
|
||||
AnyTaskOutput, ExecStatus, Interrupter, InterrupterFuture, InterruptionKind, IntoAnyTaskOutput,
|
||||
IntoTask, Task, TaskHandle, TaskId, TaskOutput, TaskStatus,
|
||||
pub use system::{
|
||||
BaseDispatcher as BaseTaskDispatcher, Dispatcher as TaskDispatcher, System as TaskSystem,
|
||||
};
|
||||
pub use task::{
|
||||
AnyTaskOutput, CancelTaskOnDrop, ExecStatus, Interrupter, InterrupterFuture, InterruptionKind,
|
||||
IntoAnyTaskOutput, IntoTask, SerializableTask, Task, TaskHandle, TaskId, TaskOutput,
|
||||
TaskRemoteController, TaskStatus,
|
||||
};
|
||||
|
|
|
@ -23,7 +23,7 @@ pub enum SystemMessage {
|
|||
CancelNotRunningTask {
|
||||
task_id: TaskId,
|
||||
worker_id: WorkerId,
|
||||
ack: oneshot::Sender<Result<(), SystemError>>,
|
||||
ack: oneshot::Sender<()>,
|
||||
},
|
||||
ForceAbortion {
|
||||
task_id: TaskId,
|
||||
|
@ -51,7 +51,7 @@ pub enum WorkerMessage<E: RunError> {
|
|||
},
|
||||
CancelNotRunningTask {
|
||||
task_id: TaskId,
|
||||
ack: oneshot::Sender<Result<(), SystemError>>,
|
||||
ack: oneshot::Sender<()>,
|
||||
},
|
||||
ForceAbortion {
|
||||
task_id: TaskId,
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
use std::{
|
||||
cell::RefCell,
|
||||
collections::HashSet,
|
||||
fmt,
|
||||
future::Future,
|
||||
num::NonZeroUsize,
|
||||
pin::pin,
|
||||
sync::{
|
||||
|
@ -30,7 +32,7 @@ use super::{
|
|||
pub struct System<E: RunError> {
|
||||
workers: Arc<Vec<Worker<E>>>,
|
||||
msgs_tx: chan::Sender<SystemMessage>,
|
||||
dispatcher: Dispatcher<E>,
|
||||
dispatcher: BaseDispatcher<E>,
|
||||
handle: RefCell<Option<JoinHandle<()>>>,
|
||||
}
|
||||
|
||||
|
@ -94,7 +96,7 @@ impl<E: RunError> System<E> {
|
|||
Self {
|
||||
workers: Arc::clone(&workers),
|
||||
msgs_tx,
|
||||
dispatcher: Dispatcher {
|
||||
dispatcher: BaseDispatcher {
|
||||
workers,
|
||||
idle_workers,
|
||||
last_worker_id: Arc::new(AtomicWorkerId::new(0)),
|
||||
|
@ -115,12 +117,18 @@ impl<E: RunError> System<E> {
|
|||
}
|
||||
|
||||
/// Dispatches many tasks to the system, the tasks will be assigned to workers and executed as soon as possible.
|
||||
pub async fn dispatch_many(&self, into_tasks: Vec<impl IntoTask<E>>) -> Vec<TaskHandle<E>> {
|
||||
pub async fn dispatch_many<I: IntoIterator<Item = impl IntoTask<E>> + Send>(
|
||||
&self,
|
||||
into_tasks: I,
|
||||
) -> Vec<TaskHandle<E>>
|
||||
where
|
||||
<I as IntoIterator>::IntoIter: Send,
|
||||
{
|
||||
self.dispatcher.dispatch_many(into_tasks).await
|
||||
}
|
||||
|
||||
/// Returns a dispatcher that can be used to remotely dispatch tasks to the system.
|
||||
pub fn get_dispatcher(&self) -> Dispatcher<E> {
|
||||
pub fn get_dispatcher(&self) -> BaseDispatcher<E> {
|
||||
self.dispatcher.clone()
|
||||
}
|
||||
|
||||
|
@ -314,11 +322,7 @@ impl SystemComm {
|
|||
.expect("System channel closed trying receive pause not running task response")
|
||||
}
|
||||
|
||||
pub async fn cancel_not_running_task(
|
||||
&self,
|
||||
task_id: TaskId,
|
||||
worker_id: WorkerId,
|
||||
) -> Result<(), SystemError> {
|
||||
pub async fn cancel_not_running_task(&self, task_id: TaskId, worker_id: WorkerId) {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
|
||||
self.0
|
||||
|
@ -331,7 +335,7 @@ impl SystemComm {
|
|||
.expect("System channel closed trying to cancel a not running task");
|
||||
|
||||
rx.await
|
||||
.expect("System channel closed trying receive cancel a not running task response")
|
||||
.expect("System channel closed trying receive cancel a not running task response");
|
||||
}
|
||||
|
||||
pub async fn request_help(&self, worker_id: WorkerId, task_count: usize) {
|
||||
|
@ -390,13 +394,45 @@ impl SystemComm {
|
|||
/// It can be used to dispatch tasks to the system from other threads or tasks.
|
||||
/// It uses [`Arc`] internally so it can be cheaply cloned and put inside tasks so tasks can dispatch other tasks.
|
||||
#[derive(Debug)]
|
||||
pub struct Dispatcher<E: RunError> {
|
||||
pub struct BaseDispatcher<E: RunError> {
|
||||
workers: Arc<Vec<Worker<E>>>,
|
||||
idle_workers: Arc<Vec<AtomicBool>>,
|
||||
last_worker_id: Arc<AtomicWorkerId>,
|
||||
}
|
||||
|
||||
impl<E: RunError> Clone for Dispatcher<E> {
|
||||
pub trait Dispatcher<E: RunError>: fmt::Debug + Clone + Send + Sync + 'static {
|
||||
/// Dispatches a task to the system, the task will be assigned to a worker and executed as soon as possible.
|
||||
fn dispatch(&self, into_task: impl IntoTask<E>) -> impl Future<Output = TaskHandle<E>> + Send {
|
||||
self.dispatch_boxed(into_task.into_task())
|
||||
}
|
||||
|
||||
/// Dispatches an already boxed task to the system, the task will be assigned to a worker and executed as
|
||||
/// soon as possible.
|
||||
fn dispatch_boxed(
|
||||
&self,
|
||||
boxed_task: Box<dyn Task<E>>,
|
||||
) -> impl Future<Output = TaskHandle<E>> + Send;
|
||||
|
||||
/// Dispatches many tasks to the system, the tasks will be assigned to workers and executed as soon as possible.
|
||||
fn dispatch_many<I: IntoIterator<Item = impl IntoTask<E>> + Send>(
|
||||
&self,
|
||||
into_tasks: I,
|
||||
) -> impl Future<Output = Vec<TaskHandle<E>>> + Send
|
||||
where
|
||||
<I as IntoIterator>::IntoIter: Send,
|
||||
{
|
||||
self.dispatch_many_boxed(into_tasks.into_iter().map(IntoTask::into_task))
|
||||
}
|
||||
|
||||
/// Dispatches many already boxed tasks to the system, the tasks will be assigned to workers and executed as
|
||||
/// soon as possible.
|
||||
fn dispatch_many_boxed(
|
||||
&self,
|
||||
boxed_tasks: impl IntoIterator<Item = Box<dyn Task<E>>> + Send,
|
||||
) -> impl Future<Output = Vec<TaskHandle<E>>> + Send;
|
||||
}
|
||||
|
||||
impl<E: RunError> Clone for BaseDispatcher<E> {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
workers: Arc::clone(&self.workers),
|
||||
|
@ -406,14 +442,17 @@ impl<E: RunError> Clone for Dispatcher<E> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<E: RunError> Dispatcher<E> {
|
||||
/// Dispatches a task to the system, the task will be assigned to a worker and executed as soon as possible.
|
||||
pub async fn dispatch(&self, into_task: impl IntoTask<E>) -> TaskHandle<E> {
|
||||
async fn inner<E: RunError>(this: &Dispatcher<E>, task: Box<dyn Task<E>>) -> TaskHandle<E> {
|
||||
let worker_id = this
|
||||
impl<E: RunError> Dispatcher<E> for BaseDispatcher<E> {
|
||||
async fn dispatch(&self, into_task: impl IntoTask<E>) -> TaskHandle<E> {
|
||||
self.dispatch_boxed(into_task.into_task()).await
|
||||
}
|
||||
|
||||
#[allow(clippy::missing_panics_doc)]
|
||||
async fn dispatch_boxed(&self, task: Box<dyn Task<E>>) -> TaskHandle<E> {
|
||||
let worker_id = self
|
||||
.last_worker_id
|
||||
.fetch_update(Ordering::Release, Ordering::Acquire, |last_worker_id| {
|
||||
Some((last_worker_id + 1) % this.workers.len())
|
||||
Some((last_worker_id + 1) % self.workers.len())
|
||||
})
|
||||
.expect("we hardcoded the update function to always return Some(next_worker_id) through dispatcher");
|
||||
|
||||
|
@ -421,18 +460,17 @@ impl<E: RunError> Dispatcher<E> {
|
|||
"Dispatching task to worker: <worker_id='{worker_id}', task_id='{}'>",
|
||||
task.id()
|
||||
);
|
||||
let handle = this.workers[worker_id].add_task(task).await;
|
||||
let handle = self.workers[worker_id].add_task(task).await;
|
||||
|
||||
this.idle_workers[worker_id].store(false, Ordering::Relaxed);
|
||||
self.idle_workers[worker_id].store(false, Ordering::Relaxed);
|
||||
|
||||
handle
|
||||
}
|
||||
|
||||
inner(self, into_task.into_task()).await
|
||||
}
|
||||
|
||||
/// Dispatches many tasks to the system, the tasks will be assigned to workers and executed as soon as possible.
|
||||
pub async fn dispatch_many(&self, into_tasks: Vec<impl IntoTask<E>>) -> Vec<TaskHandle<E>> {
|
||||
async fn dispatch_many_boxed(
|
||||
&self,
|
||||
into_tasks: impl IntoIterator<Item = Box<dyn Task<E>>> + Send,
|
||||
) -> Vec<TaskHandle<E>> {
|
||||
let mut workers_task_count = self
|
||||
.workers
|
||||
.iter()
|
||||
|
@ -445,7 +483,6 @@ impl<E: RunError> Dispatcher<E> {
|
|||
|
||||
let (handles, workers_ids_set) = into_tasks
|
||||
.into_iter()
|
||||
.map(IntoTask::into_task)
|
||||
.zip(workers_task_count.into_iter().cycle())
|
||||
.map(|(task, (worker_id, _))| async move {
|
||||
(self.workers[worker_id].add_task(task).await, worker_id)
|
||||
|
@ -462,7 +499,9 @@ impl<E: RunError> Dispatcher<E> {
|
|||
|
||||
handles
|
||||
}
|
||||
}
|
||||
|
||||
impl<E: RunError> BaseDispatcher<E> {
|
||||
/// Returns the number of workers in the system.
|
||||
#[must_use]
|
||||
pub fn workers_count(&self) -> usize {
|
||||
|
|
|
@ -13,6 +13,7 @@ use async_channel as chan;
|
|||
use async_trait::async_trait;
|
||||
use chan::{Recv, RecvError};
|
||||
use downcast_rs::{impl_downcast, Downcast};
|
||||
use futures::executor::block_on;
|
||||
use tokio::sync::oneshot;
|
||||
use tracing::{trace, warn};
|
||||
use uuid::Uuid;
|
||||
|
@ -61,7 +62,7 @@ pub enum TaskOutput {
|
|||
#[derive(Debug)]
|
||||
pub enum TaskStatus<E: RunError> {
|
||||
/// The task has finished successfully and maybe has some output for the user.
|
||||
Done(TaskOutput),
|
||||
Done((TaskId, TaskOutput)),
|
||||
/// Task was gracefully cancelled by the user.
|
||||
Canceled,
|
||||
/// Task was forcefully aborted by the user.
|
||||
|
@ -123,7 +124,7 @@ impl<T: Task<E> + 'static, E: RunError> IntoTask<E> for T {
|
|||
/// We're currently using the [`async_trait`](https://docs.rs/async-trait) crate to allow dyn async traits,
|
||||
/// due to a limitation in the Rust language.
|
||||
#[async_trait]
|
||||
pub trait Task<E: RunError>: fmt::Debug + Downcast + Send + 'static {
|
||||
pub trait Task<E: RunError>: fmt::Debug + Downcast + Send + Sync + 'static {
|
||||
/// This method represent the work that should be done by the worker, it will be called by the
|
||||
/// worker when there is a slot available in its internal queue.
|
||||
/// We receive a `&mut self` so any internal data can be mutated on each `run` invocation.
|
||||
|
@ -147,6 +148,21 @@ pub trait Task<E: RunError>: fmt::Debug + Downcast + Send + 'static {
|
|||
|
||||
impl_downcast!(Task<E> where E: RunError);
|
||||
|
||||
pub trait SerializableTask<E: RunError>: Task<E>
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
type SerializeError: std::error::Error + 'static;
|
||||
type DeserializeError: std::error::Error + 'static;
|
||||
type DeserializeCtx: 'static;
|
||||
|
||||
fn serialize(self) -> impl Future<Output = Result<Vec<u8>, Self::SerializeError>> + Send;
|
||||
fn deserialize(
|
||||
data: &[u8],
|
||||
ctx: Self::DeserializeCtx,
|
||||
) -> impl Future<Output = Result<Self, Self::DeserializeError>> + Send;
|
||||
}
|
||||
|
||||
/// Intermediate struct to wait until a pause or a cancel commands are sent by the user.
|
||||
#[must_use = "`InterrupterFuture` does nothing unless polled"]
|
||||
#[pin_project::pin_project]
|
||||
|
@ -164,7 +180,7 @@ impl Future for InterrupterFuture<'_> {
|
|||
|
||||
match this.fut.poll(cx) {
|
||||
Poll::Ready(Ok(InterruptionRequest { kind, ack })) => {
|
||||
if ack.send(Ok(())).is_err() {
|
||||
if ack.send(()).is_err() {
|
||||
warn!("TaskInterrupter ack channel closed");
|
||||
}
|
||||
this.has_interrupted.store(kind as u8, Ordering::Relaxed);
|
||||
|
@ -218,7 +234,7 @@ impl Interrupter {
|
|||
InterruptionKind::load(&self.has_interrupted).map_or_else(
|
||||
|| {
|
||||
if let Ok(InterruptionRequest { kind, ack }) = self.interrupt_rx.try_recv() {
|
||||
if ack.send(Ok(())).is_err() {
|
||||
if ack.send(()).is_err() {
|
||||
warn!("TaskInterrupter ack channel closed");
|
||||
}
|
||||
|
||||
|
@ -245,6 +261,39 @@ impl Interrupter {
|
|||
}
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! check_interruption {
|
||||
($interrupter:ident) => {
|
||||
let interrupter: &Interrupter = $interrupter;
|
||||
|
||||
match interrupter.try_check_interrupt() {
|
||||
Some($crate::InterruptionKind::Cancel) => return Ok($crate::ExecStatus::Canceled),
|
||||
Some($crate::InterruptionKind::Pause) => return Ok($crate::ExecStatus::Paused),
|
||||
None => { /* Everything is Awesome! */ }
|
||||
}
|
||||
};
|
||||
|
||||
($interrupter:ident, $instant:ident, $duration_accumulator:ident) => {
|
||||
let interrupter: &Interrupter = $interrupter;
|
||||
let instant: Instant = $instant;
|
||||
let duration_accumulator: &mut Duration = $duration_accumulator;
|
||||
|
||||
match interrupter.try_check_interrupt() {
|
||||
Some($crate::InterruptionKind::Cancel) => {
|
||||
*duration_accumulator += instant.elapsed();
|
||||
|
||||
return Ok($crate::ExecStatus::Canceled);
|
||||
}
|
||||
Some($crate::InterruptionKind::Pause) => {
|
||||
*duration_accumulator += instant.elapsed();
|
||||
|
||||
return Ok($crate::ExecStatus::Paused);
|
||||
}
|
||||
None => { /* Everything is Awesome! */ }
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/// The kind of interruption that can be requested by the user, a pause or a cancel
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
#[repr(u8)]
|
||||
|
@ -266,30 +315,18 @@ impl InterruptionKind {
|
|||
#[derive(Debug)]
|
||||
pub struct InterruptionRequest {
|
||||
kind: InterruptionKind,
|
||||
ack: oneshot::Sender<Result<(), SystemError>>,
|
||||
ack: oneshot::Sender<()>,
|
||||
}
|
||||
|
||||
/// A handle returned when a task is dispatched to the task system, it can be used to pause, cancel, resume, or wait
|
||||
/// until the task gets completed.
|
||||
#[derive(Debug)]
|
||||
pub struct TaskHandle<E: RunError> {
|
||||
/// A remote controller of a task that can be used to pause, cancel, resume, or force abortion.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct TaskRemoteController {
|
||||
pub(crate) worktable: Arc<TaskWorktable>,
|
||||
pub(crate) done_rx: oneshot::Receiver<Result<TaskStatus<E>, SystemError>>,
|
||||
pub(crate) system_comm: SystemComm,
|
||||
pub(crate) task_id: TaskId,
|
||||
}
|
||||
|
||||
impl<E: RunError> Future for TaskHandle<E> {
|
||||
type Output = Result<TaskStatus<E>, SystemError>;
|
||||
|
||||
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
|
||||
Pin::new(&mut self.done_rx)
|
||||
.poll(cx)
|
||||
.map(|res| res.expect("TaskHandle done channel unexpectedly closed"))
|
||||
}
|
||||
}
|
||||
|
||||
impl<E: RunError> TaskHandle<E> {
|
||||
impl TaskRemoteController {
|
||||
/// Get the unique identifier of the task
|
||||
#[must_use]
|
||||
pub const fn task_id(&self) -> TaskId {
|
||||
|
@ -316,7 +353,7 @@ impl<E: RunError> TaskHandle<E> {
|
|||
|
||||
self.worktable.pause(tx).await;
|
||||
|
||||
rx.await.expect("Worker failed to ack pause request")?;
|
||||
rx.await.expect("Worker failed to ack pause request");
|
||||
} else {
|
||||
trace!("Task is not running, setting is_paused flag");
|
||||
self.worktable.is_paused.store(true, Ordering::Relaxed);
|
||||
|
@ -338,7 +375,7 @@ impl<E: RunError> TaskHandle<E> {
|
|||
/// # Panics
|
||||
///
|
||||
/// Will panic if the worker failed to ack the cancel request
|
||||
pub async fn cancel(&self) -> Result<(), SystemError> {
|
||||
pub async fn cancel(&self) {
|
||||
let is_canceled = self.worktable.is_canceled.load(Ordering::Relaxed);
|
||||
let is_done = self.worktable.is_done.load(Ordering::Relaxed);
|
||||
|
||||
|
@ -352,12 +389,11 @@ impl<E: RunError> TaskHandle<E> {
|
|||
|
||||
self.worktable.cancel(tx).await;
|
||||
|
||||
rx.await.expect("Worker failed to ack cancel request")?;
|
||||
rx.await.expect("Worker failed to ack cancel request");
|
||||
} else {
|
||||
trace!("Task is not running, setting is_canceled flag");
|
||||
self.worktable.is_canceled.store(true, Ordering::Relaxed);
|
||||
return self
|
||||
.system_comm
|
||||
self.system_comm
|
||||
.cancel_not_running_task(
|
||||
self.task_id,
|
||||
self.worktable.current_worker_id.load(Ordering::Relaxed),
|
||||
|
@ -365,8 +401,6 @@ impl<E: RunError> TaskHandle<E> {
|
|||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Forcefully abort the task, this can lead to corrupted data or inconsistent states, so use it with caution.
|
||||
|
@ -390,6 +424,92 @@ impl<E: RunError> TaskHandle<E> {
|
|||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Verify if the task was already completed
|
||||
#[must_use]
|
||||
pub fn is_done(&self) -> bool {
|
||||
self.worktable.is_done.load(Ordering::Relaxed)
|
||||
}
|
||||
}
|
||||
|
||||
/// A handle returned when a task is dispatched to the task system, it can be used to pause, cancel, resume, or wait
|
||||
/// until the task gets completed.
|
||||
#[derive(Debug)]
|
||||
pub struct TaskHandle<E: RunError> {
|
||||
pub(crate) done_rx: oneshot::Receiver<Result<TaskStatus<E>, SystemError>>,
|
||||
pub(crate) controller: TaskRemoteController,
|
||||
}
|
||||
|
||||
impl<E: RunError> Future for TaskHandle<E> {
|
||||
type Output = Result<TaskStatus<E>, SystemError>;
|
||||
|
||||
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
|
||||
Pin::new(&mut self.done_rx)
|
||||
.poll(cx)
|
||||
.map(|res| res.expect("TaskHandle done channel unexpectedly closed"))
|
||||
}
|
||||
}
|
||||
|
||||
impl<E: RunError> TaskHandle<E> {
|
||||
/// Get the unique identifier of the task
|
||||
#[must_use]
|
||||
pub const fn task_id(&self) -> TaskId {
|
||||
self.controller.task_id
|
||||
}
|
||||
|
||||
/// Gracefully pause the task at a safe point defined by the user using the [`Interrupter`]
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Will panic if the worker failed to ack the pause request
|
||||
pub async fn pause(&self) -> Result<(), SystemError> {
|
||||
self.controller.pause().await
|
||||
}
|
||||
|
||||
/// Gracefully cancel the task at a safe point defined by the user using the [`Interrupter`]
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Will panic if the worker failed to ack the cancel request
|
||||
pub async fn cancel(&self) {
|
||||
self.controller.cancel().await;
|
||||
}
|
||||
|
||||
/// Forcefully abort the task, this can lead to corrupted data or inconsistent states, so use it with caution.
|
||||
pub async fn force_abortion(&self) -> Result<(), SystemError> {
|
||||
self.controller.force_abortion().await
|
||||
}
|
||||
|
||||
/// Marks the task to be resumed by the task system, the worker will start processing it if there is a slot
|
||||
/// available or will be enqueued otherwise.
|
||||
pub async fn resume(&self) -> Result<(), SystemError> {
|
||||
self.controller.resume().await
|
||||
}
|
||||
|
||||
/// Gets the [`TaskRemoteController`] object that can be used to control the task remotely, to
|
||||
/// pause, cancel, resume, or force abortion.
|
||||
#[must_use]
|
||||
pub fn remote_controller(&self) -> TaskRemoteController {
|
||||
self.controller.clone()
|
||||
}
|
||||
}
|
||||
|
||||
/// A helper struct when you just want to cancel a task if its `TaskHandle` gets dropped.
|
||||
pub struct CancelTaskOnDrop<E: RunError>(pub TaskHandle<E>);
|
||||
|
||||
impl<E: RunError> Future for CancelTaskOnDrop<E> {
|
||||
type Output = Result<TaskStatus<E>, SystemError>;
|
||||
|
||||
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
|
||||
Pin::new(&mut self.0).poll(cx)
|
||||
}
|
||||
}
|
||||
|
||||
impl<E: RunError> Drop for CancelTaskOnDrop<E> {
|
||||
fn drop(&mut self) {
|
||||
// FIXME: We should use async drop when it becomes stable
|
||||
block_on(self.0.cancel());
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
|
@ -436,7 +556,7 @@ impl TaskWorktable {
|
|||
self.is_aborted.store(true, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
pub async fn pause(&self, tx: oneshot::Sender<Result<(), SystemError>>) {
|
||||
pub async fn pause(&self, tx: oneshot::Sender<()>) {
|
||||
self.is_paused.store(true, Ordering::Relaxed);
|
||||
self.is_running.store(false, Ordering::Relaxed);
|
||||
|
||||
|
@ -451,7 +571,7 @@ impl TaskWorktable {
|
|||
.expect("Worker channel closed trying to pause task");
|
||||
}
|
||||
|
||||
pub async fn cancel(&self, tx: oneshot::Sender<Result<(), SystemError>>) {
|
||||
pub async fn cancel(&self, tx: oneshot::Sender<()>) {
|
||||
self.is_canceled.store(true, Ordering::Relaxed);
|
||||
self.is_running.store(false, Ordering::Relaxed);
|
||||
|
||||
|
|
|
@ -8,6 +8,8 @@ use async_channel as chan;
|
|||
use tokio::{spawn, sync::oneshot, task::JoinHandle};
|
||||
use tracing::{error, info, trace, warn};
|
||||
|
||||
use crate::task::TaskRemoteController;
|
||||
|
||||
use super::{
|
||||
error::{RunError, SystemError},
|
||||
message::WorkerMessage,
|
||||
|
@ -127,10 +129,12 @@ impl<E: RunError> Worker<E> {
|
|||
.expect("Worker channel closed trying to add task");
|
||||
|
||||
TaskHandle {
|
||||
worktable,
|
||||
done_rx,
|
||||
controller: TaskRemoteController {
|
||||
worktable,
|
||||
system_comm: self.system_comm.clone(),
|
||||
task_id,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -168,11 +172,7 @@ impl<E: RunError> Worker<E> {
|
|||
.expect("Worker channel closed trying to pause a not running task");
|
||||
}
|
||||
|
||||
pub async fn cancel_not_running_task(
|
||||
&self,
|
||||
task_id: TaskId,
|
||||
ack: oneshot::Sender<Result<(), SystemError>>,
|
||||
) {
|
||||
pub async fn cancel_not_running_task(&self, task_id: TaskId, ack: oneshot::Sender<()>) {
|
||||
self.msgs_tx
|
||||
.send(WorkerMessage::CancelNotRunningTask { task_id, ack })
|
||||
.await
|
||||
|
|
|
@ -65,7 +65,7 @@ pub(super) async fn run<E: RunError>(
|
|||
|
||||
StreamMessage::Commands(WorkerMessage::CancelNotRunningTask { task_id, ack }) => {
|
||||
runner.cancel_not_running_task(task_id);
|
||||
if ack.send(Ok(())).is_err() {
|
||||
if ack.send(()).is_err() {
|
||||
warn!("Resume task channel closed before sending ack");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -965,6 +965,7 @@ impl<E: RunError> Runner<E> {
|
|||
) {
|
||||
match status {
|
||||
InternalTaskExecStatus::Done(out) => {
|
||||
self.task_kinds.remove(&task_id);
|
||||
send_complete_task_response(self.worker_id, task_id, task_work_state, out);
|
||||
}
|
||||
|
||||
|
@ -977,10 +978,12 @@ impl<E: RunError> Runner<E> {
|
|||
}
|
||||
|
||||
InternalTaskExecStatus::Canceled => {
|
||||
self.task_kinds.remove(&task_id);
|
||||
send_cancel_task_response(self.worker_id, task_id, task_work_state);
|
||||
}
|
||||
|
||||
InternalTaskExecStatus::Error(e) => {
|
||||
self.task_kinds.remove(&task_id);
|
||||
send_error_task_response(self.worker_id, task_id, task_work_state, e);
|
||||
}
|
||||
|
||||
|
@ -1057,7 +1060,7 @@ impl<E: RunError> Runner<E> {
|
|||
}
|
||||
|
||||
if self.task_kinds.capacity() > TASK_QUEUE_INITIAL_SIZE {
|
||||
assert_eq!(self.task_kinds.len(), 0);
|
||||
assert_eq!(self.task_kinds.len(), self.paused_tasks.len());
|
||||
self.task_kinds.shrink_to(TASK_QUEUE_INITIAL_SIZE);
|
||||
}
|
||||
|
||||
|
@ -1190,15 +1193,10 @@ fn handle_task_suspension(
|
|||
worktable.pause(tx).await;
|
||||
|
||||
match rx.await {
|
||||
Ok(Ok(())) => {
|
||||
Ok(()) => {
|
||||
trace!("Suspending: <worker_id='{worker_id}', task_id='{task_id}'>");
|
||||
has_suspended.store(true, Ordering::Relaxed);
|
||||
}
|
||||
Ok(Err(e)) => {
|
||||
error!(
|
||||
"Task <worker_id='{worker_id}', task_id='{task_id}'> failed to suspend: {e:#?}",
|
||||
);
|
||||
}
|
||||
Err(_) => {
|
||||
// The task probably finished before we could suspend it so the channel was dropped
|
||||
trace!(
|
||||
|
@ -1408,7 +1406,7 @@ fn send_complete_task_response<E: RunError>(
|
|||
out: TaskOutput,
|
||||
) {
|
||||
worktable.set_completed();
|
||||
if done_tx.send(Ok(TaskStatus::Done(out))).is_err() {
|
||||
if done_tx.send(Ok(TaskStatus::Done((task_id, out)))).is_err() {
|
||||
warn!(
|
||||
"Task done channel closed before sending done response for task: \
|
||||
<worker_id='{worker_id}', task_id='{task_id}'>"
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue