diff --git a/.vscode/tasks.json b/.vscode/tasks.json index c05ee4743..71c0c420f 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -23,20 +23,11 @@ "type": "shell", "label": "ui:dev", "problemMatcher": { - "owner": "vite", - "fileLocation": "autoDetect", - "pattern": { - "regexp": "^([^\\s].*)\\((\\d+|\\d+,\\d+|\\d+,\\d+,\\d+,\\d+)\\):\\s+(error|warning|info)\\s+(TS\\d+)\\s*:\\s*(.*)$", - "file": 1, - "location": 2, - "severity": 3, - "code": 4, - "message": 5 - }, + "base": "$tsc-watch", "background": { "activeOnStart": true, - "beginsPattern": "^> @sd\\/root@\\d\\.\\d\\.\\d desktop", - "endsPattern": "to show help$" + "beginsPattern": "VITE v", + "endsPattern": "http://localhost:8001/" } }, "isBackground": true, @@ -56,7 +47,7 @@ { "type": "cargo", "command": "run", - "args": ["--package", "spacedrive", "--bin", "spacedrive"], + "args": ["--manifest-path=./apps/desktop/src-tauri/Cargo.toml", "--no-default-features"], "env": { "RUST_BACKTRACE": "short" // Change this if you want more or less backtrace }, @@ -68,7 +59,7 @@ { "type": "cargo", "command": "run", - "args": ["--package", "spacedrive", "--bin", "spacedrive", "--release"], + "args": ["--manifest-path=./apps/desktop/src-tauri/Cargo.toml", "--release"], "env": { "RUST_BACKTRACE": "short" // Change this if you want more or less backtrace }, diff --git a/core/src/api/jobs.rs b/core/src/api/jobs.rs index 608aeb662..47f1ab861 100644 --- a/core/src/api/jobs.rs +++ b/core/src/api/jobs.rs @@ -1,15 +1,14 @@ use crate::{ job::{Job, JobManager}, - location::{fetch_location, LocationError}, + location::{find_location, LocationError}, object::{ - identifier_job::full_identifier_job::{FullFileIdentifierJob, FullFileIdentifierJobInit}, - preview::{ThumbnailJob, ThumbnailJobInit}, + file_identifier::file_identifier_job::{FileIdentifierJob, FileIdentifierJobInit}, + preview::thumbnailer_job::{ThumbnailerJob, ThumbnailerJobInit}, validation::validator_job::{ObjectValidatorJob, ObjectValidatorJobInit}, }, - prisma::location, }; -use rspc::{ErrorCode, Type}; +use rspc::Type; use serde::Deserialize; use std::path::PathBuf; @@ -41,24 +40,18 @@ pub(crate) fn mount() -> RouterBuilder { t( |_, args: GenerateThumbsForLocationArgs, library| async move { - if library - .db - .location() - .count(vec![location::id::equals(args.id)]) - .exec() - .await? == 0 - { + let Some(location) = find_location(&library, args.id).exec().await? else { return Err(LocationError::IdNotFound(args.id).into()); - } + }; library .spawn_job(Job::new( - ThumbnailJobInit { - location_id: args.id, - root_path: PathBuf::new(), + ThumbnailerJobInit { + location, + sub_path: Some(args.path), background: false, }, - ThumbnailJob {}, + ThumbnailerJob {}, )) .await; @@ -74,11 +67,8 @@ pub(crate) fn mount() -> RouterBuilder { } t(|_, args: ObjectValidatorArgs, library| async move { - if fetch_location(&library, args.id).exec().await?.is_none() { - return Err(rspc::Error::new( - ErrorCode::NotFound, - "Location not found".into(), - )); + if find_location(&library, args.id).exec().await?.is_none() { + return Err(LocationError::IdNotFound(args.id).into()); } library @@ -103,20 +93,17 @@ pub(crate) fn mount() -> RouterBuilder { } t(|_, args: IdentifyUniqueFilesArgs, library| async move { - if fetch_location(&library, args.id).exec().await?.is_none() { - return Err(rspc::Error::new( - ErrorCode::NotFound, - "Location not found".into(), - )); - } + let Some(location) = find_location(&library, args.id).exec().await? 
else { + return Err(LocationError::IdNotFound(args.id).into()); + }; library .spawn_job(Job::new( - FullFileIdentifierJobInit { - location_id: args.id, + FileIdentifierJobInit { + location, sub_path: Some(args.path), }, - FullFileIdentifierJob {}, + FileIdentifierJob {}, )) .await; diff --git a/core/src/api/locations.rs b/core/src/api/locations.rs index f4ac5b467..025c1b7b7 100644 --- a/core/src/api/locations.rs +++ b/core/src/api/locations.rs @@ -1,9 +1,9 @@ use crate::{ library::Library, location::{ - delete_location, fetch_location, - indexer::{indexer_job::indexer_job_location, rules::IndexerRuleCreateArgs}, - relink_location, scan_location, LocationCreateArgs, LocationError, LocationUpdateArgs, + delete_location, find_location, indexer::rules::IndexerRuleCreateArgs, light_scan_location, + location_with_indexer_rules, relink_location, scan_location, LocationCreateArgs, + LocationError, LocationUpdateArgs, }, prisma::{file_path, indexer_rule, indexer_rules_in_location, location, object, tag}, }; @@ -44,7 +44,6 @@ pub struct ExplorerData { file_path::include!(file_path_with_object { object }); object::include!(object_with_file_paths { file_paths }); -indexer_rules_in_location::include!(indexer_rules_in_location_with_rules { indexer_rule }); pub(crate) fn mount() -> impl RouterBuilderLike { ::new() @@ -65,7 +64,7 @@ pub(crate) fn mount() -> impl RouterBuilderLike { .db .location() .find_unique(location::id::equals(location_id)) - .include(location::include!({ indexer_rules })) + .include(location_with_indexer_rules::include()) .exec() .await?) }) @@ -82,14 +81,10 @@ pub(crate) fn mount() -> impl RouterBuilderLike { t(|_, mut args: LocationExplorerArgs, library| async move { let Library { db, .. } = &library; - let location = db - .location() - .find_unique(location::id::equals(args.location_id)) + let location = find_location(&library, args.location_id) .exec() .await? - .ok_or_else(|| { - rspc::Error::new(ErrorCode::NotFound, "Location not found".into()) - })?; + .ok_or(LocationError::IdNotFound(args.location_id))?; if !args.path.ends_with('/') { args.path += "/"; @@ -177,18 +172,11 @@ pub(crate) fn mount() -> impl RouterBuilderLike { }) .library_mutation("fullRescan", |t| { t(|_, location_id: i32, library| async move { - // remove existing paths - library - .db - .file_path() - .delete_many(vec![file_path::location_id::equals(location_id)]) - .exec() - .await?; // rescan location scan_location( &library, - fetch_location(&library, location_id) - .include(indexer_job_location::include()) + find_location(&library, location_id) + .include(location_with_indexer_rules::include()) .exec() .await? .ok_or(LocationError::IdNotFound(location_id))?, @@ -198,9 +186,25 @@ pub(crate) fn mount() -> impl RouterBuilderLike { }) }) .library_mutation("quickRescan", |t| { - t(|_, _: (), _| async move { - #[allow(unreachable_code)] - Ok(todo!()) + #[derive(Clone, Serialize, Deserialize, Type, Debug)] + pub struct LightScanArgs { + pub location_id: i32, + pub sub_path: String, + } + + t(|_, args: LightScanArgs, library| async move { + // light rescan location + light_scan_location( + &library, + find_location(&library, args.location_id) + .include(location_with_indexer_rules::include()) + .exec() + .await? 
+ .ok_or(LocationError::IdNotFound(args.location_id))?, + &args.sub_path, + ) + .await + .map_err(Into::into) }) }) .subscription("online", |t| { diff --git a/core/src/job/job_manager.rs b/core/src/job/job_manager.rs index 22a1f2bd3..7efed330c 100644 --- a/core/src/job/job_manager.rs +++ b/core/src/job/job_manager.rs @@ -2,16 +2,27 @@ use crate::{ invalidate_query, job::{worker::Worker, DynJob, Job, JobError}, library::Library, - location::indexer::indexer_job::{IndexerJob, INDEXER_JOB_NAME}, + location::indexer::{ + indexer_job::{IndexerJob, INDEXER_JOB_NAME}, + shallow_indexer_job::{ShallowIndexerJob, SHALLOW_INDEXER_JOB_NAME}, + }, object::{ + file_identifier::{ + file_identifier_job::{FileIdentifierJob, FILE_IDENTIFIER_JOB_NAME}, + shallow_file_identifier_job::{ + ShallowFileIdentifierJob, SHALLOW_FILE_IDENTIFIER_JOB_NAME, + }, + }, fs::{ copy::{FileCopierJob, COPY_JOB_NAME}, cut::{FileCutterJob, CUT_JOB_NAME}, delete::{FileDeleterJob, DELETE_JOB_NAME}, erase::{FileEraserJob, ERASE_JOB_NAME}, }, - identifier_job::full_identifier_job::{FullFileIdentifierJob, FULL_IDENTIFIER_JOB_NAME}, - preview::{ThumbnailJob, THUMBNAIL_JOB_NAME}, + preview::{ + shallow_thumbnailer_job::{ShallowThumbnailerJob, SHALLOW_THUMBNAILER_JOB_NAME}, + thumbnailer_job::{ThumbnailerJob, THUMBNAILER_JOB_NAME}, + }, validation::validator_job::{ObjectValidatorJob, VALIDATOR_JOB_NAME}, }, prisma::{job, node}, @@ -71,8 +82,8 @@ impl JobManager { // FIXME: if this task crashes, the entire application is unusable while let Some(event) = internal_receiver.recv().await { match event { - JobManagerEvent::IngestJob(ctx, job) => { - this2.clone().dispatch_job(&ctx, job).await + JobManagerEvent::IngestJob(library, job) => { + this2.clone().dispatch_job(&library, job).await } } } @@ -83,7 +94,7 @@ impl JobManager { this } - pub async fn ingest(self: Arc, ctx: &Library, job: Box) { + pub async fn ingest(self: Arc, library: &Library, job: Box) { let job_hash = job.hash(); debug!( "Ingesting job: ", @@ -93,7 +104,7 @@ impl JobManager { if !self.current_jobs_hashes.read().await.contains(&job_hash) { self.current_jobs_hashes.write().await.insert(job_hash); - self.dispatch_job(ctx, job).await; + self.dispatch_job(library, job).await; } else { debug!( "Job already in queue: ", @@ -119,7 +130,7 @@ impl JobManager { } } - pub async fn complete(self: Arc, ctx: &Library, job_id: Uuid, job_hash: u64) { + pub async fn complete(self: Arc, library: &Library, job_id: Uuid, job_hash: u64) { // remove worker from running workers and from current jobs hashes self.current_jobs_hashes.write().await.remove(&job_hash); self.running_workers.write().await.remove(&job_id); @@ -128,7 +139,7 @@ impl JobManager { if let Some(job) = job { // We can't directly execute `self.ingest` here because it would cause an async cycle. 
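// A minimal, self-contained sketch of the pattern the comment above describes,
// with placeholder types standing in for `JobManagerEvent`, `Library`, and `DynJob`
// (not Spacedrive's actual implementation): `complete()` only sends on the internal
// channel, and the receiver loop spawned in `new()` is the sole caller of
// `dispatch()`, so no async call cycle can form.

use std::sync::Arc;
use tokio::sync::mpsc;

enum Event {
	// Stand-in for `JobManagerEvent::IngestJob(library, job)`
	Ingest(String),
}

struct Manager {
	internal_sender: mpsc::UnboundedSender<Event>,
}

impl Manager {
	fn new() -> Arc<Self> {
		let (internal_sender, mut internal_receiver) = mpsc::unbounded_channel();
		let this = Arc::new(Self { internal_sender });

		let this2 = Arc::clone(&this);
		tokio::spawn(async move {
			while let Some(Event::Ingest(job)) = internal_receiver.recv().await {
				this2.dispatch(job).await;
			}
		});

		this
	}

	async fn dispatch(&self, job: String) {
		println!("dispatching {job}");
	}

	// Called when a worker finishes: enqueue the next queued job instead of recursing.
	fn complete(&self, next_job: Option<String>) {
		if let Some(job) = next_job {
			self.internal_sender
				.send(Event::Ingest(job))
				.unwrap_or_else(|_| eprintln!("failed to enqueue job"));
		}
	}
}

#[tokio::main]
async fn main() {
	let manager = Manager::new();
	manager.complete(Some("thumbnailer".to_string()));
	tokio::time::sleep(std::time::Duration::from_millis(50)).await;
}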
self.internal_sender - .send(JobManagerEvent::IngestJob(ctx.clone(), job)) + .send(JobManagerEvent::IngestJob(library.clone(), job)) .unwrap_or_else(|_| { error!("Failed to ingest job!"); }); @@ -146,9 +157,9 @@ impl JobManager { } pub async fn get_history( - ctx: &Library, + library: &Library, ) -> Result, prisma_client_rust::QueryError> { - Ok(ctx + Ok(library .db .job() .find_many(vec![job::status::not(JobStatus::Running.int_value())]) @@ -161,10 +172,10 @@ impl JobManager { .collect()) } - pub async fn clear_all_jobs(ctx: &Library) -> Result<(), prisma_client_rust::QueryError> { - ctx.db.job().delete_many(vec![]).exec().await?; + pub async fn clear_all_jobs(library: &Library) -> Result<(), prisma_client_rust::QueryError> { + library.db.job().delete_many(vec![]).exec().await?; - invalidate_query!(ctx, "jobs.getHistory"); + invalidate_query!(library, "jobs.getHistory"); Ok(()) } @@ -190,8 +201,8 @@ impl JobManager { } } - pub async fn resume_jobs(self: Arc, ctx: &Library) -> Result<(), JobError> { - let paused_jobs = ctx + pub async fn resume_jobs(self: Arc, library: &Library) -> Result<(), JobError> { + let paused_jobs = library .db .job() .find_many(vec![job::status::equals(JobStatus::Paused.int_value())]) @@ -203,47 +214,65 @@ impl JobManager { info!("Resuming job: {}, id: {}", paused_job.name, paused_job.id); match paused_job.name.as_str() { - THUMBNAIL_JOB_NAME => { + THUMBNAILER_JOB_NAME => { Arc::clone(&self) - .dispatch_job(ctx, Job::resume(paused_job, ThumbnailJob {})?) + .dispatch_job(library, Job::resume(paused_job, ThumbnailerJob {})?) + .await; + } + SHALLOW_THUMBNAILER_JOB_NAME => { + Arc::clone(&self) + .dispatch_job(library, Job::resume(paused_job, ShallowThumbnailerJob {})?) .await; } INDEXER_JOB_NAME => { Arc::clone(&self) - .dispatch_job(ctx, Job::resume(paused_job, IndexerJob {})?) + .dispatch_job(library, Job::resume(paused_job, IndexerJob {})?) .await; } - FULL_IDENTIFIER_JOB_NAME => { + SHALLOW_INDEXER_JOB_NAME => { Arc::clone(&self) - .dispatch_job(ctx, Job::resume(paused_job, FullFileIdentifierJob {})?) + .dispatch_job(library, Job::resume(paused_job, ShallowIndexerJob {})?) + .await; + } + FILE_IDENTIFIER_JOB_NAME => { + Arc::clone(&self) + .dispatch_job(library, Job::resume(paused_job, FileIdentifierJob {})?) + .await; + } + SHALLOW_FILE_IDENTIFIER_JOB_NAME => { + Arc::clone(&self) + .dispatch_job( + library, + Job::resume(paused_job, ShallowFileIdentifierJob {})?, + ) .await; } VALIDATOR_JOB_NAME => { Arc::clone(&self) - .dispatch_job(ctx, Job::resume(paused_job, ObjectValidatorJob {})?) + .dispatch_job(library, Job::resume(paused_job, ObjectValidatorJob {})?) .await; } CUT_JOB_NAME => { Arc::clone(&self) - .dispatch_job(ctx, Job::resume(paused_job, FileCutterJob {})?) + .dispatch_job(library, Job::resume(paused_job, FileCutterJob {})?) .await; } COPY_JOB_NAME => { Arc::clone(&self) .dispatch_job( - ctx, + library, Job::resume(paused_job, FileCopierJob { done_tx: None })?, ) .await; } DELETE_JOB_NAME => { Arc::clone(&self) - .dispatch_job(ctx, Job::resume(paused_job, FileDeleterJob {})?) + .dispatch_job(library, Job::resume(paused_job, FileDeleterJob {})?) .await; } ERASE_JOB_NAME => { Arc::clone(&self) - .dispatch_job(ctx, Job::resume(paused_job, FileEraserJob {})?) + .dispatch_job(library, Job::resume(paused_job, FileEraserJob {})?) 
.await; } _ => { @@ -259,7 +288,7 @@ impl JobManager { Ok(()) } - async fn dispatch_job(self: Arc, ctx: &Library, mut job: Box) { + async fn dispatch_job(self: Arc, library: &Library, mut job: Box) { // create worker to process job let mut running_workers = self.running_workers.write().await; if running_workers.len() < MAX_WORKERS { @@ -276,8 +305,12 @@ impl JobManager { let wrapped_worker = Arc::new(Mutex::new(worker)); - if let Err(e) = - Worker::spawn(Arc::clone(&self), Arc::clone(&wrapped_worker), ctx.clone()).await + if let Err(e) = Worker::spawn( + Arc::clone(&self), + Arc::clone(&wrapped_worker), + library.clone(), + ) + .await { error!("Error spawning worker: {:?}", e); } else { @@ -375,22 +408,24 @@ impl JobReport { } } - pub async fn create(&self, ctx: &Library) -> Result<(), JobError> { - ctx.db + pub async fn create(&self, library: &Library) -> Result<(), JobError> { + library + .db .job() .create( self.id.as_bytes().to_vec(), self.name.clone(), JobStatus::Running as i32, - node::id::equals(ctx.node_local_id), + node::id::equals(library.node_local_id), vec![job::data::set(self.data.clone())], ) .exec() .await?; Ok(()) } - pub async fn update(&self, ctx: &Library) -> Result<(), JobError> { - ctx.db + pub async fn update(&self, library: &Library) -> Result<(), JobError> { + library + .db .job() .update( job::id::equals(self.id.as_bytes().to_vec()), diff --git a/core/src/job/mod.rs b/core/src/job/mod.rs index c6165c58c..2884ba550 100644 --- a/core/src/job/mod.rs +++ b/core/src/job/mod.rs @@ -1,6 +1,6 @@ use crate::{ location::{indexer::IndexerError, LocationError, LocationManagerError}, - object::{identifier_job::IdentifierJobError, preview::ThumbnailError}, + object::{file_identifier::FileIdentifierJobError, preview::ThumbnailerError}, }; use std::{ @@ -13,7 +13,7 @@ use rmp_serde::{decode::Error as DecodeError, encode::Error as EncodeError}; use sd_crypto::Error as CryptoError; use serde::{de::DeserializeOwned, Deserialize, Serialize}; use thiserror::Error; -use tracing::warn; +use tracing::info; use uuid::Uuid; mod job_manager; @@ -57,10 +57,10 @@ pub enum JobError { IndexerError(#[from] IndexerError), #[error("Location error: {0}")] LocationError(#[from] LocationError), - #[error("Thumbnail error: {0}")] - ThumbnailError(#[from] ThumbnailError), + #[error("Thumbnailer error: {0}")] + ThumbnailError(#[from] ThumbnailerError), #[error("Identifier error: {0}")] - IdentifierError(#[from] IdentifierJobError), + IdentifierError(#[from] FileIdentifierJobError), #[error("Crypto error: {0}")] CryptoError(#[from] CryptoError), @@ -172,23 +172,32 @@ impl DynJob for Job { } async fn run(&mut self, ctx: WorkerContext) -> JobResult { + let mut job_should_run = true; + // Checking if we have a brand new job, or if we are resuming an old one. if self.state.data.is_none() { - self.stateful_job.init(ctx.clone(), &mut self.state).await?; + if let Err(e) = self.stateful_job.init(ctx.clone(), &mut self.state).await { + if matches!(e, JobError::EarlyFinish { .. }) { + info!("{e}"); + job_should_run = false; + } else { + return Err(e); + } + } } let mut shutdown_rx = ctx.shutdown_rx(); let shutdown_rx_fut = shutdown_rx.recv(); tokio::pin!(shutdown_rx_fut); - while !self.state.steps.is_empty() { + while job_should_run && !self.state.steps.is_empty() { tokio::select! { step_result = self.stateful_job.execute_step( ctx.clone(), &mut self.state, ) => { if matches!(step_result, Err(JobError::EarlyFinish { .. 
})) { - warn!("{}", step_result.unwrap_err()); + info!("{}", step_result.unwrap_err()); break; } else { step_result?; diff --git a/core/src/library/library.rs b/core/src/library/library.rs index 9dbbe7a17..749fa959e 100644 --- a/core/src/library/library.rs +++ b/core/src/library/library.rs @@ -1,6 +1,11 @@ use crate::{ - api::CoreEvent, job::DynJob, location::LocationManager, node::NodeConfigManager, - object::preview::THUMBNAIL_CACHE_DIR_NAME, prisma::PrismaClient, sync::SyncManager, + api::CoreEvent, + job::DynJob, + location::{file_path_helper::LastFilePathIdManager, LocationManager}, + node::NodeConfigManager, + object::preview::THUMBNAIL_CACHE_DIR_NAME, + prisma::PrismaClient, + sync::SyncManager, NodeContext, }; @@ -29,6 +34,8 @@ pub struct Library { pub sync: Arc, /// key manager that provides encryption keys to functions that require them pub key_manager: Arc, + /// last id by location keeps track of the last id by location for the library + pub last_file_path_id_manager: Arc, /// node_local_id holds the local ID of the node which is running the library. pub node_local_id: i32, /// node_context holds the node context for the node which this library is running on. diff --git a/core/src/library/manager.rs b/core/src/library/manager.rs index 6c2facfcf..77fc7fc2d 100644 --- a/core/src/library/manager.rs +++ b/core/src/library/manager.rs @@ -1,5 +1,6 @@ use crate::{ invalidate_query, + location::file_path_helper::LastFilePathIdManager, node::Platform, prisma::{node, PrismaClient}, sync::SyncManager, @@ -131,7 +132,6 @@ impl LibraryManager { let mut libraries = Vec::new(); for entry in fs::read_dir(&libraries_dir)? - .into_iter() .filter_map(|entry| entry.ok()) .filter(|entry| { entry.path().is_file() @@ -346,6 +346,7 @@ impl LibraryManager { key_manager, sync: Arc::new(sync_manager), db, + last_file_path_id_manager: Arc::new(LastFilePathIdManager::new()), node_local_id: node_data.id, node_context, }) diff --git a/core/src/location/error.rs b/core/src/location/error.rs index a2ecc2e69..18d511818 100644 --- a/core/src/location/error.rs +++ b/core/src/location/error.rs @@ -7,7 +7,7 @@ use thiserror::Error; use tokio::io; use uuid::Uuid; -use super::metadata::LocationMetadataError; +use super::{file_path_helper::FilePathError, metadata::LocationMetadataError}; /// Error type for location related errors #[derive(Error, Debug)] @@ -58,6 +58,8 @@ pub enum LocationError { DatabaseError(#[from] prisma_client_rust::QueryError), #[error("Location manager error (error: {0:?})")] LocationManagerError(#[from] LocationManagerError), + #[error("File path related error (error: {0})")] + FilePathError(#[from] FilePathError), } impl From for rspc::Error { diff --git a/core/src/location/file_path_helper.rs b/core/src/location/file_path_helper.rs index 30fc671fe..43d7274e5 100644 --- a/core/src/location/file_path_helper.rs +++ b/core/src/location/file_path_helper.rs @@ -1,82 +1,491 @@ -use crate::{library::Library, prisma::file_path}; +use crate::prisma::{ + file_path::{self, FindMany}, + location, PrismaClient, +}; -use std::sync::atomic::{AtomicI32, Ordering}; +use std::{ + fmt::{Display, Formatter}, + path::{Path, PathBuf}, +}; +use dashmap::{mapref::entry::Entry, DashMap}; +use futures::future::try_join_all; use prisma_client_rust::{Direction, QueryError}; +use serde::{Deserialize, Serialize}; +use thiserror::Error; +use tokio::{fs, io}; +use tracing::error; -static LAST_FILE_PATH_ID: AtomicI32 = AtomicI32::new(0); +use super::LocationId; -file_path::select!(file_path_id_only { id }); +// File Path 
selectables! +file_path::select!(file_path_just_id_materialized_path { + id + materialized_path +}); +file_path::select!(file_path_for_file_identifier { + id + materialized_path + date_created +}); +file_path::select!(file_path_just_object_id { object_id }); +file_path::select!(file_path_for_object_validator { + id + materialized_path + integrity_checksum + location: select { + id + pub_id + } +}); +file_path::select!(file_path_just_materialized_path_cas_id { + materialized_path + cas_id +}); -pub async fn get_max_file_path_id(library: &Library) -> Result { - let mut last_id = LAST_FILE_PATH_ID.load(Ordering::Acquire); - if last_id == 0 { - last_id = fetch_max_file_path_id(library).await?; - LAST_FILE_PATH_ID.store(last_id, Ordering::Release); +// File Path includes! +file_path::include!(file_path_with_object { object }); + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct MaterializedPath { + pub(super) materialized_path: String, + pub(super) is_dir: bool, + pub(super) location_id: LocationId, + pub(super) name: String, + pub(super) extension: String, +} + +impl MaterializedPath { + pub fn new( + location_id: LocationId, + location_path: impl AsRef, + full_path: impl AsRef, + is_dir: bool, + ) -> Result { + let full_path = full_path.as_ref(); + let mut materialized_path = + extract_materialized_path(location_id, location_path, full_path)? + .to_str() + .expect("Found non-UTF-8 path") + .to_string(); + + if is_dir && !materialized_path.ends_with('/') { + materialized_path += "/"; + } + + let extension = if !is_dir { + let extension = full_path + .extension() + .unwrap_or_default() + .to_str() + .unwrap_or_default(); + + #[cfg(debug_assertions)] + { + // In dev mode, we lowercase the extension as we don't use the SQL migration, + // and using prisma.schema directly we can't set `COLLATE NOCASE` in the + // `extension` column at `file_path` table + extension.to_lowercase() + } + #[cfg(not(debug_assertions))] + { + extension.to_string() + } + } else { + String::new() + }; + + Ok(Self { + materialized_path, + is_dir, + location_id, + name: Self::prepare_name(full_path), + extension, + }) } - Ok(last_id) + pub fn location_id(&self) -> LocationId { + self.location_id + } + + fn prepare_name(path: &Path) -> String { + // Not using `impl AsRef` here because it's an private method + path.file_name() + .unwrap_or_default() + .to_str() + .unwrap_or_default() + .to_string() + } + + pub fn parent(&self) -> Self { + let parent_path = Path::new(&self.materialized_path) + .parent() + .unwrap_or_else(|| Path::new("/")); + + let mut parent_path_str = parent_path + .to_str() + .unwrap() // SAFETY: This unwrap is ok because this path was a valid UTF-8 String before + .to_string(); + + if !parent_path_str.ends_with('/') { + parent_path_str += "/"; + } + + Self { + materialized_path: parent_path_str, + is_dir: true, + location_id: self.location_id, + // NOTE: This way we don't use the same name for "/" `file_path`, that uses the location + // name in the database, check later if this is a problem + name: Self::prepare_name(parent_path), + extension: String::new(), + } + } } -pub fn set_max_file_path_id(id: i32) { - LAST_FILE_PATH_ID.store(id, Ordering::Relaxed); +impl From for String { + fn from(path: MaterializedPath) -> Self { + path.materialized_path + } } -async fn fetch_max_file_path_id(library: &Library) -> Result { - Ok(library - .db - .file_path() - .find_first(vec![]) - .order_by(file_path::id::order(Direction::Desc)) - .select(file_path_id_only::select()) +impl From<&MaterializedPath> for 
String { + fn from(path: &MaterializedPath) -> Self { + path.materialized_path.clone() + } +} + +impl AsRef for MaterializedPath { + fn as_ref(&self) -> &str { + self.materialized_path.as_ref() + } +} + +impl AsRef for MaterializedPath { + fn as_ref(&self) -> &Path { + Path::new(&self.materialized_path) + } +} + +impl Display for MaterializedPath { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.materialized_path) + } +} + +#[derive(Error, Debug)] +pub enum FilePathError { + #[error("Received an invalid sub path: ")] + InvalidSubPath { + location_path: PathBuf, + sub_path: PathBuf, + }, + #[error("Sub path is not a directory: {0}")] + SubPathNotDirectory(PathBuf), + #[error("The parent directory of the received sub path isn't indexed in the location: ")] + SubPathParentNotInLocation { + location_id: LocationId, + sub_path: PathBuf, + }, + #[error("Unable to extract materialized path from location: ")] + UnableToExtractMaterializedPath(LocationId, PathBuf), + #[error("Database error (error: {0:?})")] + DatabaseError(#[from] QueryError), + #[error("Database error (error: {0:?})")] + IOError(#[from] io::Error), +} + +#[derive(Debug)] +pub struct LastFilePathIdManager { + last_id_by_location: DashMap, +} + +impl Default for LastFilePathIdManager { + fn default() -> Self { + Self { + last_id_by_location: DashMap::with_capacity(4), + } + } +} + +impl LastFilePathIdManager { + pub fn new() -> Self { + Default::default() + } + + pub async fn get_max_file_path_id( + &self, + location_id: LocationId, + db: &PrismaClient, + ) -> Result { + Ok(match self.last_id_by_location.entry(location_id) { + Entry::Occupied(entry) => *entry.get(), + Entry::Vacant(entry) => { + // I wish I could use `or_try_insert_with` method instead of this crappy match, + // but we don't have async closures yet ): + let id = Self::fetch_max_file_path_id(location_id, db).await?; + entry.insert(id); + id + } + }) + } + + pub async fn set_max_file_path_id(&self, location_id: LocationId, id: i32) { + self.last_id_by_location.insert(location_id, id); + } + + async fn fetch_max_file_path_id( + location_id: LocationId, + db: &PrismaClient, + ) -> Result { + Ok(db + .file_path() + .find_first(vec![file_path::location_id::equals(location_id)]) + .order_by(file_path::id::order(Direction::Desc)) + .select(file_path::select!({ id })) + .exec() + .await? 
+ .map(|r| r.id) + .unwrap_or(0)) + } + + #[cfg(feature = "location-watcher")] + pub async fn create_file_path( + &self, + db: &PrismaClient, + MaterializedPath { + materialized_path, + is_dir, + location_id, + name, + extension, + }: MaterializedPath, + parent_id: Option, + ) -> Result { + // Keeping a reference in that map for the entire duration of the function, so we keep it locked + let mut last_id_ref = match self.last_id_by_location.entry(location_id) { + Entry::Occupied(ocupied) => ocupied.into_ref(), + Entry::Vacant(vacant) => { + let id = Self::fetch_max_file_path_id(location_id, db).await?; + vacant.insert(id) + } + }; + + let next_id = *last_id_ref + 1; + + let created_path = db + .file_path() + .create( + next_id, + location::id::equals(location_id), + materialized_path, + name, + extension, + vec![ + file_path::parent_id::set(parent_id), + file_path::is_dir::set(is_dir), + ], + ) + .exec() + .await?; + + *last_id_ref = next_id; + + Ok(created_path) + } +} + +pub fn subtract_location_path( + location_path: impl AsRef, + current_path: impl AsRef, +) -> Option { + let location_path = location_path.as_ref(); + let current_path = current_path.as_ref(); + + if let Ok(stripped) = current_path.strip_prefix(location_path) { + Some(stripped.to_path_buf()) + } else { + error!( + "Failed to strip location root path ({}) from current path ({})", + location_path.display(), + current_path.display() + ); + None + } +} + +pub fn extract_materialized_path( + location_id: LocationId, + location_path: impl AsRef, + path: impl AsRef, +) -> Result { + subtract_location_path(location_path, &path).ok_or_else(|| { + FilePathError::UnableToExtractMaterializedPath(location_id, path.as_ref().to_path_buf()) + }) +} + +pub async fn find_many_file_paths_by_full_path<'db>( + location: &location::Data, + full_paths: &[impl AsRef], + db: &'db PrismaClient, +) -> Result, FilePathError> { + let is_dirs = try_join_all( + full_paths + .iter() + .map(|path| async move { fs::metadata(path).await.map(|metadata| metadata.is_dir()) }), + ) + .await?; + + let materialized_paths = full_paths + .iter() + .zip(is_dirs.into_iter()) + .map(|(path, is_dir)| { + MaterializedPath::new(location.id, &location.path, path, is_dir).map(Into::into) + }) + // Collecting in a Result, so we stop on the first error + .collect::, _>>()?; + + Ok(db.file_path().find_many(vec![ + file_path::location_id::equals(location.id), + file_path::materialized_path::in_vec(materialized_paths), + ])) +} + +pub async fn get_existing_file_path_id( + materialized_path: MaterializedPath, + db: &PrismaClient, +) -> Result, FilePathError> { + db.file_path() + .find_first(vec![ + file_path::location_id::equals(materialized_path.location_id), + file_path::materialized_path::equals(materialized_path.into()), + ]) + .select(file_path::select!({ id })) .exec() - .await? 
- .map(|r| r.id) - .unwrap_or(0)) + .await + .map_or_else(|e| Err(e.into()), |r| Ok(r.map(|r| r.id))) } #[cfg(feature = "location-watcher")] -pub async fn create_file_path( - library: &Library, - location_id: i32, - mut materialized_path: String, - name: String, - extension: String, - parent_id: Option, - is_dir: bool, -) -> Result { - use crate::prisma::location; - - let mut last_id = LAST_FILE_PATH_ID.load(Ordering::Acquire); - if last_id == 0 { - last_id = fetch_max_file_path_id(library).await?; - } - - // If this new file_path is a directory, materialized_path must end with "/" - if is_dir && !materialized_path.ends_with('/') { - materialized_path += "/"; - } - - let next_id = last_id + 1; - - let created_path = library - .db - .file_path() - .create( - next_id, - location::id::equals(location_id), - materialized_path, - name, - extension, - vec![ - file_path::parent_id::set(parent_id), - file_path::is_dir::set(is_dir), - ], - ) +pub async fn get_existing_file_path( + materialized_path: MaterializedPath, + db: &PrismaClient, +) -> Result, FilePathError> { + db.file_path() + .find_first(vec![ + file_path::location_id::equals(materialized_path.location_id), + file_path::materialized_path::equals(materialized_path.into()), + ]) .exec() - .await?; - - LAST_FILE_PATH_ID.store(next_id, Ordering::Release); - - Ok(created_path) + .await + .map_err(Into::into) +} + +#[cfg(feature = "location-watcher")] +pub async fn get_existing_file_path_with_object( + materialized_path: MaterializedPath, + db: &PrismaClient, +) -> Result, FilePathError> { + db.file_path() + .find_first(vec![ + file_path::location_id::equals(materialized_path.location_id), + file_path::materialized_path::equals(materialized_path.into()), + ]) + // include object for orphan check + .include(file_path_with_object::include()) + .exec() + .await + .map_err(Into::into) +} + +#[cfg(feature = "location-watcher")] +pub async fn get_existing_file_or_directory( + location: &super::location_with_indexer_rules::Data, + path: impl AsRef, + db: &PrismaClient, +) -> Result, FilePathError> { + let mut maybe_file_path = get_existing_file_path_with_object( + MaterializedPath::new(location.id, &location.path, path.as_ref(), false)?, + db, + ) + .await?; + // First we just check if this path was a file in our db, if it isn't then we check for a directory + if maybe_file_path.is_none() { + maybe_file_path = get_existing_file_path_with_object( + MaterializedPath::new(location.id, &location.path, path.as_ref(), true)?, + db, + ) + .await?; + } + + Ok(maybe_file_path) +} + +#[cfg(feature = "location-watcher")] +pub async fn get_parent_dir( + materialized_path: &MaterializedPath, + db: &PrismaClient, +) -> Result, FilePathError> { + get_existing_file_path(materialized_path.parent(), db).await +} + +pub async fn ensure_sub_path_is_in_location( + location_path: impl AsRef, + sub_path: impl AsRef, +) -> Result { + let sub_path = sub_path.as_ref(); + let location_path = location_path.as_ref(); + + if !sub_path.starts_with(location_path) { + // If the sub_path doesn't start with the location_path, we have to check if it's a + // materialized path received from the frontend, then we check if the full path exists + let full_path = location_path.join(sub_path); + match fs::metadata(&full_path).await { + Ok(_) => Ok(full_path), + Err(e) if e.kind() == io::ErrorKind::NotFound => Err(FilePathError::InvalidSubPath { + sub_path: sub_path.to_path_buf(), + location_path: location_path.to_path_buf(), + }), + Err(e) => Err(e.into()), + } + } else { + 
Ok(sub_path.to_path_buf()) + } +} + +pub async fn ensure_sub_path_is_directory( + location_path: impl AsRef, + sub_path: impl AsRef, +) -> Result<(), FilePathError> { + let sub_path = sub_path.as_ref(); + let location_path = location_path.as_ref(); + + match fs::metadata(sub_path).await { + Ok(meta) => { + if meta.is_file() { + Err(FilePathError::SubPathNotDirectory(sub_path.to_path_buf())) + } else { + Ok(()) + } + } + Err(e) if e.kind() == io::ErrorKind::NotFound => { + match fs::metadata(location_path.join(sub_path)).await { + Ok(meta) => { + if meta.is_file() { + Err(FilePathError::SubPathNotDirectory(sub_path.to_path_buf())) + } else { + Ok(()) + } + } + Err(e) if e.kind() == io::ErrorKind::NotFound => { + Err(FilePathError::InvalidSubPath { + sub_path: sub_path.to_path_buf(), + location_path: location_path.to_path_buf(), + }) + } + Err(e) => Err(e.into()), + } + } + Err(e) => Err(e.into()), + } } diff --git a/core/src/location/indexer/indexer_job.rs b/core/src/location/indexer/indexer_job.rs index 6c1cd6e8d..c86e48eaf 100644 --- a/core/src/location/indexer/indexer_job.rs +++ b/core/src/location/indexer/indexer_job.rs @@ -1,103 +1,38 @@ use crate::{ - job::{JobError, JobReportUpdate, JobResult, JobState, StatefulJob, WorkerContext}, + job::{JobError, JobResult, JobState, StatefulJob, WorkerContext}, library::Library, - location::indexer::rules::RuleKind, - prisma::{file_path, location}, - sync, + location::file_path_helper::{ + ensure_sub_path_is_directory, ensure_sub_path_is_in_location, + file_path_just_id_materialized_path, find_many_file_paths_by_full_path, + get_existing_file_path_id, MaterializedPath, + }, + prisma::location, }; -use std::{ - collections::HashMap, - ffi::OsStr, - hash::{Hash, Hasher}, - path::PathBuf, - time::Duration, -}; +use std::{collections::HashMap, path::Path}; -use chrono::{DateTime, Utc}; +use chrono::Utc; use itertools::Itertools; -use serde::{Deserialize, Serialize}; -use serde_json::json; use tokio::time::Instant; -use tracing::info; +use tracing::error; use super::{ - super::file_path_helper::{get_max_file_path_id, set_max_file_path_id}, - rules::IndexerRule, - walk::{walk, WalkEntry}, + execute_indexer_step, finalize_indexer, + rules::{IndexerRule, RuleKind}, + walk::walk, + IndexerError, IndexerJobData, IndexerJobInit, IndexerJobStep, IndexerJobStepEntry, + ScanProgress, }; /// BATCH_SIZE is the number of files to index at each step, writing the chunk of files metadata in the database. const BATCH_SIZE: usize = 1000; pub const INDEXER_JOB_NAME: &str = "indexer"; -#[derive(Clone)] -pub enum ScanProgress { - ChunkCount(usize), - SavedChunks(usize), - Message(String), -} - /// A `IndexerJob` is a stateful job that walks a directory and indexes all files. /// First it walks the directory and generates a list of files to index, chunked into /// batches of [`BATCH_SIZE`]. Then for each chunk it write the file metadata to the database. pub struct IndexerJob; -location::include!(indexer_job_location { - indexer_rules: select { indexer_rule } -}); - -/// `IndexerJobInit` receives a `location::Data` object to be indexed -#[derive(Serialize, Deserialize)] -pub struct IndexerJobInit { - pub location: indexer_job_location::Data, -} - -impl Hash for IndexerJobInit { - fn hash(&self, state: &mut H) { - self.location.id.hash(state); - } -} -/// `IndexerJobData` contains the state of the indexer job, which includes a `location_path` that -/// is cached and casted on `PathBuf` from `local_path` column in the `location` table. 
It also -/// contains some metadata for logging purposes. -#[derive(Serialize, Deserialize)] -pub struct IndexerJobData { - db_write_start: DateTime, - scan_read_time: Duration, - total_paths: usize, -} - -/// `IndexerJobStep` is a type alias, specifying that each step of the [`IndexerJob`] is a vector of -/// `IndexerJobStepEntry`. The size of this vector is given by the [`BATCH_SIZE`] constant. -pub type IndexerJobStep = Vec; - -/// `IndexerJobStepEntry` represents a single file to be indexed, given its metadata to be written -/// on the `file_path` table in the database -#[derive(Serialize, Deserialize)] -pub struct IndexerJobStepEntry { - path: PathBuf, - created_at: DateTime, - file_id: i32, - parent_id: Option, - is_dir: bool, -} - -impl IndexerJobData { - fn on_scan_progress(ctx: WorkerContext, progress: Vec) { - ctx.progress_debounced( - progress - .iter() - .map(|p| match p.clone() { - ScanProgress::ChunkCount(c) => JobReportUpdate::TaskCount(c), - ScanProgress::SavedChunks(p) => JobReportUpdate::CompletedTaskCount(p), - ScanProgress::Message(m) => JobReportUpdate::Message(m), - }) - .collect(), - ) - } -} - #[async_trait::async_trait] impl StatefulJob for IndexerJob { type Init = IndexerJobInit; @@ -110,8 +45,21 @@ impl StatefulJob for IndexerJob { /// Creates a vector of valid path buffers from a directory, chunked into batches of `BATCH_SIZE`. async fn init(&self, ctx: WorkerContext, state: &mut JobState) -> Result<(), JobError> { + let Library { + last_file_path_id_manager, + db, + .. + } = &ctx.library; + + let location_id = state.init.location.id; + let location_path = Path::new(&state.init.location.path); + // grab the next id so we can increment in memory for batch inserting - let first_file_id = get_max_file_path_id(&ctx.library).await?; + let first_file_id = last_file_path_id_manager + .get_max_file_path_id(location_id, db) + .await + .map_err(IndexerError::from)? + + 1; let mut indexer_rules_by_kind: HashMap> = HashMap::with_capacity(state.init.location.indexer_rules.len()); @@ -124,70 +72,142 @@ impl StatefulJob for IndexerJob { .push(indexer_rule); } + let mut dirs_ids = HashMap::new(); + + let to_walk_path = if let Some(ref sub_path) = state.init.sub_path { + let full_path = ensure_sub_path_is_in_location(location_path, sub_path) + .await + .map_err(IndexerError::from)?; + ensure_sub_path_is_directory(location_path, sub_path) + .await + .map_err(IndexerError::from)?; + + let sub_path_file_path_id = get_existing_file_path_id( + MaterializedPath::new(location_id, location_path, &full_path, true) + .map_err(IndexerError::from)?, + db, + ) + .await + .map_err(IndexerError::from)? 
+ .expect("Sub path should already exist in the database"); + + // If we're operating with a sub_path, then we have to put its id on `dirs_ids` map + dirs_ids.insert(full_path.clone(), sub_path_file_path_id); + + full_path + } else { + location_path.to_path_buf() + }; + let scan_start = Instant::now(); - let inner_ctx = ctx.clone(); - let paths = walk( - &state.init.location.path, + + let found_paths = walk( + to_walk_path, &indexer_rules_by_kind, - move |path, total_entries| { + |path, total_entries| { IndexerJobData::on_scan_progress( - inner_ctx.clone(), + &ctx, vec![ ScanProgress::Message(format!("Scanning {}", path.display())), ScanProgress::ChunkCount(total_entries / BATCH_SIZE), ], ); }, + // if we're not using a sub_path, then its a full indexing and we must include root dir + state.init.sub_path.is_none(), ) .await?; - let total_paths = paths.len(); - let last_file_id = first_file_id + total_paths as i32; - - // Setting our global state for file_path ids - set_max_file_path_id(last_file_id); - - let mut dirs_ids = HashMap::new(); - let paths_entries = paths - .into_iter() - .zip(first_file_id..last_file_id) - .map( - |( - WalkEntry { - path, - is_dir, - created_at, - }, - file_id, - )| { - let parent_id = if let Some(parent_dir) = path.parent() { - dirs_ids.get(parent_dir).copied() - } else { - None - }; - - dirs_ids.insert(path.clone(), file_id); - - IndexerJobStepEntry { - path, - created_at, - file_id, - parent_id, - is_dir, - } - }, + dirs_ids.extend( + find_many_file_paths_by_full_path( + &location::Data::from(&state.init.location), + &found_paths + .iter() + .map(|entry| &entry.path) + .collect::>(), + db, ) + .await + .map_err(IndexerError::from)? + .select(file_path_just_id_materialized_path::select()) + .exec() + .await? + .into_iter() + .map(|file_path| { + ( + location_path.join(file_path.materialized_path), + file_path.id, + ) + }), + ); + + let mut new_paths = found_paths + .into_iter() + .filter_map(|entry| { + MaterializedPath::new( + location_id, + &state.init.location.path, + &entry.path, + entry.is_dir, + ) + .map_or_else( + |e| { + error!("Failed to create materialized path: {e}"); + None + }, + |materialized_path| { + (!dirs_ids.contains_key(&entry.path)).then(|| { + IndexerJobStepEntry { + materialized_path, + created_at: entry.created_at, + file_id: 0, // To be set later + parent_id: entry.path.parent().and_then(|parent_dir| { + /*************************************************************** + * If we're dealing with a new path which its parent already * + * exist, we fetch its parent id from our `dirs_ids` map * + **************************************************************/ + dirs_ids.get(parent_dir).copied() + }), + full_path: entry.path, + } + }) + }, + ) + }) .collect::>(); - let total_entries = paths_entries.len(); + let total_paths = new_paths.len(); + let last_file_id = first_file_id + total_paths as i32; + + // Setting our global state for `file_path` ids + last_file_path_id_manager + .set_max_file_path_id(location_id, last_file_id) + .await; + + new_paths + .iter_mut() + .zip(first_file_id..last_file_id) + .for_each(|(entry, file_id)| { + // If the `parent_id` is still none here, is because the parent of this entry is also + // a new one in the DB + if entry.parent_id.is_none() { + entry.parent_id = entry + .full_path + .parent() + .and_then(|parent_dir| dirs_ids.get(parent_dir).copied()); + } + entry.file_id = file_id; + dirs_ids.insert(entry.full_path.clone(), file_id); + }); state.data = Some(IndexerJobData { db_write_start: Utc::now(), 
scan_read_time: scan_start.elapsed(), - total_paths: total_entries, + total_paths, + indexed_paths: 0, }); - state.steps = paths_entries + state.steps = new_paths .into_iter() .chunks(BATCH_SIZE) .into_iter() @@ -195,13 +215,13 @@ impl StatefulJob for IndexerJob { .map(|(i, chunk)| { let chunk_steps = chunk.collect::>(); IndexerJobData::on_scan_progress( - ctx.clone(), + &ctx, vec![ ScanProgress::SavedChunks(i), ScanProgress::Message(format!( "Writing {} of {} to db", i * chunk_steps.len(), - total_entries, + total_paths, )), ], ); @@ -218,114 +238,19 @@ impl StatefulJob for IndexerJob { ctx: WorkerContext, state: &mut JobState, ) -> Result<(), JobError> { - let Library { sync, db, .. } = &ctx.library; - - let location = &state.init.location; - - let (sync_stuff, paths): (Vec<_>, Vec<_>) = state.steps[0] - .iter() - .map(|entry| { - let name; - let extension; - - // if 'entry.path' is a directory, set extension to an empty string to - // avoid periods in folder names being interpreted as file extensions - if entry.is_dir { - extension = "".to_string(); - name = extract_name(entry.path.file_name()); - } else { - // if the 'entry.path' is not a directory, then get the extension and name. - extension = extract_name(entry.path.extension()).to_lowercase(); - name = extract_name(entry.path.file_stem()); - } - - let mut materialized_path = entry - .path - .strip_prefix(&location.path) - .unwrap() - .to_str() - .expect("Found non-UTF-8 path") - .to_string(); - - if entry.is_dir && !materialized_path.ends_with('/') { - materialized_path += "/"; - } - - use file_path::*; - - ( - sync.unique_shared_create( - sync::file_path::SyncId { - id: entry.file_id, - location: sync::location::SyncId { - pub_id: state.init.location.pub_id.clone(), - }, - }, - [ - ("materialized_path", json!(materialized_path.clone())), - ("name", json!(name.clone())), - ("is_dir", json!(entry.is_dir)), - ("extension", json!(extension.clone())), - ("parent_id", json!(entry.parent_id)), - ("date_created", json!(entry.created_at)), - ], - ), - file_path::create_unchecked( - entry.file_id, - location.id, - materialized_path, - name, - extension, - vec![ - is_dir::set(entry.is_dir), - parent_id::set(entry.parent_id), - date_created::set(entry.created_at.into()), - ], - ), - ) + execute_indexer_step(&state.init.location, &state.steps[0], ctx) + .await + .map(|indexed_paths| { + state + .data + .as_mut() + .expect("critical error: missing data on job state") + .indexed_paths = indexed_paths; }) - .unzip(); - - let count = sync - .write_ops( - db, - ( - sync_stuff, - db.file_path().create_many(paths).skip_duplicates(), - ), - ) - .await?; - - info!("Inserted {count} records"); - - Ok(()) } /// Logs some metadata about the indexer job - async fn finalize(&mut self, _ctx: WorkerContext, state: &mut JobState) -> JobResult { - let data = state - .data - .as_ref() - .expect("critical error: missing data on job state"); - info!( - "scan of {} completed in {:?}. {:?} files found. 
db write completed in {:?}", - state.init.location.path, - data.scan_read_time, - data.total_paths, - (Utc::now() - data.db_write_start) - .to_std() - .expect("critical error: non-negative duration"), - ); - - Ok(Some(serde_json::to_value(state)?)) + async fn finalize(&mut self, ctx: WorkerContext, state: &mut JobState) -> JobResult { + finalize_indexer(&state.init.location.path, state, ctx) } } - -/// Extract name from OsStr returned by PathBuff -fn extract_name(os_string: Option<&OsStr>) -> String { - os_string - .unwrap_or_default() - .to_str() - .unwrap_or_default() - .to_owned() -} diff --git a/core/src/location/indexer/mod.rs b/core/src/location/indexer/mod.rs index c6fa351fb..2acdebdea 100644 --- a/core/src/location/indexer/mod.rs +++ b/core/src/location/indexer/mod.rs @@ -1,15 +1,102 @@ -pub mod indexer_job; -pub mod rules; -mod walk; +use crate::{ + invalidate_query, + job::{JobError, JobReportUpdate, JobResult, JobState, StatefulJob, WorkerContext}, + library::Library, + prisma::file_path, + sync, +}; -use globset::Error; +use std::{ + hash::{Hash, Hasher}, + path::{Path, PathBuf}, + time::Duration, +}; + +use chrono::{DateTime, Utc}; use int_enum::IntEnumError; -use rmp_serde::{decode::Error as RMPDecodeError, encode::Error as RMPEncodeError}; +use rmp_serde::{decode, encode}; use rspc::ErrorCode; use rules::RuleKind; -use serde_json::Error as SerdeJsonError; -use std::io; +use serde::{de::DeserializeOwned, Deserialize, Serialize}; +use serde_json::json; use thiserror::Error; +use tokio::io; +use tracing::info; + +use super::{ + file_path_helper::{FilePathError, MaterializedPath}, + location_with_indexer_rules, +}; + +pub mod indexer_job; +pub mod rules; +pub mod shallow_indexer_job; +mod walk; + +/// `IndexerJobInit` receives a `location::Data` object to be indexed +/// and possibly a `sub_path` to be indexed. The `sub_path` is used when +/// we want do index just a part of a location. +#[derive(Serialize, Deserialize)] +pub struct IndexerJobInit { + pub location: location_with_indexer_rules::Data, + pub sub_path: Option, +} + +impl Hash for IndexerJobInit { + fn hash(&self, state: &mut H) { + self.location.id.hash(state); + if let Some(ref sub_path) = self.sub_path { + sub_path.hash(state); + } + } +} +/// `IndexerJobData` contains the state of the indexer job, which includes a `location_path` that +/// is cached and casted on `PathBuf` from `local_path` column in the `location` table. It also +/// contains some metadata for logging purposes. +#[derive(Serialize, Deserialize)] +pub struct IndexerJobData { + db_write_start: DateTime, + scan_read_time: Duration, + total_paths: usize, + indexed_paths: i64, +} + +/// `IndexerJobStep` is a type alias, specifying that each step of the [`IndexerJob`] is a vector of +/// `IndexerJobStepEntry`. The size of this vector is given by the [`BATCH_SIZE`] constant. 
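// A small self-contained sketch of how both indexer jobs turn the flat list of
// entries into steps of at most BATCH_SIZE items via itertools' `chunks`;
// `StepEntry` here is a placeholder, not the crate's `IndexerJobStepEntry`.

use itertools::Itertools;

const BATCH_SIZE: usize = 1000;

struct StepEntry {
	file_id: i32,
}

fn into_steps(entries: Vec<StepEntry>) -> Vec<Vec<StepEntry>> {
	let chunks = entries.into_iter().chunks(BATCH_SIZE);
	chunks
		.into_iter()
		.map(|chunk| chunk.collect::<Vec<_>>())
		.collect()
}

fn main() {
	let entries = (0..2500).map(|file_id| StepEntry { file_id }).collect();
	let steps = into_steps(entries);

	// 2500 entries -> 3 steps of 1000, 1000 and 500
	assert_eq!(steps.len(), 3);
	assert_eq!(steps[2].len(), 500);
	assert_eq!(steps[2][0].file_id, 2000);
}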
+pub type IndexerJobStep = Vec; + +/// `IndexerJobStepEntry` represents a single file to be indexed, given its metadata to be written +/// on the `file_path` table in the database +#[derive(Serialize, Deserialize)] +pub struct IndexerJobStepEntry { + full_path: PathBuf, + materialized_path: MaterializedPath, + created_at: DateTime, + file_id: i32, + parent_id: Option, +} + +impl IndexerJobData { + fn on_scan_progress(ctx: &WorkerContext, progress: Vec) { + ctx.progress_debounced( + progress + .iter() + .map(|p| match p.clone() { + ScanProgress::ChunkCount(c) => JobReportUpdate::TaskCount(c), + ScanProgress::SavedChunks(p) => JobReportUpdate::CompletedTaskCount(p), + ScanProgress::Message(m) => JobReportUpdate::Message(m), + }) + .collect(), + ) + } +} + +#[derive(Clone)] +pub enum ScanProgress { + ChunkCount(usize), + SavedChunks(usize), + Message(String), +} /// Error type for the indexer module #[derive(Error, Debug)] @@ -22,7 +109,7 @@ pub enum IndexerError { #[error("Invalid indexer rule kind integer: {0}")] InvalidRuleKindInt(#[from] IntEnumError), #[error("Glob builder error: {0}")] - GlobBuilderError(#[from] Error), + GlobBuilderError(#[from] globset::Error), // Internal Errors #[error("Database error: {0}")] @@ -30,11 +117,13 @@ pub enum IndexerError { #[error("I/O error: {0}")] IOError(#[from] io::Error), #[error("Indexer rule parameters json serialization error: {0}")] - RuleParametersSerdeJson(#[from] SerdeJsonError), + RuleParametersSerdeJson(#[from] serde_json::Error), #[error("Indexer rule parameters encode error: {0}")] - RuleParametersRMPEncode(#[from] RMPEncodeError), + RuleParametersRMPEncode(#[from] encode::Error), #[error("Indexer rule parameters decode error: {0}")] - RuleParametersRMPDecode(#[from] RMPDecodeError), + RuleParametersRMPDecode(#[from] decode::Error), + #[error("File path related error (error: {0})")] + FilePathError(#[from] FilePathError), } impl From for rspc::Error { @@ -52,3 +141,104 @@ impl From for rspc::Error { } } } + +async fn execute_indexer_step( + location: &location_with_indexer_rules::Data, + step: &[IndexerJobStepEntry], + ctx: WorkerContext, +) -> Result { + let Library { sync, db, .. } = &ctx.library; + + let (sync_stuff, paths): (Vec<_>, Vec<_>) = step + .iter() + .map(|entry| { + let MaterializedPath { + materialized_path, + is_dir, + name, + extension, + .. 
+ } = entry.materialized_path.clone(); + + use file_path::*; + + ( + sync.unique_shared_create( + sync::file_path::SyncId { + id: entry.file_id, + location: sync::location::SyncId { + pub_id: location.pub_id.clone(), + }, + }, + [ + ("materialized_path", json!(materialized_path.clone())), + ("name", json!(name.clone())), + ("is_dir", json!(is_dir)), + ("extension", json!(extension.clone())), + ("parent_id", json!(entry.parent_id)), + ("date_created", json!(entry.created_at)), + ], + ), + file_path::create_unchecked( + entry.file_id, + location.id, + materialized_path, + name, + extension, + vec![ + is_dir::set(is_dir), + parent_id::set(entry.parent_id), + date_created::set(entry.created_at.into()), + ], + ), + ) + }) + .unzip(); + + let count = sync + .write_ops( + db, + ( + sync_stuff, + db.file_path().create_many(paths).skip_duplicates(), + ), + ) + .await?; + + info!("Inserted {count} records"); + + Ok(count) +} + +fn finalize_indexer( + location_path: impl AsRef, + state: &JobState, + ctx: WorkerContext, +) -> JobResult +where + SJob: StatefulJob, + Init: Serialize + DeserializeOwned + Send + Sync + Hash, +{ + let data = state + .data + .as_ref() + .expect("critical error: missing data on job state"); + + tracing::info!( + "scan of {} completed in {:?}. {} new files found, \ + indexed {} files in db. db write completed in {:?}", + location_path.as_ref().display(), + data.scan_read_time, + data.total_paths, + data.indexed_paths, + (Utc::now() - data.db_write_start) + .to_std() + .expect("critical error: non-negative duration"), + ); + + if data.indexed_paths > 0 { + invalidate_query!(ctx.library, "locations.getExplorerData"); + } + + Ok(Some(serde_json::to_value(state)?)) +} diff --git a/core/src/location/indexer/rules.rs b/core/src/location/indexer/rules.rs index 189832a2c..c8c0b5e38 100644 --- a/core/src/location/indexer/rules.rs +++ b/core/src/location/indexer/rules.rs @@ -29,7 +29,7 @@ pub struct IndexerRuleCreateArgs { } impl IndexerRuleCreateArgs { - pub async fn create(self, ctx: &Library) -> Result { + pub async fn create(self, library: &Library) -> Result { let parameters = match self.kind { RuleKind::AcceptFilesByGlob | RuleKind::RejectFilesByGlob => rmp_serde::to_vec( &Glob::new(&serde_json::from_slice::(&self.parameters)?)?, @@ -41,7 +41,8 @@ impl IndexerRuleCreateArgs { } }; - ctx.db + library + .db .indexer_rule() .create(self.kind as i32, self.name, parameters, vec![]) .exec() diff --git a/core/src/location/indexer/shallow_indexer_job.rs b/core/src/location/indexer/shallow_indexer_job.rs new file mode 100644 index 000000000..0f3cf1161 --- /dev/null +++ b/core/src/location/indexer/shallow_indexer_job.rs @@ -0,0 +1,258 @@ +use crate::{ + job::{JobError, JobResult, JobState, StatefulJob, WorkerContext}, + library::Library, + location::file_path_helper::{ + ensure_sub_path_is_directory, ensure_sub_path_is_in_location, + file_path_just_id_materialized_path, find_many_file_paths_by_full_path, + get_existing_file_path_id, MaterializedPath, + }, + prisma::location, +}; + +use std::{ + collections::{HashMap, HashSet}, + hash::{Hash, Hasher}, + path::{Path, PathBuf}, +}; + +use chrono::Utc; +use itertools::Itertools; +use serde::{Deserialize, Serialize}; +use tokio::time::Instant; +use tracing::error; + +use super::{ + execute_indexer_step, finalize_indexer, location_with_indexer_rules, + rules::{IndexerRule, RuleKind}, + walk::walk_single_dir, + IndexerError, IndexerJobData, IndexerJobStep, IndexerJobStepEntry, ScanProgress, +}; + +/// BATCH_SIZE is the number of files to index at 
each step, writing the chunk of files metadata in the database. +const BATCH_SIZE: usize = 1000; +pub const SHALLOW_INDEXER_JOB_NAME: &str = "shallow_indexer"; + +/// `ShallowIndexerJobInit` receives a `location::Data` object to be indexed +/// and possibly a `sub_path` to be indexed. The `sub_path` is used when +/// we want do index just a part of a location. +#[derive(Serialize, Deserialize)] +pub struct ShallowIndexerJobInit { + pub location: location_with_indexer_rules::Data, + pub sub_path: PathBuf, +} + +impl Hash for ShallowIndexerJobInit { + fn hash(&self, state: &mut H) { + self.location.id.hash(state); + self.sub_path.hash(state); + } +} + +/// A `ShallowIndexerJob` is a stateful job that indexes all files in a directory, without checking inner directories. +/// First it checks the directory and generates a list of files to index, chunked into +/// batches of [`BATCH_SIZE`]. Then for each chunk it write the file metadata to the database. +pub struct ShallowIndexerJob; + +#[async_trait::async_trait] +impl StatefulJob for ShallowIndexerJob { + type Init = ShallowIndexerJobInit; + type Data = IndexerJobData; + type Step = IndexerJobStep; + + fn name(&self) -> &'static str { + SHALLOW_INDEXER_JOB_NAME + } + + /// Creates a vector of valid path buffers from a directory, chunked into batches of `BATCH_SIZE`. + async fn init(&self, ctx: WorkerContext, state: &mut JobState) -> Result<(), JobError> { + let Library { + last_file_path_id_manager, + db, + .. + } = &ctx.library; + + let location_id = state.init.location.id; + let location_path = Path::new(&state.init.location.path); + + // grab the next id so we can increment in memory for batch inserting + let first_file_id = last_file_path_id_manager + .get_max_file_path_id(location_id, db) + .await + .map_err(IndexerError::from)? + + 1; + + let mut indexer_rules_by_kind: HashMap> = + HashMap::with_capacity(state.init.location.indexer_rules.len()); + for location_rule in &state.init.location.indexer_rules { + let indexer_rule = IndexerRule::try_from(&location_rule.indexer_rule)?; + + indexer_rules_by_kind + .entry(indexer_rule.kind) + .or_default() + .push(indexer_rule); + } + + let (to_walk_path, parent_id) = if state.init.sub_path != Path::new("") { + let full_path = ensure_sub_path_is_in_location(location_path, &state.init.sub_path) + .await + .map_err(IndexerError::from)?; + ensure_sub_path_is_directory(location_path, &state.init.sub_path) + .await + .map_err(IndexerError::from)?; + + ( + location_path.join(&state.init.sub_path), + get_existing_file_path_id( + MaterializedPath::new(location_id, location_path, &full_path, true) + .map_err(IndexerError::from)?, + db, + ) + .await + .map_err(IndexerError::from)? + .expect("Sub path should already exist in the database"), + ) + } else { + ( + location_path.to_path_buf(), + get_existing_file_path_id( + MaterializedPath::new(location_id, location_path, location_path, true) + .map_err(IndexerError::from)?, + db, + ) + .await + .map_err(IndexerError::from)? 
+ .expect("Location root path should already exist in the database"), + ) + }; + + let scan_start = Instant::now(); + let found_paths = walk_single_dir( + to_walk_path, + &indexer_rules_by_kind, + |path, total_entries| { + IndexerJobData::on_scan_progress( + &ctx, + vec![ + ScanProgress::Message(format!("Scanning {}", path.display())), + ScanProgress::ChunkCount(total_entries / BATCH_SIZE), + ], + ); + }, + ) + .await?; + + let already_existing_file_paths = find_many_file_paths_by_full_path( + &location::Data::from(&state.init.location), + &found_paths + .iter() + .map(|entry| &entry.path) + .collect::>(), + db, + ) + .await + .map_err(IndexerError::from)? + .select(file_path_just_id_materialized_path::select()) + .exec() + .await? + .into_iter() + .map(|file_path| file_path.materialized_path) + .collect::>(); + + // Filter out paths that are already in the databases + let mut new_paths = found_paths + .into_iter() + .filter_map(|entry| { + MaterializedPath::new(location_id, location_path, &entry.path, entry.is_dir) + .map_or_else( + |e| { + error!("Failed to create materialized path: {e}"); + None + }, + |materialized_path| { + (!already_existing_file_paths + .contains::(materialized_path.as_ref())) + .then_some(IndexerJobStepEntry { + full_path: entry.path, + materialized_path, + created_at: entry.created_at, + file_id: 0, // To be set later + parent_id: Some(parent_id), + }) + }, + ) + }) + // Sadly we have to collect here to be able to check the length so we can set + // the max file path id later + .collect::>(); + + let total_paths = new_paths.len(); + let last_file_id = first_file_id + total_paths as i32; + + // Setting our global state for file_path ids + last_file_path_id_manager + .set_max_file_path_id(location_id, last_file_id) + .await; + + new_paths + .iter_mut() + .zip(first_file_id..last_file_id) + .for_each(|(entry, file_id)| { + entry.file_id = file_id; + }); + + let total_paths = new_paths.len(); + + state.data = Some(IndexerJobData { + db_write_start: Utc::now(), + scan_read_time: scan_start.elapsed(), + total_paths, + indexed_paths: 0, + }); + + state.steps = new_paths + .into_iter() + .chunks(BATCH_SIZE) + .into_iter() + .enumerate() + .map(|(i, chunk)| { + let chunk_steps = chunk.collect::>(); + IndexerJobData::on_scan_progress( + &ctx, + vec![ + ScanProgress::SavedChunks(i), + ScanProgress::Message(format!( + "Writing {} of {} to db", + i * chunk_steps.len(), + total_paths, + )), + ], + ); + chunk_steps + }) + .collect(); + + Ok(()) + } + + /// Process each chunk of entries in the indexer job, writing to the `file_path` table + async fn execute_step( + &self, + ctx: WorkerContext, + state: &mut JobState, + ) -> Result<(), JobError> { + execute_indexer_step(&state.init.location, &state.steps[0], ctx) + .await + .map(|indexed_paths| { + state + .data + .as_mut() + .expect("critical error: missing data on job state") + .indexed_paths = indexed_paths; + }) + } + + /// Logs some metadata about the indexer job + async fn finalize(&mut self, ctx: WorkerContext, state: &mut JobState) -> JobResult { + finalize_indexer(&state.init.location.path, state, ctx) + } +} diff --git a/core/src/location/indexer/walk.rs b/core/src/location/indexer/walk.rs index bcc29f825..d27079cf0 100644 --- a/core/src/location/indexer/walk.rs +++ b/core/src/location/indexer/walk.rs @@ -6,7 +6,7 @@ use std::{ path::{Path, PathBuf}, }; use tokio::fs; -use tracing::{debug, error}; +use tracing::{error, trace}; use super::{ rules::{IndexerRule, RuleKind}, @@ -48,6 +48,8 @@ impl Ord for WalkEntry { } } 
+type ToWalkEntry = (PathBuf, Option); + /// This function walks through the filesystem, applying the rules to each entry and then returning /// a list of accepted entries. There are some useful comments in the implementation of this function /// in case of doubts. @@ -55,6 +57,7 @@ pub(super) async fn walk( root: impl AsRef, rules_per_kind: &HashMap>, update_notifier: impl Fn(&Path, usize), + include_root: bool, ) -> Result, IndexerError> { let root = root.as_ref().to_path_buf(); @@ -75,204 +78,265 @@ pub(super) async fn walk( } }; - // Marking with a loop label here in case of rejection or erros, to continue with next entry - 'entries: loop { - let entry = match read_dir.next_entry().await { - Ok(Some(entry)) => entry, - Ok(None) => break, - Err(e) => { - error!( - "Error reading entry in {}: {:#?}", + inner_walk_single_dir( + &root, + (current_path, parent_dir_accepted_by_its_children), + &mut read_dir, + rules_per_kind, + &update_notifier, + &mut indexed_paths, + Some(&mut to_walk), + ) + .await?; + } + + prepared_indexed_paths(root, indexed_paths, include_root).await +} + +async fn inner_walk_single_dir( + root: impl AsRef, + (current_path, parent_dir_accepted_by_its_children): ToWalkEntry, + read_dir: &mut fs::ReadDir, + rules_per_kind: &HashMap>, + update_notifier: &impl Fn(&Path, usize), + indexed_paths: &mut HashMap, + mut maybe_to_walk: Option<&mut VecDeque<(PathBuf, Option)>>, +) -> Result<(), IndexerError> { + let root = root.as_ref(); + + // Marking with a loop label here in case of rejection or erros, to continue with next entry + 'entries: loop { + let entry = match read_dir.next_entry().await { + Ok(Some(entry)) => entry, + Ok(None) => break, + Err(e) => { + error!( + "Error reading entry in {}: {:#?}", + current_path.display(), + e + ); + continue; + } + }; + + // Accept by children has three states, + // None if we don't now yet or if this check doesn't apply + // Some(true) if this check applies and it passes + // Some(false) if this check applies and it was rejected + // and we pass the current parent state to its children + let mut accept_by_children_dir = parent_dir_accepted_by_its_children; + + let current_path = entry.path(); + + update_notifier(¤t_path, indexed_paths.len()); + + trace!( + "Current filesystem path: {}, accept_by_children_dir: {:#?}", + current_path.display(), + accept_by_children_dir + ); + if let Some(reject_rules) = rules_per_kind.get(&RuleKind::RejectFilesByGlob) { + for reject_rule in reject_rules { + // It's ok to unwrap here, reject rules are infallible + if !reject_rule.apply(¤t_path).await.unwrap() { + trace!( + "Path {} rejected by rule {}", current_path.display(), - e - ); - continue; - } - }; - - // Accept by children has three states, - // None if we don't now yet or if this check doesn't apply - // Some(true) if this check applies and it passes - // Some(false) if this check applies and it was rejected - // and we pass the current parent state to its children - let mut accept_by_children_dir = parent_dir_accepted_by_its_children; - - let current_path = entry.path(); - - update_notifier(¤t_path, indexed_paths.len()); - - debug!( - "Current filesystem path: {}, accept_by_children_dir: {:#?}", - current_path.display(), - accept_by_children_dir - ); - if let Some(reject_rules) = rules_per_kind.get(&RuleKind::RejectFilesByGlob) { - for reject_rule in reject_rules { - // It's ok to unwrap here, reject rules are infallible - if !reject_rule.apply(¤t_path).await.unwrap() { - debug!( - "Path {} rejected by rule {}", - current_path.display(), - 
reject_rule.name - ); - continue 'entries; - } - } - } - - let metadata = entry.metadata().await?; - - // TODO: Hard ignoring symlinks for now, but this should be configurable - if metadata.is_symlink() { - continue 'entries; - } - - let is_dir = metadata.is_dir(); - - if is_dir { - // If it is a directory, first we check if we must reject it and its children entirely - if let Some(reject_by_children_rules) = - rules_per_kind.get(&RuleKind::RejectIfChildrenDirectoriesArePresent) - { - for reject_by_children_rule in reject_by_children_rules { - match reject_by_children_rule.apply(¤t_path).await { - Ok(false) => { - debug!( - "Path {} rejected by rule {}", - current_path.display(), - reject_by_children_rule.name - ); - continue 'entries; - } - Ok(true) => {} - Err(e) => { - error!( - "Error applying rule {} to path {}: {:#?}", - reject_by_children_rule.name, - current_path.display(), - e - ); - continue 'entries; - } - } - } - } - - // Then we check if we must accept it and its children - if let Some(accept_by_children_rules) = - rules_per_kind.get(&RuleKind::AcceptIfChildrenDirectoriesArePresent) - { - for accept_by_children_rule in accept_by_children_rules { - match accept_by_children_rule.apply(¤t_path).await { - Ok(true) => { - accept_by_children_dir = Some(true); - break; - } - Ok(false) => {} - Err(e) => { - error!( - "Error applying rule {} to path {}: {:#?}", - accept_by_children_rule.name, - current_path.display(), - e - ); - continue 'entries; - } - } - } - - // If it wasn't accepted then we mark as rejected - if accept_by_children_dir.is_none() { - debug!( - "Path {} rejected because it didn't passed in any AcceptIfChildrenDirectoriesArePresent rule", - current_path.display() - ); - accept_by_children_dir = Some(false); - } - } - - // Then we mark this directory the be walked in too - to_walk.push_back((entry.path(), accept_by_children_dir)); - } - - let mut accept_by_glob = false; - if let Some(accept_rules) = rules_per_kind.get(&RuleKind::AcceptFilesByGlob) { - for accept_rule in accept_rules { - // It's ok to unwrap here, accept rules are infallible - if accept_rule.apply(¤t_path).await.unwrap() { - debug!( - "Path {} accepted by rule {}", - current_path.display(), - accept_rule.name - ); - accept_by_glob = true; - break; - } - } - if !accept_by_glob { - debug!( - "Path {} reject because it didn't passed in any AcceptFilesByGlob rules", - current_path.display() + reject_rule.name ); continue 'entries; } - } else { - // If there are no accept rules, then accept all paths - accept_by_glob = true; + } + } + + let metadata = entry.metadata().await?; + + // TODO: Hard ignoring symlinks for now, but this should be configurable + if metadata.is_symlink() { + continue 'entries; + } + + let is_dir = metadata.is_dir(); + + if is_dir { + // If it is a directory, first we check if we must reject it and its children entirely + if let Some(reject_by_children_rules) = + rules_per_kind.get(&RuleKind::RejectIfChildrenDirectoriesArePresent) + { + for reject_by_children_rule in reject_by_children_rules { + match reject_by_children_rule.apply(¤t_path).await { + Ok(false) => { + trace!( + "Path {} rejected by rule {}", + current_path.display(), + reject_by_children_rule.name + ); + continue 'entries; + } + Ok(true) => {} + Err(e) => { + trace!( + "Error applying rule {} to path {}: {:#?}", + reject_by_children_rule.name, + current_path.display(), + e + ); + continue 'entries; + } + } + } } - if accept_by_glob - && (accept_by_children_dir.is_none() || accept_by_children_dir.unwrap()) + // Then we 
check if we must accept it and its children + if let Some(accept_by_children_rules) = + rules_per_kind.get(&RuleKind::AcceptIfChildrenDirectoriesArePresent) { - indexed_paths.insert( - current_path.clone(), - WalkEntry { - path: current_path.clone(), - is_dir, - created_at: metadata.created()?.into(), - }, - ); - - // If the ancestors directories wasn't indexed before, now we do - for ancestor in current_path - .ancestors() - .skip(1) // Skip the current directory as it was already indexed - .take_while(|&ancestor| ancestor != root) - { - debug!("Indexing ancestor {}", ancestor.display()); - if !indexed_paths.contains_key(ancestor) { - indexed_paths.insert( - ancestor.to_path_buf(), - WalkEntry { - path: ancestor.to_path_buf(), - is_dir: true, - created_at: fs::metadata(ancestor).await?.created()?.into(), - }, - ); - } else { - // If indexed_paths contains the current ancestors, then it will contain - // also all if its ancestors too, so we can stop here - break; + for accept_by_children_rule in accept_by_children_rules { + match accept_by_children_rule.apply(¤t_path).await { + Ok(true) => { + accept_by_children_dir = Some(true); + break; + } + Ok(false) => {} + Err(e) => { + error!( + "Error applying rule {} to path {}: {:#?}", + accept_by_children_rule.name, + current_path.display(), + e + ); + continue 'entries; + } } } + + // If it wasn't accepted then we mark as rejected + if accept_by_children_dir.is_none() { + trace!( + "Path {} rejected because it didn't passed in any AcceptIfChildrenDirectoriesArePresent rule", + current_path.display() + ); + accept_by_children_dir = Some(false); + } + } + + // Then we mark this directory the be walked in too + if let Some(ref mut to_walk) = maybe_to_walk { + to_walk.push_back((entry.path(), accept_by_children_dir)); + } + } + + let mut accept_by_glob = false; + if let Some(accept_rules) = rules_per_kind.get(&RuleKind::AcceptFilesByGlob) { + for accept_rule in accept_rules { + // It's ok to unwrap here, accept rules are infallible + if accept_rule.apply(¤t_path).await.unwrap() { + trace!( + "Path {} accepted by rule {}", + current_path.display(), + accept_rule.name + ); + accept_by_glob = true; + break; + } + } + if !accept_by_glob { + trace!( + "Path {} reject because it didn't passed in any AcceptFilesByGlob rules", + current_path.display() + ); + continue 'entries; + } + } else { + // If there are no accept rules, then accept all paths + accept_by_glob = true; + } + + if accept_by_glob && (accept_by_children_dir.is_none() || accept_by_children_dir.unwrap()) { + indexed_paths.insert( + current_path.clone(), + WalkEntry { + path: current_path.clone(), + is_dir, + created_at: metadata.created()?.into(), + }, + ); + + // If the ancestors directories wasn't indexed before, now we do + for ancestor in current_path + .ancestors() + .skip(1) // Skip the current directory as it was already indexed + .take_while(|&ancestor| ancestor != root) + { + trace!("Indexing ancestor {}", ancestor.display()); + if !indexed_paths.contains_key(ancestor) { + indexed_paths.insert( + ancestor.to_path_buf(), + WalkEntry { + path: ancestor.to_path_buf(), + is_dir: true, + created_at: fs::metadata(ancestor).await?.created()?.into(), + }, + ); + } else { + // If indexed_paths contains the current ancestors, then it will contain + // also all if its ancestors too, so we can stop here + break; + } } } } + Ok(()) +} + +async fn prepared_indexed_paths( + root: PathBuf, + indexed_paths: HashMap, + include_root: bool, +) -> Result, IndexerError> { let mut indexed_paths = 
indexed_paths.into_values().collect::>(); - // Also adding the root location path - let root_created_at = fs::metadata(&root).await?.created()?.into(); - indexed_paths.push(WalkEntry { - path: root, - is_dir: true, - created_at: root_created_at, - }); + + if include_root { + // Also adding the root location path + let root_created_at = fs::metadata(&root).await?.created()?.into(); + indexed_paths.push(WalkEntry { + path: root, + is_dir: true, + created_at: root_created_at, + }); + } + // Sorting so we can give each path a crescent id given the filesystem hierarchy indexed_paths.sort(); Ok(indexed_paths) } +pub(super) async fn walk_single_dir( + root: impl AsRef, + rules_per_kind: &HashMap>, + update_notifier: impl Fn(&Path, usize), +) -> Result, IndexerError> { + let root = root.as_ref().to_path_buf(); + + let mut read_dir = fs::read_dir(&root).await?; + let mut indexed_paths = HashMap::new(); + + inner_walk_single_dir( + &root, + (root.clone(), None), + &mut read_dir, + rules_per_kind, + &update_notifier, + &mut indexed_paths, + None, + ) + .await?; + + prepared_indexed_paths(root, indexed_paths, false).await +} + #[cfg(test)] mod tests { use super::super::rules::ParametersPerKind; @@ -377,7 +441,7 @@ mod tests { .into_iter() .collect::>(); - let actual = walk(root_path.to_path_buf(), &HashMap::new(), |_, _| {}) + let actual = walk(root_path.to_path_buf(), &HashMap::new(), |_, _| {}, true) .await .unwrap() .into_iter() @@ -416,7 +480,7 @@ mod tests { .into_iter() .collect::>(); - let actual = walk(root_path.to_path_buf(), &only_photos_rule, |_, _| {}) + let actual = walk(root_path.to_path_buf(), &only_photos_rule, |_, _| {}, true) .await .unwrap() .into_iter() @@ -470,7 +534,7 @@ mod tests { .into_iter() .collect::>(); - let actual = walk(root_path.to_path_buf(), &git_repos, |_, _| {}) + let actual = walk(root_path.to_path_buf(), &git_repos, |_, _| {}, true) .await .unwrap() .into_iter() @@ -543,6 +607,7 @@ mod tests { root_path.to_path_buf(), &git_repos_no_deps_no_build_dirs, |_, _| {}, + true, ) .await .unwrap() diff --git a/core/src/location/manager/helpers.rs b/core/src/location/manager/helpers.rs index 8ed1961b4..ae7d645ab 100644 --- a/core/src/location/manager/helpers.rs +++ b/core/src/location/manager/helpers.rs @@ -2,7 +2,7 @@ use crate::{library::Library, prisma::location}; use std::{ collections::{HashMap, HashSet}, - path::{Path, PathBuf}, + path::PathBuf, time::Duration, }; @@ -114,25 +114,6 @@ pub(super) async fn get_location(location_id: i32, library: &Library) -> Option< }) } -pub(super) fn subtract_location_path( - location_path: impl AsRef, - current_path: impl AsRef, -) -> Option { - let location_path = location_path.as_ref(); - let current_path = current_path.as_ref(); - - if let Ok(stripped) = current_path.strip_prefix(location_path) { - Some(stripped.to_path_buf()) - } else { - error!( - "Failed to strip location root path ({}) from current path ({})", - location_path.display(), - current_path.display() - ); - None - } -} - pub(super) async fn handle_remove_location_request( location_id: LocationId, library: Library, diff --git a/core/src/location/manager/mod.rs b/core/src/location/manager/mod.rs index bd2c5a702..aa5b45174 100644 --- a/core/src/location/manager/mod.rs +++ b/core/src/location/manager/mod.rs @@ -20,14 +20,14 @@ use tracing::{debug, error}; #[cfg(feature = "location-watcher")] use tokio::sync::mpsc; +use super::{file_path_helper::FilePathError, LocationId}; + #[cfg(feature = "location-watcher")] mod watcher; #[cfg(feature = "location-watcher")] mod 
helpers; -pub type LocationId = i32; - #[derive(Clone, Copy, Debug)] #[allow(dead_code)] enum ManagementMessageAction { @@ -89,12 +89,12 @@ pub enum LocationManagerError { LocationMissingLocalPath(LocationId), #[error("Tried to update a non-existing file: ")] UpdateNonExistingFile(PathBuf), - #[error("Unable to extract materialized path from location: ")] - UnableToExtractMaterializedPath(LocationId, PathBuf), #[error("Database error: {0}")] DatabaseError(#[from] prisma_client_rust::QueryError), #[error("I/O error: {0}")] IOError(#[from] io::Error), + #[error("File path related error (error: {0})")] + FilePathError(#[from] FilePathError), } type OnlineLocations = BTreeSet>; diff --git a/core/src/location/manager/watcher/linux.rs b/core/src/location/manager/watcher/linux.rs index 1e347d648..1ff6ccbd3 100644 --- a/core/src/location/manager/watcher/linux.rs +++ b/core/src/location/manager/watcher/linux.rs @@ -1,6 +1,6 @@ use crate::{ library::Library, - location::{indexer::indexer_job::indexer_job_location, manager::LocationManagerError}, + location::{location_with_indexer_rules, manager::LocationManagerError}, }; use async_trait::async_trait; @@ -26,7 +26,7 @@ impl EventHandler for LinuxEventHandler { async fn handle_event( &mut self, - location: indexer_job_location::Data, + location: location_with_indexer_rules::Data, library: &Library, event: Event, ) -> Result<(), LocationManagerError> { diff --git a/core/src/location/manager/watcher/macos.rs b/core/src/location/manager/watcher/macos.rs index 1aa7625c6..a72e40b52 100644 --- a/core/src/location/manager/watcher/macos.rs +++ b/core/src/location/manager/watcher/macos.rs @@ -1,6 +1,6 @@ use crate::{ library::Library, - location::{indexer::indexer_job::indexer_job_location, manager::LocationManagerError}, + location::{location_with_indexer_rules, manager::LocationManagerError}, }; use async_trait::async_trait; @@ -32,7 +32,7 @@ impl EventHandler for MacOsEventHandler { async fn handle_event( &mut self, - location: indexer_job_location::Data, + location: location_with_indexer_rules::Data, library: &Library, event: Event, ) -> Result<(), LocationManagerError> { diff --git a/core/src/location/manager/watcher/mod.rs b/core/src/location/manager/watcher/mod.rs index 91a3c2e42..4abfeba3c 100644 --- a/core/src/location/manager/watcher/mod.rs +++ b/core/src/location/manager/watcher/mod.rs @@ -1,6 +1,7 @@ use crate::{ library::Library, - prisma::{file_path, location}, + location::{find_location, location_with_indexer_rules, LocationId}, + prisma::location, }; use std::{ @@ -18,10 +19,7 @@ use tokio::{ }; use tracing::{debug, error, warn}; -use super::{ - super::{fetch_location, indexer::indexer_job::indexer_job_location}, - LocationId, LocationManagerError, -}; +use super::LocationManagerError; mod linux; mod macos; @@ -40,8 +38,6 @@ type Handler = macos::MacOsEventHandler; #[cfg(target_os = "windows")] type Handler = windows::WindowsEventHandler; -file_path::include!(file_path_with_object { object }); - pub(super) type IgnorePath = (PathBuf, bool); #[async_trait] @@ -52,7 +48,7 @@ trait EventHandler { async fn handle_event( &mut self, - location: indexer_job_location::Data, + location: location_with_indexer_rules::Data, library: &Library, event: Event, ) -> Result<(), LocationManagerError>; @@ -173,8 +169,8 @@ impl LocationWatcher { return Ok(()); } - let Some(location) = fetch_location(library, location_id) - .include(indexer_job_location::include()) + let Some(location) = find_location(library, location_id) + 
.include(location_with_indexer_rules::include()) .exec() .await? else { diff --git a/core/src/location/manager/watcher/utils.rs b/core/src/location/manager/watcher/utils.rs index 48ff69a66..243e9ef42 100644 --- a/core/src/location/manager/watcher/utils.rs +++ b/core/src/location/manager/watcher/utils.rs @@ -3,12 +3,16 @@ use crate::{ library::Library, location::{ delete_directory, - file_path_helper::create_file_path, - indexer::indexer_job::indexer_job_location, - manager::{helpers::subtract_location_path, LocationId, LocationManagerError}, + file_path_helper::{ + extract_materialized_path, file_path_with_object, get_existing_file_or_directory, + get_existing_file_path_with_object, get_parent_dir, MaterializedPath, + }, + location_with_indexer_rules, + manager::LocationManagerError, }, object::{ - identifier_job::FileMetadata, + file_identifier::FileMetadata, + object_just_id_has_thumbnail, preview::{ can_generate_thumbnail_for_image, generate_image_thumbnail, THUMBNAIL_CACHE_DIR_NAME, }, @@ -19,7 +23,6 @@ use crate::{ use std::{ collections::HashSet, - ffi::OsStr, path::{Path, PathBuf}, str::FromStr, }; @@ -33,8 +36,6 @@ use tokio::{fs, io::ErrorKind}; use tracing::{error, info, trace, warn}; use uuid::Uuid; -use super::file_path_with_object; - pub(super) fn check_event(event: &Event, ignore_paths: &HashSet) -> bool { // if path includes .DS_Store, .spacedrive or is in the `ignore_paths` set, we ignore !event.paths.iter().any(|p| { @@ -47,7 +48,7 @@ pub(super) fn check_event(event: &Event, ignore_paths: &HashSet) -> boo } pub(super) async fn create_dir( - location: &indexer_job_location::Data, + location: &location_with_indexer_rules::Data, event: &Event, library: &Library, ) -> Result<(), LocationManagerError> { @@ -61,11 +62,10 @@ pub(super) async fn create_dir( event.paths[0].display() ); - let Some(subpath) = subtract_location_path(&location.path, &event.paths[0]) else { - return Ok(()); - }; + let materialized_path = + MaterializedPath::new(location.id, &location.path, &event.paths[0], true)?; - let parent_directory = get_parent_dir(location.id, &subpath, library).await?; + let parent_directory = get_parent_dir(&materialized_path, &library.db).await?; trace!("parent_directory: {:?}", parent_directory); @@ -74,23 +74,10 @@ pub(super) async fn create_dir( return Ok(()) }; - let created_path = create_file_path( - library, - location.id, - subpath - .to_str() - .map(str::to_string) - .expect("Found non-UTF-8 path"), - subpath - .file_stem() - .and_then(OsStr::to_str) - .map(str::to_string) - .expect("Found non-UTF-8 path"), - "".to_string(), - Some(parent_directory.id), - true, - ) - .await?; + let created_path = library + .last_file_path_id_manager + .create_file_path(&library.db, materialized_path, Some(parent_directory.id)) + .await?; info!("Created path: {}", created_path.materialized_path); @@ -100,7 +87,7 @@ pub(super) async fn create_dir( } pub(super) async fn create_file( - location: &indexer_job_location::Data, + location: &location_with_indexer_rules::Data, event: &Event, library: &Library, ) -> Result<(), LocationManagerError> { @@ -108,44 +95,29 @@ pub(super) async fn create_file( return Ok(()); } + let full_path = &event.paths[0]; + trace!( "Location: creating file: {}", &location.path, - event.paths[0].display() + full_path.display() ); let db = &library.db; - let Some(materialized_path) = subtract_location_path(&location.path, &event.paths[0]) else { return Ok(()) }; + let materialized_path = MaterializedPath::new(location.id, &location.path, full_path, false)?; let 
Some(parent_directory) = - get_parent_dir(location.id, &materialized_path, library).await? + get_parent_dir(&materialized_path, &library.db).await? else { warn!("Watcher found a path without parent"); return Ok(()) }; - let created_file = create_file_path( - library, - location.id, - materialized_path - .to_str() - .expect("Found non-UTF-8 path") - .to_string(), - materialized_path - .file_stem() - .unwrap_or_default() - .to_str() - .expect("Found non-UTF-8 path") - .to_string(), - materialized_path - .extension() - .map(|ext| ext.to_str().expect("Found non-UTF-8 path").to_string()) - .unwrap_or_default(), - Some(parent_directory.id), - false, - ) - .await?; + let created_file = library + .last_file_path_id_manager + .create_file_path(&library.db, materialized_path, Some(parent_directory.id)) + .await?; info!("Created path: {}", created_file.materialized_path); @@ -164,8 +136,6 @@ pub(super) async fn create_file( .exec() .await?; - object::select!(object_id { id has_thumbnail }); - let size_str = fs_metadata.len().to_string(); let object = if let Some(object) = existing_object { @@ -179,7 +149,7 @@ pub(super) async fn create_file( ), ], ) - .select(object_id::select()) + .select(object_just_id_has_thumbnail::select()) .exec() .await? } else { @@ -194,7 +164,7 @@ pub(super) async fn create_file( object::size_in_bytes::set(size_str.clone()), ], ) - .select(object_id::select()) + .select(object_just_id_has_thumbnail::select()) .exec() .await? }; @@ -218,12 +188,15 @@ pub(super) async fn create_file( } pub(super) async fn file_creation_or_update( - location: &indexer_job_location::Data, + location: &location_with_indexer_rules::Data, event: &Event, library: &Library, ) -> Result<(), LocationManagerError> { - if let Some(ref file_path) = - get_existing_file_path(location, &event.paths[0], false, library).await? + if let Some(ref file_path) = get_existing_file_path_with_object( + MaterializedPath::new(location.id, &location.path, &event.paths[0], false)?, + &library.db, + ) + .await? { inner_update_file(location, file_path, event, library).await } else { @@ -233,13 +206,16 @@ pub(super) async fn file_creation_or_update( } pub(super) async fn update_file( - location: &indexer_job_location::Data, + location: &location_with_indexer_rules::Data, event: &Event, library: &Library, ) -> Result<(), LocationManagerError> { if location.node_id == library.node_local_id { - if let Some(ref file_path) = - get_existing_file_path(location, &event.paths[0], false, library).await? + if let Some(ref file_path) = get_existing_file_path_with_object( + MaterializedPath::new(location.id, &location.path, &event.paths[0], false)?, + &library.db, + ) + .await? 
{ let ret = inner_update_file(location, file_path, event, library).await; invalidate_query!(library, "locations.getExplorerData"); @@ -255,7 +231,7 @@ pub(super) async fn update_file( } async fn inner_update_file( - location: &indexer_job_location::Data, + location: &location_with_indexer_rules::Data, file_path: &file_path_with_object::Data, event: &Event, library: &Library, @@ -321,7 +297,7 @@ async fn inner_update_file( } pub(super) async fn rename_both_event( - location: &indexer_job_location::Data, + location: &location_with_indexer_rules::Data, event: &Event, library: &Library, ) -> Result<(), LocationManagerError> { @@ -331,21 +307,24 @@ pub(super) async fn rename_both_event( pub(super) async fn rename( new_path: impl AsRef, old_path: impl AsRef, - location: &indexer_job_location::Data, + location: &location_with_indexer_rules::Data, library: &Library, ) -> Result<(), LocationManagerError> { - let mut old_path_materialized = extract_materialized_path(location, old_path.as_ref())? - .to_str() - .expect("Found non-UTF-8 path") - .to_string(); + let mut old_path_materialized = + extract_materialized_path(location.id, &location.path, old_path.as_ref())? + .to_str() + .expect("Found non-UTF-8 path") + .to_string(); - let new_path_materialized = extract_materialized_path(location, new_path.as_ref())?; + let new_path_materialized = + extract_materialized_path(location.id, &location.path, new_path.as_ref())?; let mut new_path_materialized_str = new_path_materialized .to_str() .expect("Found non-UTF-8 path") .to_string(); - if let Some(file_path) = get_existing_file_or_directory(location, old_path, library).await? { + if let Some(file_path) = get_existing_file_or_directory(location, old_path, &library.db).await? + { // If the renamed path is a directory, we have to update every successor if file_path.is_dir { if !old_path_materialized.ends_with('/') { @@ -406,7 +385,7 @@ pub(super) async fn rename( } pub(super) async fn remove_event( - location: &indexer_job_location::Data, + location: &location_with_indexer_rules::Data, event: &Event, remove_kind: RemoveKind, library: &Library, @@ -415,7 +394,7 @@ pub(super) async fn remove_event( // if it doesn't either way, then we don't care if let Some(file_path) = - get_existing_file_or_directory(location, &event.paths[0], library).await? + get_existing_file_or_directory(location, &event.paths[0], &library.db).await? { // check file still exists on disk match fs::metadata(&event.paths[0]).await { @@ -458,91 +437,6 @@ pub(super) async fn remove_event( Ok(()) } -fn extract_materialized_path( - location: &indexer_job_location::Data, - path: impl AsRef, -) -> Result { - subtract_location_path(&location.path, &path).ok_or_else(|| { - LocationManagerError::UnableToExtractMaterializedPath( - location.id, - path.as_ref().to_path_buf(), - ) - }) -} - -async fn get_existing_file_path( - location: &indexer_job_location::Data, - path: impl AsRef, - is_dir: bool, - library: &Library, -) -> Result, LocationManagerError> { - let mut materialized_path = extract_materialized_path(location, path)? 
- .to_str() - .expect("Found non-UTF-8 path") - .to_string(); - if is_dir && !materialized_path.ends_with('/') { - materialized_path += "/"; - } - - library - .db - .file_path() - .find_first(vec![file_path::materialized_path::equals( - materialized_path, - )]) - // include object for orphan check - .include(file_path_with_object::include()) - .exec() - .await - .map_err(Into::into) -} - -async fn get_existing_file_or_directory( - location: &indexer_job_location::Data, - path: impl AsRef, - library: &Library, -) -> Result, LocationManagerError> { - let mut maybe_file_path = - get_existing_file_path(location, path.as_ref(), false, library).await?; - // First we just check if this path was a file in our db, if it isn't then we check for a directory - if maybe_file_path.is_none() { - maybe_file_path = get_existing_file_path(location, path.as_ref(), true, library).await?; - } - - Ok(maybe_file_path) -} - -async fn get_parent_dir( - location_id: LocationId, - path: impl AsRef, - library: &Library, -) -> Result, LocationManagerError> { - let mut parent_path_str = path - .as_ref() - .parent() - // We have an "/" `materialized_path` for each location_id - .unwrap_or_else(|| Path::new("/")) - .to_str() - .expect("Found non-UTF-8 path") - .to_string(); - - // As we're looking specifically for a parent directory, it must end with '/' - if !parent_path_str.ends_with('/') { - parent_path_str += "/"; - } - - library - .db - .file_path() - .find_first(vec![ - file_path::location_id::equals(location_id), - file_path::materialized_path::equals(parent_path_str), - ]) - .exec() - .await - .map_err(Into::into) -} - async fn generate_thumbnail( extension: &str, cas_id: &str, diff --git a/core/src/location/manager/watcher/windows.rs b/core/src/location/manager/watcher/windows.rs index 879636ef0..7c5c7c069 100644 --- a/core/src/location/manager/watcher/windows.rs +++ b/core/src/location/manager/watcher/windows.rs @@ -1,6 +1,6 @@ use crate::{ library::Library, - location::{indexer::indexer_job::indexer_job_location, manager::LocationManagerError}, + location::{location_with_indexer_rules, manager::LocationManagerError}, }; use async_trait::async_trait; @@ -33,7 +33,7 @@ impl EventHandler for WindowsEventHandler { async fn handle_event( &mut self, - location: indexer_job_location::Data, + location: location_with_indexer_rules::Data, library: &Library, event: Event, ) -> Result<(), LocationManagerError> { diff --git a/core/src/location/mod.rs b/core/src/location/mod.rs index b2bd42749..c2e426eb1 100644 --- a/core/src/location/mod.rs +++ b/core/src/location/mod.rs @@ -3,16 +3,19 @@ use crate::{ job::Job, library::Library, object::{ - identifier_job::full_identifier_job::{FullFileIdentifierJob, FullFileIdentifierJobInit}, - preview::{ThumbnailJob, ThumbnailJobInit}, + file_identifier::{ + file_identifier_job::{FileIdentifierJob, FileIdentifierJobInit}, + shallow_file_identifier_job::{ShallowFileIdentifierJob, ShallowFileIdentifierJobInit}, + }, + preview::{ + shallow_thumbnailer_job::{ShallowThumbnailerJob, ShallowThumbnailerJobInit}, + thumbnailer_job::{ThumbnailerJob, ThumbnailerJobInit}, + }, }, prisma::{file_path, indexer_rules_in_location, location, node, object}, sync, }; -use rspc::Type; -use serde::Deserialize; -use serde_json::json; use std::{ collections::HashSet, ffi::OsStr, @@ -20,6 +23,9 @@ use std::{ }; use prisma_client_rust::QueryError; +use rspc::Type; +use serde::Deserialize; +use serde_json::json; use tokio::{fs, io}; use tracing::{debug, info}; use uuid::Uuid; @@ -31,10 +37,22 @@ mod manager; mod 
metadata; pub use error::LocationError; -use indexer::indexer_job::{indexer_job_location, IndexerJob, IndexerJobInit}; +use file_path_helper::file_path_just_object_id; +use indexer::{ + indexer_job::IndexerJob, + shallow_indexer_job::{ShallowIndexerJob, ShallowIndexerJobInit}, + IndexerJobInit, +}; pub use manager::{LocationManager, LocationManagerError}; use metadata::SpacedriveLocationMetadataFile; +pub type LocationId = i32; + +// Location includes! +location::include!(location_with_indexer_rules { + indexer_rules: select { indexer_rule } +}); + /// `LocationCreateArgs` is the argument received from the client using `rspc` to create a new location. /// It has the actual path and a vector of indexer rules ids, to create many-to-many relationships /// between the location and indexer rules. @@ -45,7 +63,10 @@ pub struct LocationCreateArgs { } impl LocationCreateArgs { - pub async fn create(self, ctx: &Library) -> Result { + pub async fn create( + self, + library: &Library, + ) -> Result { let path_metadata = match fs::metadata(&self.path).await { Ok(metadata) => metadata, Err(e) if e.kind() == io::ErrorKind::NotFound => { @@ -67,10 +88,10 @@ impl LocationCreateArgs { } if let Some(metadata) = SpacedriveLocationMetadataFile::try_load(&self.path).await? { - return if metadata.has_library(ctx.id) { + return if metadata.has_library(library.id) { Err(LocationError::NeedRelink { // SAFETY: This unwrap is ok as we checked that we have this library_id - old_path: metadata.location_path(ctx.id).unwrap().to_path_buf(), + old_path: metadata.location_path(library.id).unwrap().to_path_buf(), new_path: self.path, }) } else { @@ -84,18 +105,21 @@ impl LocationCreateArgs { ); let uuid = Uuid::new_v4(); - let location = create_location(ctx, uuid, &self.path, &self.indexer_rules_ids).await?; + let location = create_location(library, uuid, &self.path, &self.indexer_rules_ids).await?; // Write a location metadata on a .spacedrive file SpacedriveLocationMetadataFile::create_and_save( - ctx.id, + library.id, uuid, &self.path, location.name.clone(), ) .await?; - ctx.location_manager().add(location.id, ctx.clone()).await?; + library + .location_manager() + .add(location.id, library.clone()) + .await?; info!("Created location: {location:?}"); @@ -104,39 +128,42 @@ impl LocationCreateArgs { pub async fn add_library( self, - ctx: &Library, - ) -> Result { + library: &Library, + ) -> Result { let mut metadata = SpacedriveLocationMetadataFile::try_load(&self.path) .await? 
.ok_or_else(|| LocationError::MetadataNotFound(self.path.clone()))?; - if metadata.has_library(ctx.id) { + if metadata.has_library(library.id) { return Err(LocationError::NeedRelink { // SAFETY: This unwrap is ok as we checked that we have this library_id - old_path: metadata.location_path(ctx.id).unwrap().to_path_buf(), + old_path: metadata.location_path(library.id).unwrap().to_path_buf(), new_path: self.path, }); } debug!( "Trying to add a new library (library_id = {}) to an already existing location '{}'", - ctx.id, + library.id, self.path.display() ); let uuid = Uuid::new_v4(); - let location = create_location(ctx, uuid, &self.path, &self.indexer_rules_ids).await?; + let location = create_location(library, uuid, &self.path, &self.indexer_rules_ids).await?; metadata - .add_library(ctx.id, uuid, &self.path, location.name.clone()) + .add_library(library.id, uuid, &self.path, location.name.clone()) .await?; - ctx.location_manager().add(location.id, ctx.clone()).await?; + library + .location_manager() + .add(location.id, library.clone()) + .await?; info!( "Added library (library_id = {}) to location: {location:?}", - ctx.id + library.id ); Ok(location) @@ -160,11 +187,11 @@ pub struct LocationUpdateArgs { } impl LocationUpdateArgs { - pub async fn update(self, ctx: &Library) -> Result<(), LocationError> { - let Library { sync, db, .. } = &ctx; + pub async fn update(self, library: &Library) -> Result<(), LocationError> { + let Library { sync, db, .. } = &library; - let location = fetch_location(ctx, self.id) - .include(location::include!({ indexer_rules })) + let location = find_location(library, self.id) + .include(location_with_indexer_rules::include()) .exec() .await? .ok_or(LocationError::IdNotFound(self.id))?; @@ -215,11 +242,11 @@ impl LocationUpdateArgs { ) .await?; - if location.node_id == ctx.node_local_id { + if location.node_id == library.node_local_id { if let Some(mut metadata) = SpacedriveLocationMetadataFile::try_load(&location.path).await? 
{ - metadata.update(ctx.id, self.name.unwrap()).await?; + metadata.update(library.id, self.name.unwrap()).await?; } } } @@ -227,7 +254,7 @@ impl LocationUpdateArgs { let current_rules_ids = location .indexer_rules .iter() - .map(|r| r.indexer_rule_id) + .map(|r| r.indexer_rule.id) .collect::>(); let new_rules_ids = self.indexer_rules_ids.into_iter().collect::>(); @@ -243,7 +270,8 @@ impl LocationUpdateArgs { .collect::>(); if !rule_ids_to_remove.is_empty() { - ctx.db + library + .db .indexer_rules_in_location() .delete_many(vec![ indexer_rules_in_location::location_id::equals(self.id), @@ -254,7 +282,7 @@ impl LocationUpdateArgs { } if !rule_ids_to_add.is_empty() { - link_location_and_indexer_rules(ctx, self.id, &rule_ids_to_add).await?; + link_location_and_indexer_rules(library, self.id, &rule_ids_to_add).await?; } } @@ -262,18 +290,20 @@ impl LocationUpdateArgs { } } -pub fn fetch_location(ctx: &Library, location_id: i32) -> location::FindUnique { - ctx.db +pub fn find_location(library: &Library, location_id: i32) -> location::FindUnique { + library + .db .location() .find_unique(location::id::equals(location_id)) } async fn link_location_and_indexer_rules( - ctx: &Library, + library: &Library, location_id: i32, rules_ids: &[i32], ) -> Result<(), LocationError> { - ctx.db + library + .db .indexer_rules_in_location() .create_many( rules_ids @@ -288,43 +318,137 @@ async fn link_location_and_indexer_rules( } pub async fn scan_location( - ctx: &Library, - location: indexer_job_location::Data, + library: &Library, + location: location_with_indexer_rules::Data, ) -> Result<(), LocationError> { - if location.node_id != ctx.node_local_id { + if location.node_id != library.node_local_id { return Ok(()); } - ctx.queue_job(Job::new( - FullFileIdentifierJobInit { - location_id: location.id, - sub_path: None, - }, - FullFileIdentifierJob {}, - )) - .await; + library + .queue_job(Job::new( + FileIdentifierJobInit { + location: location::Data::from(&location), + sub_path: None, + }, + FileIdentifierJob {}, + )) + .await; - ctx.queue_job(Job::new( - ThumbnailJobInit { - location_id: location.id, - root_path: PathBuf::new(), - background: true, - }, - ThumbnailJob {}, - )) - .await; + library + .queue_job(Job::new( + ThumbnailerJobInit { + location: location::Data::from(&location), + sub_path: None, + background: true, + }, + ThumbnailerJob {}, + )) + .await; - ctx.spawn_job(Job::new(IndexerJobInit { location }, IndexerJob {})) + library + .spawn_job(Job::new( + IndexerJobInit { + location, + sub_path: None, + }, + IndexerJob {}, + )) + .await; + + Ok(()) +} + +#[allow(dead_code)] +pub async fn scan_location_sub_path( + library: &Library, + location: location_with_indexer_rules::Data, + sub_path: impl AsRef, +) -> Result<(), LocationError> { + let sub_path = sub_path.as_ref().to_path_buf(); + if location.node_id != library.node_local_id { + return Ok(()); + } + + library + .queue_job(Job::new( + FileIdentifierJobInit { + location: location::Data::from(&location), + sub_path: Some(sub_path.clone()), + }, + FileIdentifierJob {}, + )) + .await; + + library + .queue_job(Job::new( + ThumbnailerJobInit { + location: location::Data::from(&location), + sub_path: Some(sub_path.clone()), + background: true, + }, + ThumbnailerJob {}, + )) + .await; + + library + .spawn_job(Job::new( + IndexerJobInit { + location, + sub_path: Some(sub_path), + }, + IndexerJob {}, + )) + .await; + + Ok(()) +} + +pub async fn light_scan_location( + library: &Library, + location: location_with_indexer_rules::Data, + sub_path: impl 
AsRef, +) -> Result<(), LocationError> { + let sub_path = sub_path.as_ref().to_path_buf(); + if location.node_id != library.node_local_id { + return Ok(()); + } + + library + .queue_job(Job::new( + ShallowFileIdentifierJobInit { + location: location::Data::from(&location), + sub_path: sub_path.clone(), + }, + ShallowFileIdentifierJob {}, + )) + .await; + + library + .queue_job(Job::new( + ShallowThumbnailerJobInit { + location: location::Data::from(&location), + sub_path: sub_path.clone(), + }, + ShallowThumbnailerJob {}, + )) + .await; + + library + .spawn_job(Job::new( + ShallowIndexerJobInit { location, sub_path }, + ShallowIndexerJob {}, + )) .await; Ok(()) } pub async fn relink_location( - ctx: &Library, + library: &Library, location_path: impl AsRef, ) -> Result<(), LocationError> { - let Library { db, id, sync, .. } = &ctx; + let Library { db, id, sync, .. } = &library; let mut metadata = SpacedriveLocationMetadataFile::try_load(&location_path) .await? @@ -332,7 +456,7 @@ pub async fn relink_location( metadata.relink(*id, &location_path).await?; - let pub_id = metadata.location_pub_id(ctx.id)?.as_ref().to_vec(); + let pub_id = metadata.location_pub_id(library.id)?.as_ref().to_vec(); let path = location_path .as_ref() .to_str() @@ -359,12 +483,12 @@ pub async fn relink_location( } async fn create_location( - ctx: &Library, + library: &Library, location_pub_id: Uuid, location_path: impl AsRef, indexer_rules_ids: &[i32], -) -> Result { - let Library { db, sync, .. } = &ctx; +) -> Result { + let Library { db, sync, .. } = &library; let location_path = location_path.as_ref(); @@ -387,7 +511,7 @@ async fn create_location( pub_id: location_pub_id.as_bytes().to_vec(), }, [ - ("node", json!({ "pub_id": ctx.id.as_bytes() })), + ("node", json!({ "pub_id": library.id.as_bytes() })), ("name", json!(&name)), ("path", json!(&path)), ], @@ -397,39 +521,40 @@ async fn create_location( location_pub_id.as_bytes().to_vec(), name, path, - node::id::equals(ctx.node_local_id), + node::id::equals(library.node_local_id), vec![], ) - .include(indexer_job_location::include()), + .include(location_with_indexer_rules::include()), ) .await?; debug!("created in db"); if !indexer_rules_ids.is_empty() { - link_location_and_indexer_rules(ctx, location.id, indexer_rules_ids).await?; + link_location_and_indexer_rules(library, location.id, indexer_rules_ids).await?; } // Updating our location variable to include information about the indexer rules - let location = fetch_location(ctx, location.id) - .include(indexer_job_location::include()) + let location = find_location(library, location.id) + .include(location_with_indexer_rules::include()) .exec() .await? .ok_or(LocationError::IdNotFound(location.id))?; - invalidate_query!(ctx, "locations.list"); + invalidate_query!(library, "locations.list"); Ok(location) } -pub async fn delete_location(ctx: &Library, location_id: i32) -> Result<(), LocationError> { - let Library { db, .. } = ctx; +pub async fn delete_location(library: &Library, location_id: i32) -> Result<(), LocationError> { + let Library { db, .. 
} = library; - ctx.location_manager() - .remove(location_id, ctx.clone()) + library + .location_manager() + .remove(location_id, library.clone()) .await?; - delete_directory(ctx, location_id, None).await?; + delete_directory(library, location_id, None).await?; db.indexer_rules_in_location() .delete_many(vec![indexer_rules_in_location::location_id::equals( @@ -444,26 +569,24 @@ pub async fn delete_location(ctx: &Library, location_id: i32) -> Result<(), Loca .exec() .await?; - if location.node_id == ctx.node_local_id { + if location.node_id == library.node_local_id { if let Ok(Some(mut metadata)) = SpacedriveLocationMetadataFile::try_load(&location.path).await { - metadata.remove_library(ctx.id).await?; + metadata.remove_library(library.id).await?; } } info!("Location {} deleted", location_id); - invalidate_query!(ctx, "locations.list"); + invalidate_query!(library, "locations.list"); Ok(()) } -file_path::select!(file_path_object_id_only { object_id }); - /// Will delete a directory recursively with Objects if left as orphans /// this function is used to delete a location and when ingesting directory deletion events pub async fn delete_directory( - ctx: &Library, + library: &Library, location_id: i32, parent_materialized_path: Option, ) -> Result<(), QueryError> { @@ -477,11 +600,11 @@ pub async fn delete_directory( }; // Fetching all object_ids from all children file_paths - let object_ids = ctx + let object_ids = library .db .file_path() .find_many(children_params.clone()) - .select(file_path_object_id_only::select()) + .select(file_path_just_object_id::select()) .exec() .await? .into_iter() @@ -490,14 +613,16 @@ pub async fn delete_directory( // WARNING: file_paths must be deleted before objects, as they reference objects through object_id // delete all children file_paths - ctx.db + library + .db .file_path() .delete_many(children_params) .exec() .await?; // delete all children objects - ctx.db + library + .db .object() .delete_many(vec![ object::id::in_vec(object_ids), @@ -507,11 +632,55 @@ pub async fn delete_directory( .exec() .await?; - invalidate_query!(ctx, "locations.getExplorerData"); + invalidate_query!(library, "locations.getExplorerData"); Ok(()) } +impl From for location::Data { + fn from(data: location_with_indexer_rules::Data) -> Self { + Self { + id: data.id, + pub_id: data.pub_id, + path: data.path, + node_id: data.node_id, + name: data.name, + total_capacity: data.total_capacity, + available_capacity: data.available_capacity, + is_archived: data.is_archived, + generate_preview_media: data.generate_preview_media, + sync_preview_media: data.sync_preview_media, + hidden: data.hidden, + date_created: data.date_created, + node: None, + file_paths: None, + indexer_rules: None, + } + } +} + +impl From<&location_with_indexer_rules::Data> for location::Data { + fn from(data: &location_with_indexer_rules::Data) -> Self { + Self { + id: data.id, + pub_id: data.pub_id.clone(), + path: data.path.clone(), + node_id: data.node_id, + name: data.name.clone(), + total_capacity: data.total_capacity, + available_capacity: data.available_capacity, + is_archived: data.is_archived, + generate_preview_media: data.generate_preview_media, + sync_preview_media: data.sync_preview_media, + hidden: data.hidden, + date_created: data.date_created, + node: None, + file_paths: None, + indexer_rules: None, + } + } +} + // check if a path exists in our database at that location // pub async fn check_virtual_path_exists( // library: &Library, diff --git 
a/core/src/object/file_identifier/file_identifier_job.rs b/core/src/object/file_identifier/file_identifier_job.rs new file mode 100644 index 000000000..66766e626 --- /dev/null +++ b/core/src/object/file_identifier/file_identifier_job.rs @@ -0,0 +1,254 @@ +use crate::{ + job::{JobError, JobReportUpdate, JobResult, JobState, StatefulJob, WorkerContext}, + library::Library, + location::file_path_helper::{ + ensure_sub_path_is_directory, ensure_sub_path_is_in_location, + file_path_for_file_identifier, MaterializedPath, + }, + prisma::{file_path, location, PrismaClient}, +}; + +use std::{ + hash::{Hash, Hasher}, + path::{Path, PathBuf}, +}; + +use prisma_client_rust::Direction; +use serde::{Deserialize, Serialize}; +use tracing::info; + +use super::{ + finalize_file_identifier, process_identifier_file_paths, FileIdentifierJobError, + FileIdentifierReport, FilePathIdAndLocationIdCursor, CHUNK_SIZE, +}; + +pub const FILE_IDENTIFIER_JOB_NAME: &str = "file_identifier"; + +pub struct FileIdentifierJob {} + +/// `FileIdentifierJobInit` takes file_paths without a file_id from an entire location +/// or starting from a `sub_path` (getting every descendent from this `sub_path` +/// and uniquely identifies them: +/// - first: generating the cas_id and extracting metadata +/// - finally: creating unique file records, and linking them to their file_paths +#[derive(Serialize, Deserialize, Clone)] +pub struct FileIdentifierJobInit { + pub location: location::Data, + pub sub_path: Option, // subpath to start from +} + +impl Hash for FileIdentifierJobInit { + fn hash(&self, state: &mut H) { + self.location.id.hash(state); + if let Some(ref sub_path) = self.sub_path { + sub_path.hash(state); + } + } +} + +#[derive(Serialize, Deserialize)] +pub struct FileIdentifierJobState { + cursor: FilePathIdAndLocationIdCursor, + report: FileIdentifierReport, + maybe_sub_materialized_path: Option, +} + +#[async_trait::async_trait] +impl StatefulJob for FileIdentifierJob { + type Init = FileIdentifierJobInit; + type Data = FileIdentifierJobState; + type Step = (); + + fn name(&self) -> &'static str { + FILE_IDENTIFIER_JOB_NAME + } + + async fn init(&self, ctx: WorkerContext, state: &mut JobState) -> Result<(), JobError> { + let Library { db, .. 
} = &ctx.library; + + info!("Identifying orphan File Paths..."); + + let location_id = state.init.location.id; + let location_path = Path::new(&state.init.location.path); + + let maybe_sub_materialized_path = if let Some(ref sub_path) = state.init.sub_path { + let full_path = ensure_sub_path_is_in_location(location_path, sub_path) + .await + .map_err(FileIdentifierJobError::from)?; + ensure_sub_path_is_directory(location_path, sub_path) + .await + .map_err(FileIdentifierJobError::from)?; + + Some( + MaterializedPath::new(location_id, location_path, &full_path, true) + .map_err(FileIdentifierJobError::from)?, + ) + } else { + None + }; + + let orphan_count = + count_orphan_file_paths(db, location_id, &maybe_sub_materialized_path).await?; + + // Initializing `state.data` here because we need a complete state in case of early finish + state.data = Some(FileIdentifierJobState { + report: FileIdentifierReport { + location_path: location_path.to_path_buf(), + total_orphan_paths: orphan_count, + ..Default::default() + }, + cursor: FilePathIdAndLocationIdCursor { + file_path_id: -1, + location_id, + }, + maybe_sub_materialized_path, + }); + + let data = state.data.as_mut().unwrap(); // SAFETY: We just initialized it + + if orphan_count == 0 { + return Err(JobError::EarlyFinish { + name: self.name().to_string(), + reason: "Found no orphan file paths to process".to_string(), + }); + } + + info!("Found {} orphan file paths", orphan_count); + + let task_count = (orphan_count as f64 / CHUNK_SIZE as f64).ceil() as usize; + info!( + "Found {} orphan Paths. Will execute {} tasks...", + orphan_count, task_count + ); + + // update job with total task count based on orphan file_paths count + ctx.progress(vec![JobReportUpdate::TaskCount(task_count)]); + + let first_path_id = db + .file_path() + .find_first(orphan_path_filters( + location_id, + None, + &data.maybe_sub_materialized_path, + )) + .order_by(file_path::id::order(Direction::Asc)) + .select(file_path::select!({ id })) + .exec() + .await? 
+ .map(|d| d.id) + .unwrap(); // SAFETY: We already validated before that there are orphans `file_path`s + + data.cursor.file_path_id = first_path_id; + + state.steps = (0..task_count).map(|_| ()).collect(); + + Ok(()) + } + + async fn execute_step( + &self, + ctx: WorkerContext, + state: &mut JobState, + ) -> Result<(), JobError> { + let FileIdentifierJobState { + ref mut cursor, + ref mut report, + ref maybe_sub_materialized_path, + } = state + .data + .as_mut() + .expect("Critical error: missing data on job state"); + + let location = &state.init.location; + + // get chunk of orphans to process + let file_paths = + get_orphan_file_paths(&ctx.library.db, cursor, maybe_sub_materialized_path).await?; + + process_identifier_file_paths( + self.name(), + location, + &file_paths, + state.step_number, + cursor, + report, + ctx, + ) + .await + } + + async fn finalize(&mut self, ctx: WorkerContext, state: &mut JobState) -> JobResult { + finalize_file_identifier( + &state + .data + .as_ref() + .expect("critical error: missing data on job state") + .report, + ctx, + ) + } +} + +fn orphan_path_filters( + location_id: i32, + file_path_id: Option, + maybe_sub_materialized_path: &Option, +) -> Vec { + let mut params = vec![ + file_path::object_id::equals(None), + file_path::is_dir::equals(false), + file_path::location_id::equals(location_id), + ]; + // this is a workaround for the cursor not working properly + if let Some(file_path_id) = file_path_id { + params.push(file_path::id::gte(file_path_id)); + } + + if let Some(ref sub_materealized_path) = maybe_sub_materialized_path { + params.push(file_path::materialized_path::starts_with( + sub_materealized_path.into(), + )); + } + + params +} + +async fn count_orphan_file_paths( + db: &PrismaClient, + location_id: i32, + maybe_sub_materialized_path: &Option, +) -> Result { + db.file_path() + .count(orphan_path_filters( + location_id, + None, + maybe_sub_materialized_path, + )) + .exec() + .await + .map(|c| c as usize) +} + +async fn get_orphan_file_paths( + db: &PrismaClient, + cursor: &FilePathIdAndLocationIdCursor, + maybe_sub_materialized_path: &Option, +) -> Result, prisma_client_rust::QueryError> { + info!( + "Querying {} orphan Paths at cursor: {:?}", + CHUNK_SIZE, cursor + ); + db.file_path() + .find_many(orphan_path_filters( + cursor.location_id, + Some(cursor.file_path_id), + maybe_sub_materialized_path, + )) + .order_by(file_path::id::order(Direction::Asc)) + // .cursor(cursor.into()) + .take(CHUNK_SIZE as i64) + // .skip(1) + .select(file_path_for_file_identifier::select()) + .exec() + .await +} diff --git a/core/src/object/identifier_job/mod.rs b/core/src/object/file_identifier/mod.rs similarity index 66% rename from core/src/object/identifier_job/mod.rs rename to core/src/object/file_identifier/mod.rs index e798f3d21..a7956d360 100644 --- a/core/src/object/identifier_job/mod.rs +++ b/core/src/object/file_identifier/mod.rs @@ -1,7 +1,9 @@ use crate::{ - job::JobError, + invalidate_query, + job::{JobError, JobReportUpdate, JobResult, WorkerContext}, library::Library, - object::cas::generate_cas_id, + location::file_path_helper::{file_path_for_file_identifier, FilePathError}, + object::{cas::generate_cas_id, object_for_file_identifier}, prisma::{file_path, location, object, PrismaClient}, sync, sync::SyncManager, @@ -12,6 +14,7 @@ use sd_sync::CRDTOperation; use futures::future::join_all; use int_enum::IntEnum; +use serde::{Deserialize, Serialize}; use serde_json::json; use std::{ collections::{HashMap, HashSet}, @@ -22,17 +25,16 @@ use 
tokio::{fs, io}; use tracing::{error, info}; use uuid::Uuid; -pub mod full_identifier_job; +pub mod file_identifier_job; +pub mod shallow_file_identifier_job; // we break these jobs into chunks of 100 to improve performance const CHUNK_SIZE: usize = 100; #[derive(Error, Debug)] -pub enum IdentifierJobError { - #[error("Location not found: ")] - MissingLocation(i32), - #[error("Root file path not found: ")] - MissingRootFilePath(PathBuf), +pub enum FileIdentifierJobError { + #[error("File path related error (error: {0})")] + FilePathError(#[from] FilePathError), } #[derive(Debug, Clone)] @@ -75,10 +77,31 @@ impl FileMetadata { } } +#[derive(Serialize, Deserialize, Debug)] +struct FilePathIdAndLocationIdCursor { + file_path_id: i32, + location_id: i32, +} + +impl From<&FilePathIdAndLocationIdCursor> for file_path::UniqueWhereParam { + fn from(cursor: &FilePathIdAndLocationIdCursor) -> Self { + file_path::location_id_id(cursor.location_id, cursor.file_path_id) + } +} + +#[derive(Serialize, Deserialize, Debug, Default)] +pub struct FileIdentifierReport { + location_path: PathBuf, + total_orphan_paths: usize, + total_objects_created: usize, + total_objects_linked: usize, + total_objects_ignored: usize, +} + async fn identifier_job_step( Library { db, sync, .. }: &Library, location: &location::Data, - file_paths: &[file_path::Data], + file_paths: &[file_path_for_file_identifier::Data], ) -> Result<(usize, usize), JobError> { let file_path_metas = join_all(file_paths.iter().map(|file_path| async move { FileMetadata::new(&location.path, &file_path.materialized_path) @@ -89,7 +112,7 @@ async fn identifier_job_step( .into_iter() .flat_map(|data| { if let Err(e) = &data { - error!("Error assembling Object metadata: {:#?}", e); + error!("Error assembling Object metadata: {e}"); } data @@ -136,10 +159,7 @@ async fn identifier_job_step( .find_many(vec![object::file_paths::some(vec![ file_path::cas_id::in_vec(unique_cas_ids), ])]) - .select(object::select!({ - pub_id - file_paths: select { id cas_id } - })) + .select(object_for_file_identifier::select()) .exec() .await?; @@ -166,14 +186,16 @@ async fn identifier_job_step( .map(|o| (*id, o)) }) .map(|(id, object)| { - file_path_object_connect_ops( + let (crdt_op, db_op) = file_path_object_connect_ops( id, // SAFETY: This pub_id is generated by the uuid lib, but we have to store bytes in sqlite Uuid::from_slice(&object.pub_id).unwrap(), location, sync, db, - ) + ); + + (crdt_op, db_op.select(file_path::select!({ id }))) }) .unzip::<_, _, Vec<_>, Vec<_>>(), ) @@ -239,10 +261,12 @@ async fn identifier_job_step( ), ); - ( - object_creation_args, - file_path_object_connect_ops(*id, pub_id, location, sync, db), - ) + (object_creation_args, { + let (crdt_op, db_op) = + file_path_object_connect_ops(*id, pub_id, location, sync, db); + + (crdt_op, db_op.select(file_path::select!({ id }))) + }) }) .unzip(); @@ -278,18 +302,13 @@ async fn identifier_job_step( Ok((total_created, updated_file_paths.len())) } -file_path::select!(file_path_only_id { id }); - fn file_path_object_connect_ops<'db>( file_path_id: i32, object_id: Uuid, location: &location::Data, sync: &SyncManager, db: &'db PrismaClient, -) -> ( - CRDTOperation, - prisma_client_rust::Select<'db, file_path_only_id::Data>, -) { +) -> (CRDTOperation, file_path::Update<'db>) { info!("Connecting to "); ( @@ -303,13 +322,70 @@ fn file_path_object_connect_ops<'db>( "object", json!({ "pub_id": object_id }), ), - db.file_path() - .update( - file_path::location_id_id(location.id, file_path_id), - 
vec![file_path::object::connect(object::pub_id::equals( - object_id.as_bytes().to_vec(), - ))], - ) - .select(file_path_only_id::select()), + db.file_path().update( + file_path::location_id_id(location.id, file_path_id), + vec![file_path::object::connect(object::pub_id::equals( + object_id.as_bytes().to_vec(), + ))], + ), ) } + +async fn process_identifier_file_paths( + job_name: &str, + location: &location::Data, + file_paths: &[file_path_for_file_identifier::Data], + step_number: usize, + cursor: &mut FilePathIdAndLocationIdCursor, + report: &mut FileIdentifierReport, + ctx: WorkerContext, +) -> Result<(), JobError> { + // if no file paths found, abort entire job early, there is nothing to do + // if we hit this error, there is something wrong with the data/query + if file_paths.is_empty() { + return Err(JobError::EarlyFinish { + name: job_name.to_string(), + reason: "Expected orphan Paths not returned from database query for this chunk" + .to_string(), + }); + } + + info!( + "Processing {:?} orphan Paths. ({} completed of {})", + file_paths.len(), + step_number, + report.total_orphan_paths + ); + + let (total_objects_created, total_objects_linked) = + identifier_job_step(&ctx.library, location, file_paths).await?; + + report.total_objects_created += total_objects_created; + report.total_objects_linked += total_objects_linked; + + // set the step data cursor to the last row of this chunk + if let Some(last_row) = file_paths.last() { + cursor.file_path_id = last_row.id; + } + + ctx.progress(vec![ + JobReportUpdate::CompletedTaskCount(step_number), + JobReportUpdate::Message(format!( + "Processed {} of {} orphan Paths", + step_number * CHUNK_SIZE, + report.total_orphan_paths + )), + ]); + + Ok(()) +} + +fn finalize_file_identifier(report: &FileIdentifierReport, ctx: WorkerContext) -> JobResult { + info!("Finalizing identifier job: {report:?}"); + + if report.total_orphan_paths > 0 { + invalidate_query!(ctx.library, "locations.getExplorerData"); + } + + Ok(Some(serde_json::to_value(report)?)) +} diff --git a/core/src/object/file_identifier/shallow_file_identifier_job.rs b/core/src/object/file_identifier/shallow_file_identifier_job.rs new file mode 100644 index 000000000..b68dc0543 --- /dev/null +++ b/core/src/object/file_identifier/shallow_file_identifier_job.rs @@ -0,0 +1,246 @@ +use crate::{ + job::{JobError, JobReportUpdate, JobResult, JobState, StatefulJob, WorkerContext}, + library::Library, + location::file_path_helper::{ + ensure_sub_path_is_directory, ensure_sub_path_is_in_location, + file_path_for_file_identifier, get_existing_file_path_id, MaterializedPath, + }, + prisma::{file_path, location, PrismaClient}, +}; + +use std::{ + hash::{Hash, Hasher}, + path::{Path, PathBuf}, +}; + +use prisma_client_rust::Direction; +use serde::{Deserialize, Serialize}; +use tracing::info; + +use super::{ + finalize_file_identifier, process_identifier_file_paths, FileIdentifierJobError, + FileIdentifierReport, FilePathIdAndLocationIdCursor, CHUNK_SIZE, +}; + +pub const SHALLOW_FILE_IDENTIFIER_JOB_NAME: &str = "shallow_file_identifier"; + +pub struct ShallowFileIdentifierJob {} + +/// `ShallowFileIdentifierJobInit` takes file_paths without a file_id from a specific path +/// (just direct children of this path) and uniquely identifies them: +/// - first: generating the cas_id and extracting metadata +/// - finally: creating unique file records, and linking them to their file_paths +#[derive(Serialize, Deserialize, Clone)] +pub struct ShallowFileIdentifierJobInit { + pub location: location::Data, + 
pub sub_path: PathBuf, +} + +impl Hash for ShallowFileIdentifierJobInit { + fn hash(&self, state: &mut H) { + self.location.id.hash(state); + self.sub_path.hash(state); + } +} + +#[derive(Serialize, Deserialize)] +pub struct ShallowFileIdentifierJobState { + cursor: FilePathIdAndLocationIdCursor, + report: FileIdentifierReport, + sub_path_id: i32, +} + +#[async_trait::async_trait] +impl StatefulJob for ShallowFileIdentifierJob { + type Init = ShallowFileIdentifierJobInit; + type Data = ShallowFileIdentifierJobState; + type Step = (); + + fn name(&self) -> &'static str { + SHALLOW_FILE_IDENTIFIER_JOB_NAME + } + + async fn init(&self, ctx: WorkerContext, state: &mut JobState) -> Result<(), JobError> { + let Library { db, .. } = &ctx.library; + + info!("Identifying orphan File Paths..."); + + let location_id = state.init.location.id; + let location_path = Path::new(&state.init.location.path); + + let sub_path_id = if state.init.sub_path != Path::new("") { + let full_path = ensure_sub_path_is_in_location(location_path, &state.init.sub_path) + .await + .map_err(FileIdentifierJobError::from)?; + ensure_sub_path_is_directory(location_path, &state.init.sub_path) + .await + .map_err(FileIdentifierJobError::from)?; + + get_existing_file_path_id( + MaterializedPath::new(location_id, location_path, &full_path, true) + .map_err(FileIdentifierJobError::from)?, + db, + ) + .await + .map_err(FileIdentifierJobError::from)? + .expect("Sub path should already exist in the database") + } else { + get_existing_file_path_id( + MaterializedPath::new(location_id, location_path, location_path, true) + .map_err(FileIdentifierJobError::from)?, + db, + ) + .await + .map_err(FileIdentifierJobError::from)? + .expect("Location root path should already exist in the database") + }; + + let orphan_count = count_orphan_file_paths(db, location_id, sub_path_id).await?; + + // Initializing `state.data` here because we need a complete state in case of early finish + state.data = Some(ShallowFileIdentifierJobState { + report: FileIdentifierReport { + location_path: location_path.to_path_buf(), + total_orphan_paths: orphan_count, + ..Default::default() + }, + cursor: FilePathIdAndLocationIdCursor { + file_path_id: -1, + location_id, + }, + sub_path_id, + }); + + if orphan_count == 0 { + return Err(JobError::EarlyFinish { + name: self.name().to_string(), + reason: "Found no orphan file paths to process".to_string(), + }); + } + + info!("Found {} orphan file paths", orphan_count); + + let task_count = (orphan_count as f64 / CHUNK_SIZE as f64).ceil() as usize; + info!( + "Found {} orphan Paths. Will execute {} tasks...", + orphan_count, task_count + ); + + // update job with total task count based on orphan file_paths count + ctx.progress(vec![JobReportUpdate::TaskCount(task_count)]); + + let first_path_id = db + .file_path() + .find_first(orphan_path_filters(location_id, None, sub_path_id)) + .order_by(file_path::id::order(Direction::Asc)) + .select(file_path::select!({ id })) + .exec() + .await? 
+ .map(|d| d.id) + .unwrap(); // SAFETY: We already validated before that there are orphans `file_path`s + + // SAFETY: We just initialized `state.data` above + state.data.as_mut().unwrap().cursor.file_path_id = first_path_id; + + state.steps = (0..task_count).map(|_| ()).collect(); + + Ok(()) + } + + async fn execute_step( + &self, + ctx: WorkerContext, + state: &mut JobState<Self>, + ) -> Result<(), JobError> { + let ShallowFileIdentifierJobState { + ref mut cursor, + ref mut report, + ref sub_path_id, + } = state + .data + .as_mut() + .expect("Critical error: missing data on job state"); + + let location = &state.init.location; + + // get chunk of orphans to process + let file_paths = get_orphan_file_paths(&ctx.library.db, cursor, *sub_path_id).await?; + + process_identifier_file_paths( + self.name(), + location, + &file_paths, + state.step_number, + cursor, + report, + ctx, + ) + .await + } + + async fn finalize(&mut self, ctx: WorkerContext, state: &mut JobState<Self>) -> JobResult { + finalize_file_identifier( + &state + .data + .as_ref() + .expect("critical error: missing data on job state") + .report, + ctx, + ) + } +} + +fn orphan_path_filters( + location_id: i32, + file_path_id: Option<i32>, + sub_path_id: i32, +) -> Vec<file_path::WhereParam> { + let mut params = vec![ + file_path::object_id::equals(None), + file_path::is_dir::equals(false), + file_path::location_id::equals(location_id), + file_path::parent_id::equals(Some(sub_path_id)), + ]; + // this is a workaround for the cursor not working properly + if let Some(file_path_id) = file_path_id { + params.push(file_path::id::gte(file_path_id)); + } + + params +} + +async fn count_orphan_file_paths( + db: &PrismaClient, + location_id: i32, + sub_path_id: i32, +) -> Result<usize, prisma_client_rust::QueryError> { + db.file_path() + .count(orphan_path_filters(location_id, None, sub_path_id)) + .exec() + .await + .map(|c| c as usize) +} + +async fn get_orphan_file_paths( + db: &PrismaClient, + cursor: &FilePathIdAndLocationIdCursor, + sub_path_id: i32, +) -> Result<Vec<file_path_for_file_identifier::Data>, prisma_client_rust::QueryError> { + info!( + "Querying {} orphan Paths at cursor: {:?}", + CHUNK_SIZE, cursor + ); + db.file_path() + .find_many(orphan_path_filters( + cursor.location_id, + Some(cursor.file_path_id), + sub_path_id, + )) + .order_by(file_path::id::order(Direction::Asc)) + // .cursor(cursor.into()) + .take(CHUNK_SIZE as i64) + // .skip(1) + .select(file_path_for_file_identifier::select()) + .exec() + .await +} diff --git a/core/src/object/fs/mod.rs b/core/src/object/fs/mod.rs index 924c791e0..073776c52 100644 --- a/core/src/object/fs/mod.rs +++ b/core/src/object/fs/mod.rs @@ -1,7 +1,6 @@ -pub mod create; - use crate::{ job::JobError, + location::file_path_helper::file_path_with_object, prisma::{file_path, location, PrismaClient}, }; @@ -9,7 +8,7 @@ use std::{ffi::OsStr, path::PathBuf}; use serde::{Deserialize, Serialize}; -use super::preview::file_path_with_object; +pub mod create; pub mod copy; pub mod cut; diff --git a/core/src/object/identifier_job/full_identifier_job.rs b/core/src/object/identifier_job/full_identifier_job.rs deleted file mode 100644 index 43a087388..000000000 --- a/core/src/object/identifier_job/full_identifier_job.rs +++ /dev/null @@ -1,234 +0,0 @@ -use crate::{ - invalidate_query, - job::{JobError, JobReportUpdate, JobResult, JobState, StatefulJob, WorkerContext}, - library::Library, - prisma::{file_path, location}, -}; - -use std::path::PathBuf; - -use prisma_client_rust::Direction; -use serde::{Deserialize, Serialize}; -use tracing::info; - -use super::{identifier_job_step, IdentifierJobError, CHUNK_SIZE}; -
-pub const FULL_IDENTIFIER_JOB_NAME: &str = "file_identifier"; - -pub struct FullFileIdentifierJob {} - -// FileIdentifierJobInit takes file_paths without a file_id and uniquely identifies them -// first: generating the cas_id and extracting metadata -// finally: creating unique file records, and linking them to their file_paths -#[derive(Serialize, Deserialize, Clone, Hash)] -pub struct FullFileIdentifierJobInit { - pub location_id: i32, - pub sub_path: Option, // subpath to start from -} - -#[derive(Serialize, Deserialize, Debug)] -struct FilePathIdAndLocationIdCursor { - file_path_id: i32, - location_id: i32, -} - -impl From<&FilePathIdAndLocationIdCursor> for file_path::UniqueWhereParam { - fn from(cursor: &FilePathIdAndLocationIdCursor) -> Self { - file_path::location_id_id(cursor.location_id, cursor.file_path_id) - } -} - -#[derive(Serialize, Deserialize)] -pub struct FullFileIdentifierJobState { - location: location::Data, - cursor: FilePathIdAndLocationIdCursor, - report: FileIdentifierReport, -} - -#[derive(Serialize, Deserialize, Debug, Default)] -pub struct FileIdentifierReport { - location_path: PathBuf, - total_orphan_paths: usize, - total_objects_created: usize, - total_objects_linked: usize, - total_objects_ignored: usize, -} - -#[async_trait::async_trait] -impl StatefulJob for FullFileIdentifierJob { - type Init = FullFileIdentifierJobInit; - type Data = FullFileIdentifierJobState; - type Step = (); - - fn name(&self) -> &'static str { - FULL_IDENTIFIER_JOB_NAME - } - - async fn init(&self, ctx: WorkerContext, state: &mut JobState) -> Result<(), JobError> { - info!("Identifying orphan File Paths..."); - - let location_id = state.init.location_id; - let db = &ctx.library.db; - - let location = db - .location() - .find_unique(location::id::equals(location_id)) - .exec() - .await? - .ok_or(IdentifierJobError::MissingLocation(state.init.location_id))?; - - let orphan_count = count_orphan_file_paths(&ctx.library, location_id).await?; - info!("Found {} orphan file paths", orphan_count); - - let task_count = (orphan_count as f64 / CHUNK_SIZE as f64).ceil() as usize; - info!( - "Found {} orphan Paths. Will execute {} tasks...", - orphan_count, task_count - ); - - // update job with total task count based on orphan file_paths count - ctx.progress(vec![JobReportUpdate::TaskCount(task_count)]); - - let first_path_id = db - .file_path() - .find_first(orphan_path_filters(location_id, None)) - .exec() - .await? 
- .map(|d| d.id) - .unwrap_or(1); - - state.data = Some(FullFileIdentifierJobState { - report: FileIdentifierReport { - location_path: location.path.clone().into(), - total_orphan_paths: orphan_count, - ..Default::default() - }, - location, - cursor: FilePathIdAndLocationIdCursor { - file_path_id: first_path_id, - location_id: state.init.location_id, - }, - }); - - state.steps = (0..task_count).map(|_| ()).collect(); - - Ok(()) - } - - async fn execute_step( - &self, - ctx: WorkerContext, - state: &mut JobState, - ) -> Result<(), JobError> { - let data = state - .data - .as_mut() - .expect("Critical error: missing data on job state"); - - // get chunk of orphans to process - let file_paths = - get_orphan_file_paths(&ctx.library, &data.cursor, data.location.id).await?; - - // if no file paths found, abort entire job early, there is nothing to do - // if we hit this error, there is something wrong with the data/query - if file_paths.is_empty() { - return Err(JobError::EarlyFinish { - name: self.name().to_string(), - reason: "Expected orphan Paths not returned from database query for this chunk" - .to_string(), - }); - } - - info!( - "Processing {:?} orphan Paths. ({} completed of {})", - file_paths.len(), - state.step_number, - data.report.total_orphan_paths - ); - - let (total_objects_created, total_objects_linked) = - identifier_job_step(&ctx.library, &data.location, &file_paths).await?; - - data.report.total_objects_created += total_objects_created; - data.report.total_objects_linked += total_objects_linked; - - // set the step data cursor to the last row of this chunk - if let Some(last_row) = file_paths.last() { - data.cursor.file_path_id = last_row.id; - } - - ctx.progress(vec![ - JobReportUpdate::CompletedTaskCount(state.step_number), - JobReportUpdate::Message(format!( - "Processed {} of {} orphan Paths", - state.step_number * CHUNK_SIZE, - data.report.total_orphan_paths - )), - ]); - - invalidate_query!(ctx.library, "locations.getExplorerData"); - - // let _remaining = count_orphan_file_paths(&ctx.core_ctx, location_id.into()).await?; - Ok(()) - } - - async fn finalize(&mut self, _ctx: WorkerContext, state: &mut JobState) -> JobResult { - let data = state - .data - .as_ref() - .expect("critical error: missing data on job state"); - - info!("Finalizing identifier job: {:#?}", data.report); - - Ok(Some(serde_json::to_value(&data.report)?)) - } -} - -fn orphan_path_filters(location_id: i32, file_path_id: Option) -> Vec { - let mut params = vec![ - file_path::object_id::equals(None), - file_path::is_dir::equals(false), - file_path::location_id::equals(location_id), - ]; - // this is a workaround for the cursor not working properly - if let Some(file_path_id) = file_path_id { - params.push(file_path::id::gte(file_path_id)); - } - params -} - -async fn count_orphan_file_paths( - ctx: &Library, - location_id: i32, -) -> Result { - Ok(ctx - .db - .file_path() - .count(vec![ - file_path::object_id::equals(None), - file_path::is_dir::equals(false), - file_path::location_id::equals(location_id), - ]) - .exec() - .await? 
as usize) -} - -async fn get_orphan_file_paths( - ctx: &Library, - cursor: &FilePathIdAndLocationIdCursor, - location_id: i32, -) -> Result, prisma_client_rust::QueryError> { - info!( - "Querying {} orphan Paths at cursor: {:?}", - CHUNK_SIZE, cursor - ); - ctx.db - .file_path() - .find_many(orphan_path_filters(location_id, Some(cursor.file_path_id))) - .order_by(file_path::id::order(Direction::Asc)) - // .cursor(cursor.into()) - .take(CHUNK_SIZE as i64) - // .skip(1) - .exec() - .await -} diff --git a/core/src/object/mod.rs b/core/src/object/mod.rs index 52f8ff350..f7f90e13d 100644 --- a/core/src/object/mod.rs +++ b/core/src/object/mod.rs @@ -1,6 +1,11 @@ +use crate::prisma::{file_path, object}; + +use rspc::Type; +use serde::{Deserialize, Serialize}; + pub mod cas; +pub mod file_identifier; pub mod fs; -pub mod identifier_job; pub mod preview; pub mod tag; pub mod validation; @@ -9,10 +14,12 @@ pub mod validation; // Some Objects are purely virtual, unless they have one or more associated Paths, which refer to a file found in a Location // Objects are what can be added to Spaces -use rspc::Type; -use serde::{Deserialize, Serialize}; - -use crate::prisma; +// Object selectables! +object::select!(object_just_id_has_thumbnail { id has_thumbnail }); +object::select!(object_for_file_identifier { + pub_id + file_paths: select { id cas_id } +}); // The response to provide the Explorer when looking at Objects #[derive(Debug, Serialize, Deserialize, Type)] @@ -23,14 +30,14 @@ pub struct ObjectsForExplorer { // #[derive(Debug, Serialize, Deserialize, Type)] // pub enum ExplorerContext { -// Location(Box), -// Space(Box), -// Tag(Box), -// // Search(Box), +// Location(Box), +// Space(Box), +// Tag(Box), +// // Search(Box), // } #[derive(Debug, Serialize, Deserialize, Type)] pub enum ObjectData { - Object(Box), - Path(Box), + Object(Box), + Path(Box), } diff --git a/core/src/object/preview/mod.rs b/core/src/object/preview/mod.rs index 98b94a7e8..ef1bda723 100644 --- a/core/src/object/preview/mod.rs +++ b/core/src/object/preview/mod.rs @@ -1,5 +1,5 @@ mod media_data; -mod thumb; +mod thumbnail; pub use media_data::*; -pub use thumb::*; +pub use thumbnail::*; diff --git a/core/src/object/preview/thumb.rs b/core/src/object/preview/thumb.rs deleted file mode 100644 index 6087a18c0..000000000 --- a/core/src/object/preview/thumb.rs +++ /dev/null @@ -1,385 +0,0 @@ -use crate::{ - api::CoreEvent, - invalidate_query, - job::{JobError, JobReportUpdate, JobResult, JobState, StatefulJob, WorkerContext}, - library::Library, - prisma::{file_path, location}, -}; - -use std::{ - collections::VecDeque, - error::Error, - ops::Deref, - path::{Path, PathBuf}, -}; - -use image::{self, imageops, DynamicImage, GenericImageView}; -use sd_file_ext::extensions::{Extension, ImageExtension, VideoExtension}; -use serde::{Deserialize, Serialize}; -use thiserror::Error; -use tokio::{fs, task::block_in_place}; -use tracing::{error, info, trace, warn}; -use webp::Encoder; - -static THUMBNAIL_SIZE_FACTOR: f32 = 0.2; -static THUMBNAIL_QUALITY: f32 = 30.0; -pub static THUMBNAIL_CACHE_DIR_NAME: &str = "thumbnails"; -pub const THUMBNAIL_JOB_NAME: &str = "thumbnailer"; - -pub struct ThumbnailJob {} - -#[derive(Serialize, Deserialize, Clone, Hash)] -pub struct ThumbnailJobInit { - pub location_id: i32, - pub root_path: PathBuf, - pub background: bool, -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct ThumbnailJobState { - thumbnail_dir: PathBuf, - root_path: PathBuf, -} - -#[derive(Error, Debug)] -pub enum ThumbnailError { - 
#[error("Location not found: ")] - MissingLocation(i32), - #[error("Root file path not found: ")] - MissingRootFilePath(PathBuf), -} - -file_path::include!(file_path_with_object { object }); - -#[derive(Debug, Serialize, Deserialize, Clone, Copy)] -enum ThumbnailJobStepKind { - Image, - #[cfg(feature = "ffmpeg")] - Video, -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct ThumbnailJobStep { - file_path: file_path_with_object::Data, - object_id: i32, - kind: ThumbnailJobStepKind, -} - -#[async_trait::async_trait] -impl StatefulJob for ThumbnailJob { - type Init = ThumbnailJobInit; - type Data = ThumbnailJobState; - type Step = ThumbnailJobStep; - - fn name(&self) -> &'static str { - THUMBNAIL_JOB_NAME - } - - async fn init(&self, ctx: WorkerContext, state: &mut JobState) -> Result<(), JobError> { - let Library { db, .. } = &ctx.library; - - let thumbnail_dir = ctx - .library - .config() - .data_directory() - .join(THUMBNAIL_CACHE_DIR_NAME); - - let location = db - .location() - .find_unique(location::id::equals(state.init.location_id)) - .exec() - .await? - .ok_or(ThumbnailError::MissingLocation(state.init.location_id))?; - - let root_path_str = state - .init - .root_path - .to_str() - .expect("Found non-UTF-8 path") - .to_string(); - - let parent_directory_id = db - .file_path() - .find_first(vec![ - file_path::location_id::equals(state.init.location_id), - file_path::materialized_path::equals(if !root_path_str.is_empty() { - root_path_str - } else { - "/".to_string() - }), - file_path::is_dir::equals(true), - ]) - .select(file_path::select!({ id })) - .exec() - .await? - .ok_or_else(|| ThumbnailError::MissingRootFilePath(state.init.root_path.clone()))? - .id; - - info!( - "Searching for images in location {} at directory {}", - location.id, parent_directory_id - ); - - // create all necessary directories if they don't exist - fs::create_dir_all(&thumbnail_dir).await?; - let root_path = location.path.into(); - - // query database for all image files in this location that need thumbnails - let image_files = get_files_by_extensions( - &ctx.library, - state.init.location_id, - parent_directory_id, - &sd_file_ext::extensions::ALL_IMAGE_EXTENSIONS - .iter() - .map(Clone::clone) - .filter(can_generate_thumbnail_for_image) - .map(Extension::Image) - .collect::>(), - ThumbnailJobStepKind::Image, - ) - .await?; - info!("Found {:?} image files", image_files.len()); - - #[cfg(feature = "ffmpeg")] - let all_files = { - // query database for all video files in this location that need thumbnails - let video_files = get_files_by_extensions( - &ctx.library, - state.init.location_id, - parent_directory_id, - &sd_file_ext::extensions::ALL_VIDEO_EXTENSIONS - .iter() - .map(Clone::clone) - .filter(can_generate_thumbnail_for_video) - .map(Extension::Video) - .collect::>(), - ThumbnailJobStepKind::Video, - ) - .await?; - info!("Found {:?} video files", video_files.len()); - - image_files - .into_iter() - .chain(video_files.into_iter()) - .collect::>() - }; - #[cfg(not(feature = "ffmpeg"))] - let all_files = { image_files.into_iter().collect::>() }; - - ctx.progress(vec![ - JobReportUpdate::TaskCount(all_files.len()), - JobReportUpdate::Message(format!("Preparing to process {} files", all_files.len())), - ]); - - state.data = Some(ThumbnailJobState { - thumbnail_dir, - root_path, - }); - state.steps = all_files; - - Ok(()) - } - - async fn execute_step( - &self, - ctx: WorkerContext, - state: &mut JobState, - ) -> Result<(), JobError> { - let step = &state.steps[0]; - 
ctx.progress(vec![JobReportUpdate::Message(format!( - "Processing {}", - step.file_path.materialized_path - ))]); - - let data = state - .data - .as_ref() - .expect("critical error: missing data on job state"); - - // assemble the file path - let path = data.root_path.join(&step.file_path.materialized_path); - trace!("image_file {:?}", step); - - // get cas_id, if none found skip - let Some(cas_id) = &step.file_path.cas_id else { - warn!( - "skipping thumbnail generation for {}", - step.file_path.materialized_path - ); - - return Ok(()); - }; - - // Define and write the WebP-encoded file to a given path - let output_path = data.thumbnail_dir.join(cas_id).with_extension("webp"); - - // check if file exists at output path - if !output_path.try_exists().unwrap() { - info!("Writing {:?} to {:?}", path, output_path); - - match step.kind { - ThumbnailJobStepKind::Image => { - if let Err(e) = generate_image_thumbnail(&path, &output_path).await { - error!("Error generating thumb for image {:#?}", e); - } - } - #[cfg(feature = "ffmpeg")] - ThumbnailJobStepKind::Video => { - // use crate::{ - // object::preview::{extract_media_data, StreamKind}, - // prisma::media_data, - // }; - - // use - if let Err(e) = generate_video_thumbnail(&path, &output_path).await { - error!("Error generating thumb for video: {:?} {:#?}", &path, e); - } - // extract MediaData from video and put in the database - // TODO: this is bad here, maybe give it its own job? - // if let Ok(media_data) = extract_media_data(&path) { - // info!( - // "Extracted media data for object {}: {:?}", - // step.object_id, media_data - // ); - - // // let primary_video_stream = media_data - // // .steams - // // .iter() - // // .find(|s| s.kind == Some(StreamKind::Video(_))); - - // let params = vec![ - // media_data::duration_seconds::set(Some(media_data.duration_seconds)), - // // media_data::pixel_width::set(Some(media_data.width)), - // // media_data::pixel_height::set(Some(media_data.height)), - // ]; - // let _ = ctx - // .library() - // .db - // .media_data() - // .upsert( - // media_data::id::equals(step.object_id), - // params.clone(), - // params, - // ) - // .exec() - // .await?; - // } - } - } - - if !state.init.background { - ctx.library.emit(CoreEvent::NewThumbnail { - cas_id: cas_id.clone(), - }); - }; - - // With this invalidate query, we update the user interface to show each new thumbnail - invalidate_query!(ctx.library, "locations.getExplorerData"); - } else { - info!("Thumb exists, skipping... 
{}", output_path.display()); - } - - ctx.progress(vec![JobReportUpdate::CompletedTaskCount( - state.step_number + 1, - )]); - - Ok(()) - } - - async fn finalize(&mut self, _ctx: WorkerContext, state: &mut JobState) -> JobResult { - let data = state - .data - .as_ref() - .expect("critical error: missing data on job state"); - info!( - "Finished thumbnail generation for location {} at {}", - state.init.location_id, - data.root_path.display() - ); - - // TODO: Serialize and return metadata here - Ok(None) - } -} - -pub async fn generate_image_thumbnail>( - file_path: P, - output_path: P, -) -> Result<(), Box> { - // Webp creation has blocking code - let webp = block_in_place(|| -> Result, Box> { - // Using `image` crate, open the included .jpg file - let img = image::open(file_path)?; - let (w, h) = img.dimensions(); - // Optionally, resize the existing photo and convert back into DynamicImage - let img = DynamicImage::ImageRgba8(imageops::resize( - &img, - // FIXME : Think of a better heuristic to get the thumbnail size - (w as f32 * THUMBNAIL_SIZE_FACTOR) as u32, - (h as f32 * THUMBNAIL_SIZE_FACTOR) as u32, - imageops::FilterType::Triangle, - )); - // Create the WebP encoder for the above image - let encoder = Encoder::from_image(&img)?; - - // Encode the image at a specified quality 0-100 - - // Type WebPMemory is !Send, which makes the Future in this function !Send, - // this make us `deref` to have a `&[u8]` and then `to_owned` to make a Vec - // which implies on a unwanted clone... - Ok(encoder.encode(THUMBNAIL_QUALITY).deref().to_owned()) - })?; - - fs::write(output_path, &webp).await.map_err(Into::into) -} - -#[cfg(feature = "ffmpeg")] -pub async fn generate_video_thumbnail>( - file_path: P, - output_path: P, -) -> Result<(), Box> { - use sd_ffmpeg::to_thumbnail; - - to_thumbnail(file_path, output_path, 256, THUMBNAIL_QUALITY).await?; - - Ok(()) -} - -async fn get_files_by_extensions( - ctx: &Library, - location_id: i32, - _parent_file_path_id: i32, - extensions: &[Extension], - kind: ThumbnailJobStepKind, -) -> Result, JobError> { - Ok(ctx - .db - .file_path() - .find_many(vec![ - file_path::location_id::equals(location_id), - file_path::extension::in_vec(extensions.iter().map(ToString::to_string).collect()), - // file_path::parent_id::equals(Some(parent_file_path_id)), - ]) - .include(file_path_with_object::include()) - .exec() - .await? 
- .into_iter() - .map(|file_path| ThumbnailJobStep { - object_id: file_path.object.as_ref().unwrap().id, - file_path, - kind, - }) - .collect()) -} - -#[allow(unused)] -pub fn can_generate_thumbnail_for_video(video_extension: &VideoExtension) -> bool { - use VideoExtension::*; - // File extensions that are specifically not supported by the thumbnailer - !matches!(video_extension, Mpg | Swf | M2v | Hevc) -} -#[allow(unused)] -pub fn can_generate_thumbnail_for_image(image_extension: &ImageExtension) -> bool { - use ImageExtension::*; - matches!(image_extension, Jpg | Jpeg | Png | Webp | Gif) -} diff --git a/core/src/object/preview/thumbnail/mod.rs b/core/src/object/preview/thumbnail/mod.rs new file mode 100644 index 000000000..f25d0442f --- /dev/null +++ b/core/src/object/preview/thumbnail/mod.rs @@ -0,0 +1,239 @@ +use crate::{ + api::CoreEvent, + invalidate_query, + job::{JobError, JobReportUpdate, JobResult, WorkerContext}, + location::{ + file_path_helper::{file_path_just_materialized_path_cas_id, FilePathError}, + LocationId, + }, +}; + +use std::{ + error::Error, + ops::Deref, + path::{Path, PathBuf}, +}; + +use sd_file_ext::extensions::{Extension, ImageExtension}; + +#[cfg(feature = "ffmpeg")] +use sd_file_ext::extensions::VideoExtension; + +use image::{self, imageops, DynamicImage, GenericImageView}; +use once_cell::sync::Lazy; +use serde::{Deserialize, Serialize}; +use thiserror::Error; +use tokio::{fs, io, task::block_in_place}; +use tracing::{error, info, trace, warn}; +use webp::Encoder; + +pub mod shallow_thumbnailer_job; +pub mod thumbnailer_job; + +static THUMBNAIL_SIZE_FACTOR: f32 = 0.2; +static THUMBNAIL_QUALITY: f32 = 30.0; +pub static THUMBNAIL_CACHE_DIR_NAME: &str = "thumbnails"; + +#[cfg(feature = "ffmpeg")] +static FILTERED_VIDEO_EXTENSIONS: Lazy<Vec<Extension>> = Lazy::new(|| { + sd_file_ext::extensions::ALL_VIDEO_EXTENSIONS + .iter() + .map(Clone::clone) + .filter(can_generate_thumbnail_for_video) + .map(Extension::Video) + .collect() +}); + +static FILTERED_IMAGE_EXTENSIONS: Lazy<Vec<Extension>> = Lazy::new(|| { + sd_file_ext::extensions::ALL_IMAGE_EXTENSIONS + .iter() + .map(Clone::clone) + .filter(can_generate_thumbnail_for_image) + .map(Extension::Image) + .collect() +}); + +#[derive(Debug, Serialize, Deserialize)] +pub struct ThumbnailerJobState { + thumbnail_dir: PathBuf, + location_path: PathBuf, + report: ThumbnailerJobReport, +} + +#[derive(Error, Debug)] +pub enum ThumbnailerError { + #[error("File path related error (error: {0})")] + FilePathError(#[from] FilePathError), + #[error("IO error (error: {0})")] + IOError(#[from] io::Error), +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct ThumbnailerJobReport { + location_id: LocationId, + materialized_path: String, + thumbnails_created: u32, +} + +#[derive(Debug, Serialize, Deserialize, Clone, Copy)] +enum ThumbnailerJobStepKind { + Image, + #[cfg(feature = "ffmpeg")] + Video, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct ThumbnailerJobStep { + file_path: file_path_just_materialized_path_cas_id::Data, + kind: ThumbnailerJobStepKind, +} + +pub async fn generate_image_thumbnail<P: AsRef<Path>>( + file_path: P, + output_path: P, +) -> Result<(), Box<dyn Error>> { + // Webp creation has blocking code + let webp = block_in_place(|| -> Result<Vec<u8>, Box<dyn Error>> { + // Using `image` crate, open the included .jpg file + let img = image::open(file_path)?; + let (w, h) = img.dimensions(); + // Optionally, resize the existing photo and convert back into DynamicImage + let img = DynamicImage::ImageRgba8(imageops::resize( + &img, + // FIXME : Think of a better heuristic
to get the thumbnail size + (w as f32 * THUMBNAIL_SIZE_FACTOR) as u32, + (h as f32 * THUMBNAIL_SIZE_FACTOR) as u32, + imageops::FilterType::Triangle, + )); + // Create the WebP encoder for the above image + let encoder = Encoder::from_image(&img)?; + + // Encode the image at a specified quality 0-100 + + // Type WebPMemory is !Send, which makes the Future in this function !Send, + // this make us `deref` to have a `&[u8]` and then `to_owned` to make a Vec + // which implies on a unwanted clone... + Ok(encoder.encode(THUMBNAIL_QUALITY).deref().to_owned()) + })?; + + fs::write(output_path, &webp).await.map_err(Into::into) +} + +#[cfg(feature = "ffmpeg")] +pub async fn generate_video_thumbnail<P: AsRef<Path>>( + file_path: P, + output_path: P, +) -> Result<(), Box<dyn Error>> { + use sd_ffmpeg::to_thumbnail; + + to_thumbnail(file_path, output_path, 256, THUMBNAIL_QUALITY).await?; + + Ok(()) +} + +#[cfg(feature = "ffmpeg")] +pub const fn can_generate_thumbnail_for_video(video_extension: &VideoExtension) -> bool { + use VideoExtension::*; + // File extensions that are specifically not supported by the thumbnailer + !matches!(video_extension, Mpg | Swf | M2v | Hevc) +} + +pub const fn can_generate_thumbnail_for_image(image_extension: &ImageExtension) -> bool { + use ImageExtension::*; + matches!(image_extension, Jpg | Jpeg | Png | Webp | Gif) +} + +fn finalize_thumbnailer(data: &ThumbnailerJobState, ctx: WorkerContext) -> JobResult { + info!( + "Finished thumbnail generation for location {} at {}", + data.report.location_id, + data.location_path + .join(&data.report.materialized_path) + .display() + ); + + if data.report.thumbnails_created > 0 { + invalidate_query!(ctx.library, "locations.getExplorerData"); + } + + Ok(Some(serde_json::to_value(&data.report)?)) +} + +async fn process_step( + is_background: bool, + step_number: usize, + step: &ThumbnailerJobStep, + data: &mut ThumbnailerJobState, + ctx: WorkerContext, +) -> Result<(), JobError> { + ctx.progress(vec![JobReportUpdate::Message(format!( + "Processing {}", + step.file_path.materialized_path + ))]); + + let step_result = inner_process_step(is_background, step, data, &ctx).await; + + ctx.progress(vec![JobReportUpdate::CompletedTaskCount(step_number + 1)]); + + step_result +} + +async fn inner_process_step( + is_background: bool, + step: &ThumbnailerJobStep, + data: &mut ThumbnailerJobState, + ctx: &WorkerContext, +) -> Result<(), JobError> { + // assemble the file path + let path = data.location_path.join(&step.file_path.materialized_path); + trace!("image_file {:?}", step); + + // get cas_id, if none found skip + let Some(cas_id) = &step.file_path.cas_id else { + warn!( + "skipping thumbnail generation for {}", + step.file_path.materialized_path + ); + + return Ok(()); + }; + + // Define and write the WebP-encoded file to a given path + let output_path = data.thumbnail_dir.join(format!("{cas_id}.webp")); + + match fs::metadata(&output_path).await { + Ok(_) => { + info!("Thumb exists, skipping... 
{}", output_path.display()); + } + Err(e) if e.kind() == io::ErrorKind::NotFound => { + info!("Writing {:?} to {:?}", path, output_path); + + match step.kind { + ThumbnailerJobStepKind::Image => { + if let Err(e) = generate_image_thumbnail(&path, &output_path).await { + error!("Error generating thumb for image {:#?}", e); + } + } + #[cfg(feature = "ffmpeg")] + ThumbnailerJobStepKind::Video => { + if let Err(e) = generate_video_thumbnail(&path, &output_path).await { + error!("Error generating thumb for video: {:?} {:#?}", &path, e); + } + } + } + + if !is_background { + ctx.library.emit(CoreEvent::NewThumbnail { + cas_id: cas_id.clone(), + }); + // With this invalidate query, we update the user interface to show each new thumbnail + invalidate_query!(ctx.library, "locations.getExplorerData"); + }; + + data.report.thumbnails_created += 1; + } + Err(e) => return Err(ThumbnailerError::from(e).into()), + } + + Ok(()) +} diff --git a/core/src/object/preview/thumbnail/shallow_thumbnailer_job.rs b/core/src/object/preview/thumbnail/shallow_thumbnailer_job.rs new file mode 100644 index 000000000..f0ba60326 --- /dev/null +++ b/core/src/object/preview/thumbnail/shallow_thumbnailer_job.rs @@ -0,0 +1,211 @@ +use crate::{ + job::{JobError, JobReportUpdate, JobResult, JobState, StatefulJob, WorkerContext}, + library::Library, + location::{ + file_path_helper::{ + ensure_sub_path_is_directory, ensure_sub_path_is_in_location, + file_path_just_materialized_path_cas_id, get_existing_file_path_id, MaterializedPath, + }, + LocationId, + }, + prisma::{file_path, location, PrismaClient}, +}; + +use std::{ + collections::VecDeque, + hash::Hash, + path::{Path, PathBuf}, +}; + +use sd_file_ext::extensions::Extension; + +use serde::{Deserialize, Serialize}; +use tokio::fs; +use tracing::info; + +use super::{ + finalize_thumbnailer, process_step, ThumbnailerError, ThumbnailerJobReport, + ThumbnailerJobState, ThumbnailerJobStep, ThumbnailerJobStepKind, FILTERED_IMAGE_EXTENSIONS, + THUMBNAIL_CACHE_DIR_NAME, +}; + +#[cfg(feature = "ffmpeg")] +use super::FILTERED_VIDEO_EXTENSIONS; + +pub const SHALLOW_THUMBNAILER_JOB_NAME: &str = "shallow_thumbnailer"; + +pub struct ShallowThumbnailerJob {} + +#[derive(Serialize, Deserialize, Clone)] +pub struct ShallowThumbnailerJobInit { + pub location: location::Data, + pub sub_path: PathBuf, +} + +impl Hash for ShallowThumbnailerJobInit { + fn hash(&self, state: &mut H) { + self.location.id.hash(state); + self.sub_path.hash(state); + } +} + +#[async_trait::async_trait] +impl StatefulJob for ShallowThumbnailerJob { + type Init = ShallowThumbnailerJobInit; + type Data = ThumbnailerJobState; + type Step = ThumbnailerJobStep; + + fn name(&self) -> &'static str { + SHALLOW_THUMBNAILER_JOB_NAME + } + + async fn init(&self, ctx: WorkerContext, state: &mut JobState) -> Result<(), JobError> { + let Library { db, .. 
} = &ctx.library; + + let thumbnail_dir = ctx + .library + .config() + .data_directory() + .join(THUMBNAIL_CACHE_DIR_NAME); + + let location_id = state.init.location.id; + let location_path = PathBuf::from(&state.init.location.path); + + let sub_path_id = if state.init.sub_path != Path::new("") { + let full_path = ensure_sub_path_is_in_location(&location_path, &state.init.sub_path) + .await + .map_err(ThumbnailerError::from)?; + ensure_sub_path_is_directory(&location_path, &state.init.sub_path) + .await + .map_err(ThumbnailerError::from)?; + + get_existing_file_path_id( + MaterializedPath::new(location_id, &location_path, &full_path, true) + .map_err(ThumbnailerError::from)?, + db, + ) + .await + .map_err(ThumbnailerError::from)? + .expect("Sub path should already exist in the database") + } else { + get_existing_file_path_id( + MaterializedPath::new(location_id, &location_path, &location_path, true) + .map_err(ThumbnailerError::from)?, + db, + ) + .await + .map_err(ThumbnailerError::from)? + .expect("Location root path should already exist in the database") + }; + + info!("Searching for images in location {location_id} at parent directory with id {sub_path_id}"); + + // create all necessary directories if they don't exist + fs::create_dir_all(&thumbnail_dir).await?; + + // query database for all image files in this location that need thumbnails + let image_files = get_files_by_extensions( + db, + location_id, + sub_path_id, + &FILTERED_IMAGE_EXTENSIONS, + ThumbnailerJobStepKind::Image, + ) + .await?; + info!("Found {:?} image files", image_files.len()); + + #[cfg(feature = "ffmpeg")] + let all_files = { + // query database for all video files in this location that need thumbnails + let video_files = get_files_by_extensions( + db, + location_id, + sub_path_id, + &FILTERED_VIDEO_EXTENSIONS, + ThumbnailerJobStepKind::Video, + ) + .await?; + info!("Found {:?} video files", video_files.len()); + + image_files + .into_iter() + .chain(video_files.into_iter()) + .collect::<VecDeque<_>>() + }; + #[cfg(not(feature = "ffmpeg"))] + let all_files = { image_files.into_iter().collect::<VecDeque<_>>() }; + + ctx.progress(vec![ + JobReportUpdate::TaskCount(all_files.len()), + JobReportUpdate::Message(format!("Preparing to process {} files", all_files.len())), + ]); + + state.data = Some(ThumbnailerJobState { + thumbnail_dir, + location_path, + report: ThumbnailerJobReport { + location_id, + materialized_path: if state.init.sub_path != Path::new("") { + // SAFETY: We know that the sub_path is a valid UTF-8 string because we validated it before + state.init.sub_path.to_str().unwrap().to_string() + } else { + "".to_string() + }, + thumbnails_created: 0, + }, + }); + state.steps = all_files; + + Ok(()) + } + + async fn execute_step( + &self, + ctx: WorkerContext, + state: &mut JobState<Self>, + ) -> Result<(), JobError> { + process_step( + false, // On shallow thumbnailer, we want to show thumbnails ASAP + state.step_number, + &state.steps[0], + state + .data + .as_mut() + .expect("critical error: missing data on job state"), + ctx, + ) + .await + } + + async fn finalize(&mut self, ctx: WorkerContext, state: &mut JobState<Self>) -> JobResult { + finalize_thumbnailer( + state + .data + .as_ref() + .expect("critical error: missing data on job state"), + ctx, + ) + } +} + +async fn get_files_by_extensions( + db: &PrismaClient, + location_id: LocationId, + parent_id: i32, + extensions: &[Extension], + kind: ThumbnailerJobStepKind, +) -> Result<Vec<ThumbnailerJobStep>, JobError> { + Ok(db + .file_path() + .find_many(vec![ + file_path::location_id::equals(location_id), + 
file_path::extension::in_vec(extensions.iter().map(ToString::to_string).collect()), + file_path::parent_id::equals(Some(parent_id)), + ]) + .select(file_path_just_materialized_path_cas_id::select()) + .exec() + .await? + .into_iter() + .map(|file_path| ThumbnailerJobStep { file_path, kind }) + .collect()) +} diff --git a/core/src/object/preview/thumbnail/thumbnailer_job.rs b/core/src/object/preview/thumbnail/thumbnailer_job.rs new file mode 100644 index 000000000..41f6ebea9 --- /dev/null +++ b/core/src/object/preview/thumbnail/thumbnailer_job.rs @@ -0,0 +1,187 @@ +use crate::{ + job::{JobError, JobReportUpdate, JobResult, JobState, StatefulJob, WorkerContext}, + library::Library, + location::file_path_helper::{ + ensure_sub_path_is_directory, ensure_sub_path_is_in_location, + file_path_just_materialized_path_cas_id, MaterializedPath, + }, + prisma::{file_path, location, PrismaClient}, +}; + +use std::{collections::VecDeque, hash::Hash, path::PathBuf}; + +use sd_file_ext::extensions::Extension; + +use serde::{Deserialize, Serialize}; +use tokio::fs; +use tracing::info; + +use super::{ + finalize_thumbnailer, process_step, ThumbnailerError, ThumbnailerJobReport, + ThumbnailerJobState, ThumbnailerJobStep, ThumbnailerJobStepKind, FILTERED_IMAGE_EXTENSIONS, + THUMBNAIL_CACHE_DIR_NAME, +}; + +#[cfg(feature = "ffmpeg")] +use super::FILTERED_VIDEO_EXTENSIONS; + +pub const THUMBNAILER_JOB_NAME: &str = "thumbnailer"; + +pub struct ThumbnailerJob {} + +#[derive(Serialize, Deserialize, Clone)] +pub struct ThumbnailerJobInit { + pub location: location::Data, + pub sub_path: Option<PathBuf>, + pub background: bool, +} + +impl Hash for ThumbnailerJobInit { + fn hash<H: std::hash::Hasher>(&self, state: &mut H) { + self.location.id.hash(state); + if let Some(ref sub_path) = self.sub_path { + sub_path.hash(state); + } + } +} + +#[async_trait::async_trait] +impl StatefulJob for ThumbnailerJob { + type Init = ThumbnailerJobInit; + type Data = ThumbnailerJobState; + type Step = ThumbnailerJobStep; + + fn name(&self) -> &'static str { + THUMBNAILER_JOB_NAME + } + + async fn init(&self, ctx: WorkerContext, state: &mut JobState<Self>) -> Result<(), JobError> { + let Library { db, .. } = &ctx.library; + + let thumbnail_dir = ctx + .library + .config() + .data_directory() + .join(THUMBNAIL_CACHE_DIR_NAME); + + let location_id = state.init.location.id; + let location_path = PathBuf::from(&state.init.location.path); + + let materialized_path = if let Some(ref sub_path) = state.init.sub_path { + let full_path = ensure_sub_path_is_in_location(&location_path, sub_path) + .await + .map_err(ThumbnailerError::from)?; + ensure_sub_path_is_directory(&location_path, sub_path) + .await + .map_err(ThumbnailerError::from)?; + + MaterializedPath::new(location_id, &location_path, &full_path, true) + .map_err(ThumbnailerError::from)? + } else { + MaterializedPath::new(location_id, &location_path, &location_path, true) + .map_err(ThumbnailerError::from)? 
+ }; + + info!("Searching for images in location {location_id} at directory {materialized_path}"); + + // create all necessary directories if they don't exist + fs::create_dir_all(&thumbnail_dir).await?; + + // query database for all image files in this location that need thumbnails + let image_files = get_files_by_extensions( + db, + &materialized_path, + &FILTERED_IMAGE_EXTENSIONS, + ThumbnailerJobStepKind::Image, + ) + .await?; + info!("Found {:?} image files", image_files.len()); + + #[cfg(feature = "ffmpeg")] + let all_files = { + // query database for all video files in this location that need thumbnails + let video_files = get_files_by_extensions( + db, + &materialized_path, + &FILTERED_VIDEO_EXTENSIONS, + ThumbnailerJobStepKind::Video, + ) + .await?; + info!("Found {:?} video files", video_files.len()); + + image_files + .into_iter() + .chain(video_files.into_iter()) + .collect::<VecDeque<_>>() + }; + #[cfg(not(feature = "ffmpeg"))] + let all_files = { image_files.into_iter().collect::<VecDeque<_>>() }; + + ctx.progress(vec![ + JobReportUpdate::TaskCount(all_files.len()), + JobReportUpdate::Message(format!("Preparing to process {} files", all_files.len())), + ]); + + state.data = Some(ThumbnailerJobState { + thumbnail_dir, + location_path, + report: ThumbnailerJobReport { + location_id, + materialized_path: materialized_path.into(), + thumbnails_created: 0, + }, + }); + state.steps = all_files; + + Ok(()) + } + + async fn execute_step( + &self, + ctx: WorkerContext, + state: &mut JobState<Self>, + ) -> Result<(), JobError> { + process_step( + state.init.background, + state.step_number, + &state.steps[0], + state + .data + .as_mut() + .expect("critical error: missing data on job state"), + ctx, + ) + .await + } + + async fn finalize(&mut self, ctx: WorkerContext, state: &mut JobState<Self>) -> JobResult { + finalize_thumbnailer( + state + .data + .as_ref() + .expect("critical error: missing data on job state"), + ctx, + ) + } +} + +async fn get_files_by_extensions( + db: &PrismaClient, + materialized_path: &MaterializedPath, + extensions: &[Extension], + kind: ThumbnailerJobStepKind, +) -> Result<Vec<ThumbnailerJobStep>, JobError> { + Ok(db + .file_path() + .find_many(vec![ + file_path::location_id::equals(materialized_path.location_id()), + file_path::extension::in_vec(extensions.iter().map(ToString::to_string).collect()), + file_path::materialized_path::starts_with(materialized_path.into()), + ]) + .select(file_path_just_materialized_path_cas_id::select()) + .exec() + .await? 
+ .into_iter() + .map(|file_path| ThumbnailerJobStep { file_path, kind }) + .collect()) +} diff --git a/core/src/object/validation/validator_job.rs b/core/src/object/validation/validator_job.rs index a5438ddf4..09990b9ae 100644 --- a/core/src/object/validation/validator_job.rs +++ b/core/src/object/validation/validator_job.rs @@ -1,15 +1,15 @@ -use serde::{Deserialize, Serialize}; -use serde_json::json; - -use std::{collections::VecDeque, path::PathBuf}; - use crate::{ job::{JobError, JobReportUpdate, JobResult, JobState, StatefulJob, WorkerContext}, library::Library, + location::file_path_helper::file_path_for_object_validator, prisma::{file_path, location}, sync, }; +use std::{collections::VecDeque, path::PathBuf}; + +use serde::{Deserialize, Serialize}; +use serde_json::json; use tracing::info; use super::hash::file_checksum; @@ -36,31 +36,18 @@ pub struct ObjectValidatorJobInit { pub background: bool, } -file_path::select!(file_path_and_object { - id - materialized_path - integrity_checksum - location: select { - id - pub_id - } - object: select { - id - } -}); - #[async_trait::async_trait] impl StatefulJob for ObjectValidatorJob { type Init = ObjectValidatorJobInit; type Data = ObjectValidatorJobState; - type Step = file_path_and_object::Data; + type Step = file_path_for_object_validator::Data; fn name(&self) -> &'static str { VALIDATOR_JOB_NAME } async fn init(&self, ctx: WorkerContext, state: &mut JobState) -> Result<(), JobError> { - let db = &ctx.library.db; + let Library { db, .. } = &ctx.library; state.steps = db .file_path() @@ -69,7 +56,7 @@ impl StatefulJob for ObjectValidatorJob { file_path::is_dir::equals(false), file_path::integrity_checksum::equals(None), ]) - .select(file_path_and_object::select()) + .select(file_path_for_object_validator::select()) .exec() .await? 
.into_iter() diff --git a/core/src/p2p/p2p_manager.rs b/core/src/p2p/p2p_manager.rs index 964617fcc..c93046c35 100644 --- a/core/src/p2p/p2p_manager.rs +++ b/core/src/p2p/p2p_manager.rs @@ -82,7 +82,7 @@ impl P2PManager { events_tx .send(P2PEvent::DiscoveredPeer { - peer_id: event.peer_id.clone(), + peer_id: event.peer_id, metadata: event.metadata.clone(), }) .map_err(|_| error!("Failed to send event to p2p event stream!")) diff --git a/core/src/volume.rs b/core/src/volume.rs index b6846420f..868e9b9ab 100644 --- a/core/src/volume.rs +++ b/core/src/volume.rs @@ -38,21 +38,22 @@ impl From for rspc::Error { } } -pub async fn save_volume(ctx: &Library) -> Result<(), VolumeError> { +pub async fn save_volume(library: &Library) -> Result<(), VolumeError> { let volumes = get_volumes()?; // enter all volumes associate with this client add to db for volume in volumes { - ctx.db + library + .db .volume() .upsert( node_id_mount_point_name( - ctx.node_local_id, + library.node_local_id, volume.mount_point.to_string(), volume.name.to_string(), ), ( - ctx.node_local_id, + library.node_local_id, volume.name, volume.mount_point, vec![ diff --git a/crates/crypto/src/primitives/types.rs b/crates/crypto/src/primitives/types.rs index a6034c36a..6f66481d5 100644 --- a/crates/crypto/src/primitives/types.rs +++ b/crates/crypto/src/primitives/types.rs @@ -195,7 +195,6 @@ impl From for SecretKeyString { c.to_string() } }) - .into_iter() .collect(); Self::new(hex_string) diff --git a/interface/app/$libraryId/location/$id.tsx b/interface/app/$libraryId/location/$id.tsx index fba3244df..dbb1a26e7 100644 --- a/interface/app/$libraryId/location/$id.tsx +++ b/interface/app/$libraryId/location/$id.tsx @@ -1,6 +1,6 @@ import { useEffect } from 'react'; import { useParams, useSearchParams } from 'react-router-dom'; -import { useLibraryQuery } from '@sd/client'; +import { useLibraryMutation, useLibraryQuery } from '@sd/client'; import { getExplorerStore } from '~/hooks/useExplorerStore'; import Explorer from '../Explorer'; @@ -16,11 +16,15 @@ export function useExplorerParams() { } export default () => { - const { location_id, path } = useExplorerParams(); + const { location_id, path, limit } = useExplorerParams(); + + const quickRescan = useLibraryMutation('locations.quickRescan'); + const explorerState = getExplorerStore(); useEffect(() => { - getExplorerStore().locationId = location_id; - }, [location_id]); + explorerState.locationId = location_id; + if (location_id !== null) quickRescan.mutate({ location_id, sub_path: path }); + }, [location_id, path]); if (location_id === null) throw new Error(`location_id is null!`); @@ -28,8 +32,8 @@ export default () => { 'locations.getExplorerData', { location_id, - path: path, - limit: 100, + path, + limit, cursor: null } ]); diff --git a/interface/app/$libraryId/settings/library/locations/$id.tsx b/interface/app/$libraryId/settings/library/locations/$id.tsx index 051ef7d66..a45ef7f72 100644 --- a/interface/app/$libraryId/settings/library/locations/$id.tsx +++ b/interface/app/$libraryId/settings/library/locations/$id.tsx @@ -36,7 +36,7 @@ export default function EditLocation() { form.reset({ displayName: data.name, localPath: data.path, - indexer_rules_ids: data.indexer_rules.map((i) => i.indexer_rule_id.toString()), + indexer_rules_ids: data.indexer_rules.map((i) => i.indexer_rule.id.toString()), generatePreviewMedia: data.generate_preview_media, syncPreviewMedia: data.sync_preview_media, hidden: data.hidden diff --git a/packages/client/src/core.ts b/packages/client/src/core.ts 
index fc1f35d73..5463cec94 100644 --- a/packages/client/src/core.ts +++ b/packages/client/src/core.ts @@ -17,7 +17,7 @@ export type Procedures = { { key: "keys.listMounted", input: LibraryArgs, result: string[] } | { key: "library.getStatistics", input: LibraryArgs, result: Statistics } | { key: "library.list", input: never, result: LibraryConfigWrapped[] } | - { key: "locations.getById", input: LibraryArgs, result: { id: number, pub_id: number[], node_id: number, name: string, path: string, total_capacity: number | null, available_capacity: number | null, is_archived: boolean, generate_preview_media: boolean, sync_preview_media: boolean, hidden: boolean, date_created: string, indexer_rules: IndexerRulesInLocation[] } | null } | + { key: "locations.getById", input: LibraryArgs, result: location_with_indexer_rules | null } | { key: "locations.getExplorerData", input: LibraryArgs, result: ExplorerData } | { key: "locations.indexer_rules.get", input: LibraryArgs, result: IndexerRule } | { key: "locations.indexer_rules.list", input: LibraryArgs, result: IndexerRule[] } | @@ -66,7 +66,7 @@ export type Procedures = { { key: "locations.fullRescan", input: LibraryArgs, result: null } | { key: "locations.indexer_rules.create", input: LibraryArgs, result: IndexerRule } | { key: "locations.indexer_rules.delete", input: LibraryArgs, result: null } | - { key: "locations.quickRescan", input: LibraryArgs, result: null } | + { key: "locations.quickRescan", input: LibraryArgs, result: null } | { key: "locations.relink", input: LibraryArgs, result: null } | { key: "locations.update", input: LibraryArgs, result: null } | { key: "nodes.tokenizeSensitiveKey", input: TokenizeKeyArgs, result: TokenizeResponse } | @@ -155,8 +155,6 @@ export type IndexerRule = { id: number, kind: number, name: string, parameters: */ export type IndexerRuleCreateArgs = { kind: RuleKind, name: string, parameters: number[] } -export type IndexerRulesInLocation = { date_created: string, location_id: number, indexer_rule_id: number } - export type InvalidateOperationEvent = { key: string, arg: any } export type JobReport = { id: string, name: string, data: number[] | null, metadata: any | null, date_created: string, date_modified: string, status: JobStatus, task_count: number, completed_task_count: number, message: string, seconds_elapsed: number } @@ -177,6 +175,8 @@ export type LibraryConfig = ({ version: string | null }) & { name: string, descr export type LibraryConfigWrapped = { uuid: string, config: LibraryConfig } +export type LightScanArgs = { location_id: number, sub_path: string } + export type Location = { id: number, pub_id: number[], node_id: number, name: string, path: string, total_capacity: number | null, available_capacity: number | null, is_archived: boolean, generate_preview_media: boolean, sync_preview_media: boolean, hidden: boolean, date_created: string } /** @@ -296,4 +296,6 @@ export type Volume = { name: string, mount_point: string, total_capacity: string export type file_path_with_object = { id: number, is_dir: boolean, cas_id: string | null, integrity_checksum: string | null, location_id: number, materialized_path: string, name: string, extension: string, object_id: number | null, parent_id: number | null, key_id: number | null, date_created: string, date_modified: string, date_indexed: string, object: Object | null } +export type location_with_indexer_rules = { id: number, pub_id: number[], node_id: number, name: string, path: string, total_capacity: number | null, available_capacity: number | null, 
is_archived: boolean, generate_preview_media: boolean, sync_preview_media: boolean, hidden: boolean, date_created: string, indexer_rules: { indexer_rule: IndexerRule }[] } + export type object_with_file_paths = { id: number, pub_id: number[], name: string | null, extension: string | null, kind: number, size_in_bytes: string, key_id: number | null, hidden: boolean, favorite: boolean, important: boolean, has_thumbnail: boolean, has_thumbstrip: boolean, has_video_preview: boolean, ipfs_id: string | null, note: string | null, date_created: string, date_modified: string, date_indexed: string, file_paths: FilePath[] }
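For reference, the regenerated bindings above mean `locations.quickRescan` now takes the new `LightScanArgs` payload ({ location_id, sub_path }) rather than an empty input. A minimal usage sketch from the UI side, assuming only the `useLibraryMutation` hook from `@sd/client` already used in this diff (the `useQuickRescan` helper name is hypothetical, not part of the patch):

import { useLibraryMutation } from '@sd/client';

// Returns a callback that triggers a light (shallow) rescan of one directory
// inside a location; the payload mirrors the generated LightScanArgs type.
export function useQuickRescan(location_id: number, sub_path: string) {
	const quickRescan = useLibraryMutation('locations.quickRescan');
	return () => quickRescan.mutate({ location_id, sub_path });
}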