[ENG-371] Subpath shallow re-indexing (#599)

* Introducing sub-path indexing for IndexerJob
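
A minimal sketch of the sub-path resolution this relies on (the `resolve_sub_path` name is illustrative; the actual helper in the diff below is `ensure_sub_path_is_in_location`, which also distinguishes a missing path from other I/O errors and is paired with `ensure_sub_path_is_directory`):

use std::{
    io,
    path::{Path, PathBuf},
};

use tokio::fs;

// A sub path is accepted either as an absolute path under the location root or as a
// path relative to it (e.g. a materialized path sent by the frontend); anything that
// doesn't resolve to something on disk is rejected before a job is spawned for it.
async fn resolve_sub_path(location_path: &Path, sub_path: &Path) -> io::Result<PathBuf> {
    let full_path = if sub_path.starts_with(location_path) {
        sub_path.to_path_buf()
    } else {
        location_path.join(sub_path)
    };

    // Only an existence check here; the PR's helper also verifies it is a directory.
    fs::metadata(&full_path).await.map(|_| full_path)
}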

* Introducing shallow indexer job
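
"Shallow" here means the job lists only the direct children of one directory instead of recursively walking the whole location. A stripped-down sketch of that idea (the real `walk_single_dir` added in this PR also applies the location's indexer rules and reports scan progress; `shallow_walk` is just an illustrative name):

use std::{
    io,
    path::{Path, PathBuf},
};

use tokio::fs;

// List the direct children of `dir`, without descending into subdirectories.
async fn shallow_walk(dir: impl AsRef<Path>) -> io::Result<Vec<(PathBuf, bool)>> {
    let mut entries = Vec::new();
    let mut read_dir = fs::read_dir(dir).await?;

    while let Some(entry) = read_dir.next_entry().await? {
        let is_dir = entry.metadata().await?.is_dir();
        // Directories are recorded but deliberately not walked into.
        entries.push((entry.path(), is_dir));
    }

    Ok(entries)
}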

* Some feature flags to avoid warnings during pnpm prep

* Many fixes to the shallow indexer job

* Changing how we implement the bookkeeping for file_path ids
Now we account for each location independently, and also integrate
better with the LibraryContext, instead of using a static global
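
A trimmed sketch of the new bookkeeping (names loosely follow the `LastFilePathIdManager` in the diff below; the real implementation seeds each counter from a database query instead of zero and lives on the `Library` context):

use dashmap::DashMap;

// One in-memory counter per location instead of a single process-wide AtomicI32.
#[derive(Default)]
struct LastIdByLocation {
    last_id_by_location: DashMap<i32, i32>,
}

impl LastIdByLocation {
    // Reserve `count` consecutive file_path ids for a location, returning the first one.
    fn reserve(&self, location_id: i32, count: i32) -> i32 {
        let mut last_id = self.last_id_by_location.entry(location_id).or_insert(0);
        let first = *last_id + 1;
        *last_id += count;
        first
    }
}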

* Making indexer job reentrant

* Introducing shallow file identifier job
And a bunch of minor refactors

* Rust fmt

* Removing light scan from the getExplorerData query
Light scan is a mutation, so we can call it from a useEffect hook in the
Explorer component on the frontend whenever the location_id or the explorer
path changes

* Handling early job finish on init for the identifier
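
Condensed sketch of that handling (it mirrors the change to `Job::run` in the diff below; the variants and the helper name here are simplified for illustration):

use thiserror::Error;
use tracing::info;

#[derive(Error, Debug)]
enum JobError {
    #[error("Job finished early: {reason}")]
    EarlyFinish { reason: String },
    #[error("job failed")]
    Failed,
}

// Returns whether the step loop should run: an EarlyFinish from `init` is logged
// and treated as "nothing to do" instead of failing the whole job.
fn should_run_steps(init_result: Result<(), JobError>) -> Result<bool, JobError> {
    match init_result {
        Ok(()) => Ok(true),
        Err(e @ JobError::EarlyFinish { .. }) => {
            info!("{e}");
            Ok(false)
        }
        Err(e) => Err(e),
    }
}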

* Only invalidating the query if we have orphan paths

* Introducing ShallowThumbnailerJob

* Clippy warnings about `into_iter()`

* Naming scheme for Prisma's selects and includes

* Invalidating getExplorerData in the thumbnailer

* Small mistakes lol

* Some nitpicks with pnpm prep

* Rust fmt

* Changing indexer's walk log to `trace!`

* Not deleting all file_paths on location fullRescan

* TS typecheck

* Removing the `file_path` selection with just the id
Ericson "Fogo" Soares 2023-03-10 13:25:58 -03:00 committed by GitHub
parent 7c5f760fbd
commit 4a6b057872
40 changed files with 3112 additions and 1595 deletions

.vscode/tasks.json

@ -23,20 +23,11 @@
"type": "shell",
"label": "ui:dev",
"problemMatcher": {
"owner": "vite",
"fileLocation": "autoDetect",
"pattern": {
"regexp": "^([^\\s].*)\\((\\d+|\\d+,\\d+|\\d+,\\d+,\\d+,\\d+)\\):\\s+(error|warning|info)\\s+(TS\\d+)\\s*:\\s*(.*)$",
"file": 1,
"location": 2,
"severity": 3,
"code": 4,
"message": 5
},
"base": "$tsc-watch",
"background": {
"activeOnStart": true,
"beginsPattern": "^> @sd\\/root@\\d\\.\\d\\.\\d desktop",
"endsPattern": "to show help$"
"beginsPattern": "VITE v",
"endsPattern": "http://localhost:8001/"
}
},
"isBackground": true,
@ -56,7 +47,7 @@
{
"type": "cargo",
"command": "run",
"args": ["--package", "spacedrive", "--bin", "spacedrive"],
"args": ["--manifest-path=./apps/desktop/src-tauri/Cargo.toml", "--no-default-features"],
"env": {
"RUST_BACKTRACE": "short" // Change this if you want more or less backtrace
},
@ -68,7 +59,7 @@
{
"type": "cargo",
"command": "run",
"args": ["--package", "spacedrive", "--bin", "spacedrive", "--release"],
"args": ["--manifest-path=./apps/desktop/src-tauri/Cargo.toml", "--release"],
"env": {
"RUST_BACKTRACE": "short" // Change this if you want more or less backtrace
},

View file

@ -1,15 +1,14 @@
use crate::{
job::{Job, JobManager},
location::{fetch_location, LocationError},
location::{find_location, LocationError},
object::{
identifier_job::full_identifier_job::{FullFileIdentifierJob, FullFileIdentifierJobInit},
preview::{ThumbnailJob, ThumbnailJobInit},
file_identifier::file_identifier_job::{FileIdentifierJob, FileIdentifierJobInit},
preview::thumbnailer_job::{ThumbnailerJob, ThumbnailerJobInit},
validation::validator_job::{ObjectValidatorJob, ObjectValidatorJobInit},
},
prisma::location,
};
use rspc::{ErrorCode, Type};
use rspc::Type;
use serde::Deserialize;
use std::path::PathBuf;
@ -41,24 +40,18 @@ pub(crate) fn mount() -> RouterBuilder {
t(
|_, args: GenerateThumbsForLocationArgs, library| async move {
if library
.db
.location()
.count(vec![location::id::equals(args.id)])
.exec()
.await? == 0
{
let Some(location) = find_location(&library, args.id).exec().await? else {
return Err(LocationError::IdNotFound(args.id).into());
}
};
library
.spawn_job(Job::new(
ThumbnailJobInit {
location_id: args.id,
root_path: PathBuf::new(),
ThumbnailerJobInit {
location,
sub_path: Some(args.path),
background: false,
},
ThumbnailJob {},
ThumbnailerJob {},
))
.await;
@ -74,11 +67,8 @@ pub(crate) fn mount() -> RouterBuilder {
}
t(|_, args: ObjectValidatorArgs, library| async move {
if fetch_location(&library, args.id).exec().await?.is_none() {
return Err(rspc::Error::new(
ErrorCode::NotFound,
"Location not found".into(),
));
if find_location(&library, args.id).exec().await?.is_none() {
return Err(LocationError::IdNotFound(args.id).into());
}
library
@ -103,20 +93,17 @@ pub(crate) fn mount() -> RouterBuilder {
}
t(|_, args: IdentifyUniqueFilesArgs, library| async move {
if fetch_location(&library, args.id).exec().await?.is_none() {
return Err(rspc::Error::new(
ErrorCode::NotFound,
"Location not found".into(),
));
}
let Some(location) = find_location(&library, args.id).exec().await? else {
return Err(LocationError::IdNotFound(args.id).into());
};
library
.spawn_job(Job::new(
FullFileIdentifierJobInit {
location_id: args.id,
FileIdentifierJobInit {
location,
sub_path: Some(args.path),
},
FullFileIdentifierJob {},
FileIdentifierJob {},
))
.await;

View file

@ -1,9 +1,9 @@
use crate::{
library::Library,
location::{
delete_location, fetch_location,
indexer::{indexer_job::indexer_job_location, rules::IndexerRuleCreateArgs},
relink_location, scan_location, LocationCreateArgs, LocationError, LocationUpdateArgs,
delete_location, find_location, indexer::rules::IndexerRuleCreateArgs, light_scan_location,
location_with_indexer_rules, relink_location, scan_location, LocationCreateArgs,
LocationError, LocationUpdateArgs,
},
prisma::{file_path, indexer_rule, indexer_rules_in_location, location, object, tag},
};
@ -44,7 +44,6 @@ pub struct ExplorerData {
file_path::include!(file_path_with_object { object });
object::include!(object_with_file_paths { file_paths });
indexer_rules_in_location::include!(indexer_rules_in_location_with_rules { indexer_rule });
pub(crate) fn mount() -> impl RouterBuilderLike<Ctx> {
<RouterBuilder>::new()
@ -65,7 +64,7 @@ pub(crate) fn mount() -> impl RouterBuilderLike<Ctx> {
.db
.location()
.find_unique(location::id::equals(location_id))
.include(location::include!({ indexer_rules }))
.include(location_with_indexer_rules::include())
.exec()
.await?)
})
@ -82,14 +81,10 @@ pub(crate) fn mount() -> impl RouterBuilderLike<Ctx> {
t(|_, mut args: LocationExplorerArgs, library| async move {
let Library { db, .. } = &library;
let location = db
.location()
.find_unique(location::id::equals(args.location_id))
let location = find_location(&library, args.location_id)
.exec()
.await?
.ok_or_else(|| {
rspc::Error::new(ErrorCode::NotFound, "Location not found".into())
})?;
.ok_or(LocationError::IdNotFound(args.location_id))?;
if !args.path.ends_with('/') {
args.path += "/";
@ -177,18 +172,11 @@ pub(crate) fn mount() -> impl RouterBuilderLike<Ctx> {
})
.library_mutation("fullRescan", |t| {
t(|_, location_id: i32, library| async move {
// remove existing paths
library
.db
.file_path()
.delete_many(vec![file_path::location_id::equals(location_id)])
.exec()
.await?;
// rescan location
scan_location(
&library,
fetch_location(&library, location_id)
.include(indexer_job_location::include())
find_location(&library, location_id)
.include(location_with_indexer_rules::include())
.exec()
.await?
.ok_or(LocationError::IdNotFound(location_id))?,
@ -198,9 +186,25 @@ pub(crate) fn mount() -> impl RouterBuilderLike<Ctx> {
})
})
.library_mutation("quickRescan", |t| {
t(|_, _: (), _| async move {
#[allow(unreachable_code)]
Ok(todo!())
#[derive(Clone, Serialize, Deserialize, Type, Debug)]
pub struct LightScanArgs {
pub location_id: i32,
pub sub_path: String,
}
t(|_, args: LightScanArgs, library| async move {
// light rescan location
light_scan_location(
&library,
find_location(&library, args.location_id)
.include(location_with_indexer_rules::include())
.exec()
.await?
.ok_or(LocationError::IdNotFound(args.location_id))?,
&args.sub_path,
)
.await
.map_err(Into::into)
})
})
.subscription("online", |t| {

View file

@ -2,16 +2,27 @@ use crate::{
invalidate_query,
job::{worker::Worker, DynJob, Job, JobError},
library::Library,
location::indexer::indexer_job::{IndexerJob, INDEXER_JOB_NAME},
location::indexer::{
indexer_job::{IndexerJob, INDEXER_JOB_NAME},
shallow_indexer_job::{ShallowIndexerJob, SHALLOW_INDEXER_JOB_NAME},
},
object::{
file_identifier::{
file_identifier_job::{FileIdentifierJob, FILE_IDENTIFIER_JOB_NAME},
shallow_file_identifier_job::{
ShallowFileIdentifierJob, SHALLOW_FILE_IDENTIFIER_JOB_NAME,
},
},
fs::{
copy::{FileCopierJob, COPY_JOB_NAME},
cut::{FileCutterJob, CUT_JOB_NAME},
delete::{FileDeleterJob, DELETE_JOB_NAME},
erase::{FileEraserJob, ERASE_JOB_NAME},
},
identifier_job::full_identifier_job::{FullFileIdentifierJob, FULL_IDENTIFIER_JOB_NAME},
preview::{ThumbnailJob, THUMBNAIL_JOB_NAME},
preview::{
shallow_thumbnailer_job::{ShallowThumbnailerJob, SHALLOW_THUMBNAILER_JOB_NAME},
thumbnailer_job::{ThumbnailerJob, THUMBNAILER_JOB_NAME},
},
validation::validator_job::{ObjectValidatorJob, VALIDATOR_JOB_NAME},
},
prisma::{job, node},
@ -71,8 +82,8 @@ impl JobManager {
// FIXME: if this task crashes, the entire application is unusable
while let Some(event) = internal_receiver.recv().await {
match event {
JobManagerEvent::IngestJob(ctx, job) => {
this2.clone().dispatch_job(&ctx, job).await
JobManagerEvent::IngestJob(library, job) => {
this2.clone().dispatch_job(&library, job).await
}
}
}
@ -83,7 +94,7 @@ impl JobManager {
this
}
pub async fn ingest(self: Arc<Self>, ctx: &Library, job: Box<dyn DynJob>) {
pub async fn ingest(self: Arc<Self>, library: &Library, job: Box<dyn DynJob>) {
let job_hash = job.hash();
debug!(
"Ingesting job: <name='{}', hash='{}'>",
@ -93,7 +104,7 @@ impl JobManager {
if !self.current_jobs_hashes.read().await.contains(&job_hash) {
self.current_jobs_hashes.write().await.insert(job_hash);
self.dispatch_job(ctx, job).await;
self.dispatch_job(library, job).await;
} else {
debug!(
"Job already in queue: <name='{}', hash='{}'>",
@ -119,7 +130,7 @@ impl JobManager {
}
}
pub async fn complete(self: Arc<Self>, ctx: &Library, job_id: Uuid, job_hash: u64) {
pub async fn complete(self: Arc<Self>, library: &Library, job_id: Uuid, job_hash: u64) {
// remove worker from running workers and from current jobs hashes
self.current_jobs_hashes.write().await.remove(&job_hash);
self.running_workers.write().await.remove(&job_id);
@ -128,7 +139,7 @@ impl JobManager {
if let Some(job) = job {
// We can't directly execute `self.ingest` here because it would cause an async cycle.
self.internal_sender
.send(JobManagerEvent::IngestJob(ctx.clone(), job))
.send(JobManagerEvent::IngestJob(library.clone(), job))
.unwrap_or_else(|_| {
error!("Failed to ingest job!");
});
@ -146,9 +157,9 @@ impl JobManager {
}
pub async fn get_history(
ctx: &Library,
library: &Library,
) -> Result<Vec<JobReport>, prisma_client_rust::QueryError> {
Ok(ctx
Ok(library
.db
.job()
.find_many(vec![job::status::not(JobStatus::Running.int_value())])
@ -161,10 +172,10 @@ impl JobManager {
.collect())
}
pub async fn clear_all_jobs(ctx: &Library) -> Result<(), prisma_client_rust::QueryError> {
ctx.db.job().delete_many(vec![]).exec().await?;
pub async fn clear_all_jobs(library: &Library) -> Result<(), prisma_client_rust::QueryError> {
library.db.job().delete_many(vec![]).exec().await?;
invalidate_query!(ctx, "jobs.getHistory");
invalidate_query!(library, "jobs.getHistory");
Ok(())
}
@ -190,8 +201,8 @@ impl JobManager {
}
}
pub async fn resume_jobs(self: Arc<Self>, ctx: &Library) -> Result<(), JobError> {
let paused_jobs = ctx
pub async fn resume_jobs(self: Arc<Self>, library: &Library) -> Result<(), JobError> {
let paused_jobs = library
.db
.job()
.find_many(vec![job::status::equals(JobStatus::Paused.int_value())])
@ -203,47 +214,65 @@ impl JobManager {
info!("Resuming job: {}, id: {}", paused_job.name, paused_job.id);
match paused_job.name.as_str() {
THUMBNAIL_JOB_NAME => {
THUMBNAILER_JOB_NAME => {
Arc::clone(&self)
.dispatch_job(ctx, Job::resume(paused_job, ThumbnailJob {})?)
.dispatch_job(library, Job::resume(paused_job, ThumbnailerJob {})?)
.await;
}
SHALLOW_THUMBNAILER_JOB_NAME => {
Arc::clone(&self)
.dispatch_job(library, Job::resume(paused_job, ShallowThumbnailerJob {})?)
.await;
}
INDEXER_JOB_NAME => {
Arc::clone(&self)
.dispatch_job(ctx, Job::resume(paused_job, IndexerJob {})?)
.dispatch_job(library, Job::resume(paused_job, IndexerJob {})?)
.await;
}
FULL_IDENTIFIER_JOB_NAME => {
SHALLOW_INDEXER_JOB_NAME => {
Arc::clone(&self)
.dispatch_job(ctx, Job::resume(paused_job, FullFileIdentifierJob {})?)
.dispatch_job(library, Job::resume(paused_job, ShallowIndexerJob {})?)
.await;
}
FILE_IDENTIFIER_JOB_NAME => {
Arc::clone(&self)
.dispatch_job(library, Job::resume(paused_job, FileIdentifierJob {})?)
.await;
}
SHALLOW_FILE_IDENTIFIER_JOB_NAME => {
Arc::clone(&self)
.dispatch_job(
library,
Job::resume(paused_job, ShallowFileIdentifierJob {})?,
)
.await;
}
VALIDATOR_JOB_NAME => {
Arc::clone(&self)
.dispatch_job(ctx, Job::resume(paused_job, ObjectValidatorJob {})?)
.dispatch_job(library, Job::resume(paused_job, ObjectValidatorJob {})?)
.await;
}
CUT_JOB_NAME => {
Arc::clone(&self)
.dispatch_job(ctx, Job::resume(paused_job, FileCutterJob {})?)
.dispatch_job(library, Job::resume(paused_job, FileCutterJob {})?)
.await;
}
COPY_JOB_NAME => {
Arc::clone(&self)
.dispatch_job(
ctx,
library,
Job::resume(paused_job, FileCopierJob { done_tx: None })?,
)
.await;
}
DELETE_JOB_NAME => {
Arc::clone(&self)
.dispatch_job(ctx, Job::resume(paused_job, FileDeleterJob {})?)
.dispatch_job(library, Job::resume(paused_job, FileDeleterJob {})?)
.await;
}
ERASE_JOB_NAME => {
Arc::clone(&self)
.dispatch_job(ctx, Job::resume(paused_job, FileEraserJob {})?)
.dispatch_job(library, Job::resume(paused_job, FileEraserJob {})?)
.await;
}
_ => {
@ -259,7 +288,7 @@ impl JobManager {
Ok(())
}
async fn dispatch_job(self: Arc<Self>, ctx: &Library, mut job: Box<dyn DynJob>) {
async fn dispatch_job(self: Arc<Self>, library: &Library, mut job: Box<dyn DynJob>) {
// create worker to process job
let mut running_workers = self.running_workers.write().await;
if running_workers.len() < MAX_WORKERS {
@ -276,8 +305,12 @@ impl JobManager {
let wrapped_worker = Arc::new(Mutex::new(worker));
if let Err(e) =
Worker::spawn(Arc::clone(&self), Arc::clone(&wrapped_worker), ctx.clone()).await
if let Err(e) = Worker::spawn(
Arc::clone(&self),
Arc::clone(&wrapped_worker),
library.clone(),
)
.await
{
error!("Error spawning worker: {:?}", e);
} else {
@ -375,22 +408,24 @@ impl JobReport {
}
}
pub async fn create(&self, ctx: &Library) -> Result<(), JobError> {
ctx.db
pub async fn create(&self, library: &Library) -> Result<(), JobError> {
library
.db
.job()
.create(
self.id.as_bytes().to_vec(),
self.name.clone(),
JobStatus::Running as i32,
node::id::equals(ctx.node_local_id),
node::id::equals(library.node_local_id),
vec![job::data::set(self.data.clone())],
)
.exec()
.await?;
Ok(())
}
pub async fn update(&self, ctx: &Library) -> Result<(), JobError> {
ctx.db
pub async fn update(&self, library: &Library) -> Result<(), JobError> {
library
.db
.job()
.update(
job::id::equals(self.id.as_bytes().to_vec()),

View file

@ -1,6 +1,6 @@
use crate::{
location::{indexer::IndexerError, LocationError, LocationManagerError},
object::{identifier_job::IdentifierJobError, preview::ThumbnailError},
object::{file_identifier::FileIdentifierJobError, preview::ThumbnailerError},
};
use std::{
@ -13,7 +13,7 @@ use rmp_serde::{decode::Error as DecodeError, encode::Error as EncodeError};
use sd_crypto::Error as CryptoError;
use serde::{de::DeserializeOwned, Deserialize, Serialize};
use thiserror::Error;
use tracing::warn;
use tracing::info;
use uuid::Uuid;
mod job_manager;
@ -57,10 +57,10 @@ pub enum JobError {
IndexerError(#[from] IndexerError),
#[error("Location error: {0}")]
LocationError(#[from] LocationError),
#[error("Thumbnail error: {0}")]
ThumbnailError(#[from] ThumbnailError),
#[error("Thumbnailer error: {0}")]
ThumbnailError(#[from] ThumbnailerError),
#[error("Identifier error: {0}")]
IdentifierError(#[from] IdentifierJobError),
IdentifierError(#[from] FileIdentifierJobError),
#[error("Crypto error: {0}")]
CryptoError(#[from] CryptoError),
@ -172,23 +172,32 @@ impl<State: StatefulJob> DynJob for Job<State> {
}
async fn run(&mut self, ctx: WorkerContext) -> JobResult {
let mut job_should_run = true;
// Checking if we have a brand new job, or if we are resuming an old one.
if self.state.data.is_none() {
self.stateful_job.init(ctx.clone(), &mut self.state).await?;
if let Err(e) = self.stateful_job.init(ctx.clone(), &mut self.state).await {
if matches!(e, JobError::EarlyFinish { .. }) {
info!("{e}");
job_should_run = false;
} else {
return Err(e);
}
}
}
let mut shutdown_rx = ctx.shutdown_rx();
let shutdown_rx_fut = shutdown_rx.recv();
tokio::pin!(shutdown_rx_fut);
while !self.state.steps.is_empty() {
while job_should_run && !self.state.steps.is_empty() {
tokio::select! {
step_result = self.stateful_job.execute_step(
ctx.clone(),
&mut self.state,
) => {
if matches!(step_result, Err(JobError::EarlyFinish { .. })) {
warn!("{}", step_result.unwrap_err());
info!("{}", step_result.unwrap_err());
break;
} else {
step_result?;

View file

@ -1,6 +1,11 @@
use crate::{
api::CoreEvent, job::DynJob, location::LocationManager, node::NodeConfigManager,
object::preview::THUMBNAIL_CACHE_DIR_NAME, prisma::PrismaClient, sync::SyncManager,
api::CoreEvent,
job::DynJob,
location::{file_path_helper::LastFilePathIdManager, LocationManager},
node::NodeConfigManager,
object::preview::THUMBNAIL_CACHE_DIR_NAME,
prisma::PrismaClient,
sync::SyncManager,
NodeContext,
};
@ -29,6 +34,8 @@ pub struct Library {
pub sync: Arc<SyncManager>,
/// key manager that provides encryption keys to functions that require them
pub key_manager: Arc<KeyManager>,
/// last id by location keeps track of the last id by location for the library
pub last_file_path_id_manager: Arc<LastFilePathIdManager>,
/// node_local_id holds the local ID of the node which is running the library.
pub node_local_id: i32,
/// node_context holds the node context for the node which this library is running on.

View file

@ -1,5 +1,6 @@
use crate::{
invalidate_query,
location::file_path_helper::LastFilePathIdManager,
node::Platform,
prisma::{node, PrismaClient},
sync::SyncManager,
@ -131,7 +132,6 @@ impl LibraryManager {
let mut libraries = Vec::new();
for entry in fs::read_dir(&libraries_dir)?
.into_iter()
.filter_map(|entry| entry.ok())
.filter(|entry| {
entry.path().is_file()
@ -346,6 +346,7 @@ impl LibraryManager {
key_manager,
sync: Arc::new(sync_manager),
db,
last_file_path_id_manager: Arc::new(LastFilePathIdManager::new()),
node_local_id: node_data.id,
node_context,
})

View file

@ -7,7 +7,7 @@ use thiserror::Error;
use tokio::io;
use uuid::Uuid;
use super::metadata::LocationMetadataError;
use super::{file_path_helper::FilePathError, metadata::LocationMetadataError};
/// Error type for location related errors
#[derive(Error, Debug)]
@ -58,6 +58,8 @@ pub enum LocationError {
DatabaseError(#[from] prisma_client_rust::QueryError),
#[error("Location manager error (error: {0:?})")]
LocationManagerError(#[from] LocationManagerError),
#[error("File path related error (error: {0})")]
FilePathError(#[from] FilePathError),
}
impl From<LocationError> for rspc::Error {

View file

@ -1,82 +1,491 @@
use crate::{library::Library, prisma::file_path};
use crate::prisma::{
file_path::{self, FindMany},
location, PrismaClient,
};
use std::sync::atomic::{AtomicI32, Ordering};
use std::{
fmt::{Display, Formatter},
path::{Path, PathBuf},
};
use dashmap::{mapref::entry::Entry, DashMap};
use futures::future::try_join_all;
use prisma_client_rust::{Direction, QueryError};
use serde::{Deserialize, Serialize};
use thiserror::Error;
use tokio::{fs, io};
use tracing::error;
static LAST_FILE_PATH_ID: AtomicI32 = AtomicI32::new(0);
use super::LocationId;
file_path::select!(file_path_id_only { id });
// File Path selectables!
file_path::select!(file_path_just_id_materialized_path {
id
materialized_path
});
file_path::select!(file_path_for_file_identifier {
id
materialized_path
date_created
});
file_path::select!(file_path_just_object_id { object_id });
file_path::select!(file_path_for_object_validator {
id
materialized_path
integrity_checksum
location: select {
id
pub_id
}
});
file_path::select!(file_path_just_materialized_path_cas_id {
materialized_path
cas_id
});
pub async fn get_max_file_path_id(library: &Library) -> Result<i32, QueryError> {
let mut last_id = LAST_FILE_PATH_ID.load(Ordering::Acquire);
if last_id == 0 {
last_id = fetch_max_file_path_id(library).await?;
LAST_FILE_PATH_ID.store(last_id, Ordering::Release);
// File Path includes!
file_path::include!(file_path_with_object { object });
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct MaterializedPath {
pub(super) materialized_path: String,
pub(super) is_dir: bool,
pub(super) location_id: LocationId,
pub(super) name: String,
pub(super) extension: String,
}
impl MaterializedPath {
pub fn new(
location_id: LocationId,
location_path: impl AsRef<Path>,
full_path: impl AsRef<Path>,
is_dir: bool,
) -> Result<Self, FilePathError> {
let full_path = full_path.as_ref();
let mut materialized_path =
extract_materialized_path(location_id, location_path, full_path)?
.to_str()
.expect("Found non-UTF-8 path")
.to_string();
if is_dir && !materialized_path.ends_with('/') {
materialized_path += "/";
}
let extension = if !is_dir {
let extension = full_path
.extension()
.unwrap_or_default()
.to_str()
.unwrap_or_default();
#[cfg(debug_assertions)]
{
// In dev mode, we lowercase the extension because we don't use the SQL migration,
// and using prisma.schema directly we can't set `COLLATE NOCASE` on the
// `extension` column of the `file_path` table
extension.to_lowercase()
}
#[cfg(not(debug_assertions))]
{
extension.to_string()
}
} else {
String::new()
};
Ok(Self {
materialized_path,
is_dir,
location_id,
name: Self::prepare_name(full_path),
extension,
})
}
Ok(last_id)
pub fn location_id(&self) -> LocationId {
self.location_id
}
fn prepare_name(path: &Path) -> String {
// Not using `impl AsRef<Path>` here because it's a private method
path.file_name()
.unwrap_or_default()
.to_str()
.unwrap_or_default()
.to_string()
}
pub fn parent(&self) -> Self {
let parent_path = Path::new(&self.materialized_path)
.parent()
.unwrap_or_else(|| Path::new("/"));
let mut parent_path_str = parent_path
.to_str()
.unwrap() // SAFETY: This unwrap is ok because this path was a valid UTF-8 String before
.to_string();
if !parent_path_str.ends_with('/') {
parent_path_str += "/";
}
Self {
materialized_path: parent_path_str,
is_dir: true,
location_id: self.location_id,
// NOTE: This way we don't use the same name for the "/" `file_path`, which uses the location
// name in the database; check later if this is a problem
name: Self::prepare_name(parent_path),
extension: String::new(),
}
}
}
pub fn set_max_file_path_id(id: i32) {
LAST_FILE_PATH_ID.store(id, Ordering::Relaxed);
impl From<MaterializedPath> for String {
fn from(path: MaterializedPath) -> Self {
path.materialized_path
}
}
async fn fetch_max_file_path_id(library: &Library) -> Result<i32, QueryError> {
Ok(library
.db
.file_path()
.find_first(vec![])
.order_by(file_path::id::order(Direction::Desc))
.select(file_path_id_only::select())
impl From<&MaterializedPath> for String {
fn from(path: &MaterializedPath) -> Self {
path.materialized_path.clone()
}
}
impl AsRef<str> for MaterializedPath {
fn as_ref(&self) -> &str {
self.materialized_path.as_ref()
}
}
impl AsRef<Path> for MaterializedPath {
fn as_ref(&self) -> &Path {
Path::new(&self.materialized_path)
}
}
impl Display for MaterializedPath {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.materialized_path)
}
}
#[derive(Error, Debug)]
pub enum FilePathError {
#[error("Received an invalid sub path: <location_path={location_path}, sub_path={sub_path}>")]
InvalidSubPath {
location_path: PathBuf,
sub_path: PathBuf,
},
#[error("Sub path is not a directory: {0}")]
SubPathNotDirectory(PathBuf),
#[error("The parent directory of the received sub path isn't indexed in the location: <id={location_id}, sub_path={sub_path}>")]
SubPathParentNotInLocation {
location_id: LocationId,
sub_path: PathBuf,
},
#[error("Unable to extract materialized path from location: <id='{0}', path='{1:?}'>")]
UnableToExtractMaterializedPath(LocationId, PathBuf),
#[error("Database error (error: {0:?})")]
DatabaseError(#[from] QueryError),
#[error("Database error (error: {0:?})")]
IOError(#[from] io::Error),
}
#[derive(Debug)]
pub struct LastFilePathIdManager {
last_id_by_location: DashMap<LocationId, i32>,
}
impl Default for LastFilePathIdManager {
fn default() -> Self {
Self {
last_id_by_location: DashMap::with_capacity(4),
}
}
}
impl LastFilePathIdManager {
pub fn new() -> Self {
Default::default()
}
pub async fn get_max_file_path_id(
&self,
location_id: LocationId,
db: &PrismaClient,
) -> Result<i32, FilePathError> {
Ok(match self.last_id_by_location.entry(location_id) {
Entry::Occupied(entry) => *entry.get(),
Entry::Vacant(entry) => {
// I wish I could use `or_try_insert_with` method instead of this crappy match,
// but we don't have async closures yet ):
let id = Self::fetch_max_file_path_id(location_id, db).await?;
entry.insert(id);
id
}
})
}
pub async fn set_max_file_path_id(&self, location_id: LocationId, id: i32) {
self.last_id_by_location.insert(location_id, id);
}
async fn fetch_max_file_path_id(
location_id: LocationId,
db: &PrismaClient,
) -> Result<i32, FilePathError> {
Ok(db
.file_path()
.find_first(vec![file_path::location_id::equals(location_id)])
.order_by(file_path::id::order(Direction::Desc))
.select(file_path::select!({ id }))
.exec()
.await?
.map(|r| r.id)
.unwrap_or(0))
}
#[cfg(feature = "location-watcher")]
pub async fn create_file_path(
&self,
db: &PrismaClient,
MaterializedPath {
materialized_path,
is_dir,
location_id,
name,
extension,
}: MaterializedPath,
parent_id: Option<i32>,
) -> Result<file_path::Data, FilePathError> {
// Keeping a reference in that map for the entire duration of the function, so we keep it locked
let mut last_id_ref = match self.last_id_by_location.entry(location_id) {
Entry::Occupied(occupied) => occupied.into_ref(),
Entry::Vacant(vacant) => {
let id = Self::fetch_max_file_path_id(location_id, db).await?;
vacant.insert(id)
}
};
let next_id = *last_id_ref + 1;
let created_path = db
.file_path()
.create(
next_id,
location::id::equals(location_id),
materialized_path,
name,
extension,
vec![
file_path::parent_id::set(parent_id),
file_path::is_dir::set(is_dir),
],
)
.exec()
.await?;
*last_id_ref = next_id;
Ok(created_path)
}
}
pub fn subtract_location_path(
location_path: impl AsRef<Path>,
current_path: impl AsRef<Path>,
) -> Option<PathBuf> {
let location_path = location_path.as_ref();
let current_path = current_path.as_ref();
if let Ok(stripped) = current_path.strip_prefix(location_path) {
Some(stripped.to_path_buf())
} else {
error!(
"Failed to strip location root path ({}) from current path ({})",
location_path.display(),
current_path.display()
);
None
}
}
pub fn extract_materialized_path(
location_id: LocationId,
location_path: impl AsRef<Path>,
path: impl AsRef<Path>,
) -> Result<PathBuf, FilePathError> {
subtract_location_path(location_path, &path).ok_or_else(|| {
FilePathError::UnableToExtractMaterializedPath(location_id, path.as_ref().to_path_buf())
})
}
pub async fn find_many_file_paths_by_full_path<'db>(
location: &location::Data,
full_paths: &[impl AsRef<Path>],
db: &'db PrismaClient,
) -> Result<FindMany<'db>, FilePathError> {
let is_dirs = try_join_all(
full_paths
.iter()
.map(|path| async move { fs::metadata(path).await.map(|metadata| metadata.is_dir()) }),
)
.await?;
let materialized_paths = full_paths
.iter()
.zip(is_dirs.into_iter())
.map(|(path, is_dir)| {
MaterializedPath::new(location.id, &location.path, path, is_dir).map(Into::into)
})
// Collecting in a Result, so we stop on the first error
.collect::<Result<Vec<_>, _>>()?;
Ok(db.file_path().find_many(vec![
file_path::location_id::equals(location.id),
file_path::materialized_path::in_vec(materialized_paths),
]))
}
pub async fn get_existing_file_path_id(
materialized_path: MaterializedPath,
db: &PrismaClient,
) -> Result<Option<i32>, FilePathError> {
db.file_path()
.find_first(vec![
file_path::location_id::equals(materialized_path.location_id),
file_path::materialized_path::equals(materialized_path.into()),
])
.select(file_path::select!({ id }))
.exec()
.await?
.map(|r| r.id)
.unwrap_or(0))
.await
.map_or_else(|e| Err(e.into()), |r| Ok(r.map(|r| r.id)))
}
#[cfg(feature = "location-watcher")]
pub async fn create_file_path(
library: &Library,
location_id: i32,
mut materialized_path: String,
name: String,
extension: String,
parent_id: Option<i32>,
is_dir: bool,
) -> Result<file_path::Data, QueryError> {
use crate::prisma::location;
let mut last_id = LAST_FILE_PATH_ID.load(Ordering::Acquire);
if last_id == 0 {
last_id = fetch_max_file_path_id(library).await?;
}
// If this new file_path is a directory, materialized_path must end with "/"
if is_dir && !materialized_path.ends_with('/') {
materialized_path += "/";
}
let next_id = last_id + 1;
let created_path = library
.db
.file_path()
.create(
next_id,
location::id::equals(location_id),
materialized_path,
name,
extension,
vec![
file_path::parent_id::set(parent_id),
file_path::is_dir::set(is_dir),
],
)
pub async fn get_existing_file_path(
materialized_path: MaterializedPath,
db: &PrismaClient,
) -> Result<Option<file_path::Data>, FilePathError> {
db.file_path()
.find_first(vec![
file_path::location_id::equals(materialized_path.location_id),
file_path::materialized_path::equals(materialized_path.into()),
])
.exec()
.await?;
LAST_FILE_PATH_ID.store(next_id, Ordering::Release);
Ok(created_path)
.await
.map_err(Into::into)
}
#[cfg(feature = "location-watcher")]
pub async fn get_existing_file_path_with_object(
materialized_path: MaterializedPath,
db: &PrismaClient,
) -> Result<Option<file_path_with_object::Data>, FilePathError> {
db.file_path()
.find_first(vec![
file_path::location_id::equals(materialized_path.location_id),
file_path::materialized_path::equals(materialized_path.into()),
])
// include object for orphan check
.include(file_path_with_object::include())
.exec()
.await
.map_err(Into::into)
}
#[cfg(feature = "location-watcher")]
pub async fn get_existing_file_or_directory(
location: &super::location_with_indexer_rules::Data,
path: impl AsRef<Path>,
db: &PrismaClient,
) -> Result<Option<file_path_with_object::Data>, FilePathError> {
let mut maybe_file_path = get_existing_file_path_with_object(
MaterializedPath::new(location.id, &location.path, path.as_ref(), false)?,
db,
)
.await?;
// First we check if this path is a file in our db; if it isn't, then we check for a directory
if maybe_file_path.is_none() {
maybe_file_path = get_existing_file_path_with_object(
MaterializedPath::new(location.id, &location.path, path.as_ref(), true)?,
db,
)
.await?;
}
Ok(maybe_file_path)
}
#[cfg(feature = "location-watcher")]
pub async fn get_parent_dir(
materialized_path: &MaterializedPath,
db: &PrismaClient,
) -> Result<Option<file_path::Data>, FilePathError> {
get_existing_file_path(materialized_path.parent(), db).await
}
pub async fn ensure_sub_path_is_in_location(
location_path: impl AsRef<Path>,
sub_path: impl AsRef<Path>,
) -> Result<PathBuf, FilePathError> {
let sub_path = sub_path.as_ref();
let location_path = location_path.as_ref();
if !sub_path.starts_with(location_path) {
// If the sub_path doesn't start with the location_path, we have to check if it's a
// materialized path received from the frontend, and then check if the full path exists
let full_path = location_path.join(sub_path);
match fs::metadata(&full_path).await {
Ok(_) => Ok(full_path),
Err(e) if e.kind() == io::ErrorKind::NotFound => Err(FilePathError::InvalidSubPath {
sub_path: sub_path.to_path_buf(),
location_path: location_path.to_path_buf(),
}),
Err(e) => Err(e.into()),
}
} else {
Ok(sub_path.to_path_buf())
}
}
pub async fn ensure_sub_path_is_directory(
location_path: impl AsRef<Path>,
sub_path: impl AsRef<Path>,
) -> Result<(), FilePathError> {
let sub_path = sub_path.as_ref();
let location_path = location_path.as_ref();
match fs::metadata(sub_path).await {
Ok(meta) => {
if meta.is_file() {
Err(FilePathError::SubPathNotDirectory(sub_path.to_path_buf()))
} else {
Ok(())
}
}
Err(e) if e.kind() == io::ErrorKind::NotFound => {
match fs::metadata(location_path.join(sub_path)).await {
Ok(meta) => {
if meta.is_file() {
Err(FilePathError::SubPathNotDirectory(sub_path.to_path_buf()))
} else {
Ok(())
}
}
Err(e) if e.kind() == io::ErrorKind::NotFound => {
Err(FilePathError::InvalidSubPath {
sub_path: sub_path.to_path_buf(),
location_path: location_path.to_path_buf(),
})
}
Err(e) => Err(e.into()),
}
}
Err(e) => Err(e.into()),
}
}

View file

@ -1,103 +1,38 @@
use crate::{
job::{JobError, JobReportUpdate, JobResult, JobState, StatefulJob, WorkerContext},
job::{JobError, JobResult, JobState, StatefulJob, WorkerContext},
library::Library,
location::indexer::rules::RuleKind,
prisma::{file_path, location},
sync,
location::file_path_helper::{
ensure_sub_path_is_directory, ensure_sub_path_is_in_location,
file_path_just_id_materialized_path, find_many_file_paths_by_full_path,
get_existing_file_path_id, MaterializedPath,
},
prisma::location,
};
use std::{
collections::HashMap,
ffi::OsStr,
hash::{Hash, Hasher},
path::PathBuf,
time::Duration,
};
use std::{collections::HashMap, path::Path};
use chrono::{DateTime, Utc};
use chrono::Utc;
use itertools::Itertools;
use serde::{Deserialize, Serialize};
use serde_json::json;
use tokio::time::Instant;
use tracing::info;
use tracing::error;
use super::{
super::file_path_helper::{get_max_file_path_id, set_max_file_path_id},
rules::IndexerRule,
walk::{walk, WalkEntry},
execute_indexer_step, finalize_indexer,
rules::{IndexerRule, RuleKind},
walk::walk,
IndexerError, IndexerJobData, IndexerJobInit, IndexerJobStep, IndexerJobStepEntry,
ScanProgress,
};
/// BATCH_SIZE is the number of files to index at each step, writing the chunk of file metadata to the database.
const BATCH_SIZE: usize = 1000;
pub const INDEXER_JOB_NAME: &str = "indexer";
#[derive(Clone)]
pub enum ScanProgress {
ChunkCount(usize),
SavedChunks(usize),
Message(String),
}
/// An `IndexerJob` is a stateful job that walks a directory and indexes all files.
/// First it walks the directory and generates a list of files to index, chunked into
/// batches of [`BATCH_SIZE`]. Then for each chunk it writes the file metadata to the database.
pub struct IndexerJob;
location::include!(indexer_job_location {
indexer_rules: select { indexer_rule }
});
/// `IndexerJobInit` receives a `location::Data` object to be indexed
#[derive(Serialize, Deserialize)]
pub struct IndexerJobInit {
pub location: indexer_job_location::Data,
}
impl Hash for IndexerJobInit {
fn hash<H: Hasher>(&self, state: &mut H) {
self.location.id.hash(state);
}
}
/// `IndexerJobData` contains the state of the indexer job, which includes a `location_path` that
/// is cached and cast to `PathBuf` from the `local_path` column in the `location` table. It also
/// contains some metadata for logging purposes.
#[derive(Serialize, Deserialize)]
pub struct IndexerJobData {
db_write_start: DateTime<Utc>,
scan_read_time: Duration,
total_paths: usize,
}
/// `IndexerJobStep` is a type alias, specifying that each step of the [`IndexerJob`] is a vector of
/// `IndexerJobStepEntry`. The size of this vector is given by the [`BATCH_SIZE`] constant.
pub type IndexerJobStep = Vec<IndexerJobStepEntry>;
/// `IndexerJobStepEntry` represents a single file to be indexed, given its metadata to be written
/// on the `file_path` table in the database
#[derive(Serialize, Deserialize)]
pub struct IndexerJobStepEntry {
path: PathBuf,
created_at: DateTime<Utc>,
file_id: i32,
parent_id: Option<i32>,
is_dir: bool,
}
impl IndexerJobData {
fn on_scan_progress(ctx: WorkerContext, progress: Vec<ScanProgress>) {
ctx.progress_debounced(
progress
.iter()
.map(|p| match p.clone() {
ScanProgress::ChunkCount(c) => JobReportUpdate::TaskCount(c),
ScanProgress::SavedChunks(p) => JobReportUpdate::CompletedTaskCount(p),
ScanProgress::Message(m) => JobReportUpdate::Message(m),
})
.collect(),
)
}
}
#[async_trait::async_trait]
impl StatefulJob for IndexerJob {
type Init = IndexerJobInit;
@ -110,8 +45,21 @@ impl StatefulJob for IndexerJob {
/// Creates a vector of valid path buffers from a directory, chunked into batches of `BATCH_SIZE`.
async fn init(&self, ctx: WorkerContext, state: &mut JobState<Self>) -> Result<(), JobError> {
let Library {
last_file_path_id_manager,
db,
..
} = &ctx.library;
let location_id = state.init.location.id;
let location_path = Path::new(&state.init.location.path);
// grab the next id so we can increment in memory for batch inserting
let first_file_id = get_max_file_path_id(&ctx.library).await?;
let first_file_id = last_file_path_id_manager
.get_max_file_path_id(location_id, db)
.await
.map_err(IndexerError::from)?
+ 1;
let mut indexer_rules_by_kind: HashMap<RuleKind, Vec<IndexerRule>> =
HashMap::with_capacity(state.init.location.indexer_rules.len());
@ -124,70 +72,142 @@ impl StatefulJob for IndexerJob {
.push(indexer_rule);
}
let mut dirs_ids = HashMap::new();
let to_walk_path = if let Some(ref sub_path) = state.init.sub_path {
let full_path = ensure_sub_path_is_in_location(location_path, sub_path)
.await
.map_err(IndexerError::from)?;
ensure_sub_path_is_directory(location_path, sub_path)
.await
.map_err(IndexerError::from)?;
let sub_path_file_path_id = get_existing_file_path_id(
MaterializedPath::new(location_id, location_path, &full_path, true)
.map_err(IndexerError::from)?,
db,
)
.await
.map_err(IndexerError::from)?
.expect("Sub path should already exist in the database");
// If we're operating with a sub_path, then we have to put its id on `dirs_ids` map
dirs_ids.insert(full_path.clone(), sub_path_file_path_id);
full_path
} else {
location_path.to_path_buf()
};
let scan_start = Instant::now();
let inner_ctx = ctx.clone();
let paths = walk(
&state.init.location.path,
let found_paths = walk(
to_walk_path,
&indexer_rules_by_kind,
move |path, total_entries| {
|path, total_entries| {
IndexerJobData::on_scan_progress(
inner_ctx.clone(),
&ctx,
vec![
ScanProgress::Message(format!("Scanning {}", path.display())),
ScanProgress::ChunkCount(total_entries / BATCH_SIZE),
],
);
},
// if we're not using a sub_path, then it's a full indexing and we must include the root dir
state.init.sub_path.is_none(),
)
.await?;
let total_paths = paths.len();
let last_file_id = first_file_id + total_paths as i32;
// Setting our global state for file_path ids
set_max_file_path_id(last_file_id);
let mut dirs_ids = HashMap::new();
let paths_entries = paths
.into_iter()
.zip(first_file_id..last_file_id)
.map(
|(
WalkEntry {
path,
is_dir,
created_at,
},
file_id,
)| {
let parent_id = if let Some(parent_dir) = path.parent() {
dirs_ids.get(parent_dir).copied()
} else {
None
};
dirs_ids.insert(path.clone(), file_id);
IndexerJobStepEntry {
path,
created_at,
file_id,
parent_id,
is_dir,
}
},
dirs_ids.extend(
find_many_file_paths_by_full_path(
&location::Data::from(&state.init.location),
&found_paths
.iter()
.map(|entry| &entry.path)
.collect::<Vec<_>>(),
db,
)
.await
.map_err(IndexerError::from)?
.select(file_path_just_id_materialized_path::select())
.exec()
.await?
.into_iter()
.map(|file_path| {
(
location_path.join(file_path.materialized_path),
file_path.id,
)
}),
);
let mut new_paths = found_paths
.into_iter()
.filter_map(|entry| {
MaterializedPath::new(
location_id,
&state.init.location.path,
&entry.path,
entry.is_dir,
)
.map_or_else(
|e| {
error!("Failed to create materialized path: {e}");
None
},
|materialized_path| {
(!dirs_ids.contains_key(&entry.path)).then(|| {
IndexerJobStepEntry {
materialized_path,
created_at: entry.created_at,
file_id: 0, // To be set later
parent_id: entry.path.parent().and_then(|parent_dir| {
/***************************************************************
* If we're dealing with a new path whose parent already *
* exists, we fetch its parent id from our `dirs_ids` map *
**************************************************************/
dirs_ids.get(parent_dir).copied()
}),
full_path: entry.path,
}
})
},
)
})
.collect::<Vec<_>>();
let total_entries = paths_entries.len();
let total_paths = new_paths.len();
let last_file_id = first_file_id + total_paths as i32;
// Setting our global state for `file_path` ids
last_file_path_id_manager
.set_max_file_path_id(location_id, last_file_id)
.await;
new_paths
.iter_mut()
.zip(first_file_id..last_file_id)
.for_each(|(entry, file_id)| {
// If the `parent_id` is still none here, is because the parent of this entry is also
// a new one in the DB
if entry.parent_id.is_none() {
entry.parent_id = entry
.full_path
.parent()
.and_then(|parent_dir| dirs_ids.get(parent_dir).copied());
}
entry.file_id = file_id;
dirs_ids.insert(entry.full_path.clone(), file_id);
});
state.data = Some(IndexerJobData {
db_write_start: Utc::now(),
scan_read_time: scan_start.elapsed(),
total_paths: total_entries,
total_paths,
indexed_paths: 0,
});
state.steps = paths_entries
state.steps = new_paths
.into_iter()
.chunks(BATCH_SIZE)
.into_iter()
@ -195,13 +215,13 @@ impl StatefulJob for IndexerJob {
.map(|(i, chunk)| {
let chunk_steps = chunk.collect::<Vec<_>>();
IndexerJobData::on_scan_progress(
ctx.clone(),
&ctx,
vec![
ScanProgress::SavedChunks(i),
ScanProgress::Message(format!(
"Writing {} of {} to db",
i * chunk_steps.len(),
total_entries,
total_paths,
)),
],
);
@ -218,114 +238,19 @@ impl StatefulJob for IndexerJob {
ctx: WorkerContext,
state: &mut JobState<Self>,
) -> Result<(), JobError> {
let Library { sync, db, .. } = &ctx.library;
let location = &state.init.location;
let (sync_stuff, paths): (Vec<_>, Vec<_>) = state.steps[0]
.iter()
.map(|entry| {
let name;
let extension;
// if 'entry.path' is a directory, set extension to an empty string to
// avoid periods in folder names being interpreted as file extensions
if entry.is_dir {
extension = "".to_string();
name = extract_name(entry.path.file_name());
} else {
// if the 'entry.path' is not a directory, then get the extension and name.
extension = extract_name(entry.path.extension()).to_lowercase();
name = extract_name(entry.path.file_stem());
}
let mut materialized_path = entry
.path
.strip_prefix(&location.path)
.unwrap()
.to_str()
.expect("Found non-UTF-8 path")
.to_string();
if entry.is_dir && !materialized_path.ends_with('/') {
materialized_path += "/";
}
use file_path::*;
(
sync.unique_shared_create(
sync::file_path::SyncId {
id: entry.file_id,
location: sync::location::SyncId {
pub_id: state.init.location.pub_id.clone(),
},
},
[
("materialized_path", json!(materialized_path.clone())),
("name", json!(name.clone())),
("is_dir", json!(entry.is_dir)),
("extension", json!(extension.clone())),
("parent_id", json!(entry.parent_id)),
("date_created", json!(entry.created_at)),
],
),
file_path::create_unchecked(
entry.file_id,
location.id,
materialized_path,
name,
extension,
vec![
is_dir::set(entry.is_dir),
parent_id::set(entry.parent_id),
date_created::set(entry.created_at.into()),
],
),
)
execute_indexer_step(&state.init.location, &state.steps[0], ctx)
.await
.map(|indexed_paths| {
state
.data
.as_mut()
.expect("critical error: missing data on job state")
.indexed_paths = indexed_paths;
})
.unzip();
let count = sync
.write_ops(
db,
(
sync_stuff,
db.file_path().create_many(paths).skip_duplicates(),
),
)
.await?;
info!("Inserted {count} records");
Ok(())
}
/// Logs some metadata about the indexer job
async fn finalize(&mut self, _ctx: WorkerContext, state: &mut JobState<Self>) -> JobResult {
let data = state
.data
.as_ref()
.expect("critical error: missing data on job state");
info!(
"scan of {} completed in {:?}. {:?} files found. db write completed in {:?}",
state.init.location.path,
data.scan_read_time,
data.total_paths,
(Utc::now() - data.db_write_start)
.to_std()
.expect("critical error: non-negative duration"),
);
Ok(Some(serde_json::to_value(state)?))
async fn finalize(&mut self, ctx: WorkerContext, state: &mut JobState<Self>) -> JobResult {
finalize_indexer(&state.init.location.path, state, ctx)
}
}
/// Extract name from the OsStr returned by PathBuf
fn extract_name(os_string: Option<&OsStr>) -> String {
os_string
.unwrap_or_default()
.to_str()
.unwrap_or_default()
.to_owned()
}

View file

@ -1,15 +1,102 @@
pub mod indexer_job;
pub mod rules;
mod walk;
use crate::{
invalidate_query,
job::{JobError, JobReportUpdate, JobResult, JobState, StatefulJob, WorkerContext},
library::Library,
prisma::file_path,
sync,
};
use globset::Error;
use std::{
hash::{Hash, Hasher},
path::{Path, PathBuf},
time::Duration,
};
use chrono::{DateTime, Utc};
use int_enum::IntEnumError;
use rmp_serde::{decode::Error as RMPDecodeError, encode::Error as RMPEncodeError};
use rmp_serde::{decode, encode};
use rspc::ErrorCode;
use rules::RuleKind;
use serde_json::Error as SerdeJsonError;
use std::io;
use serde::{de::DeserializeOwned, Deserialize, Serialize};
use serde_json::json;
use thiserror::Error;
use tokio::io;
use tracing::info;
use super::{
file_path_helper::{FilePathError, MaterializedPath},
location_with_indexer_rules,
};
pub mod indexer_job;
pub mod rules;
pub mod shallow_indexer_job;
mod walk;
/// `IndexerJobInit` receives a `location::Data` object to be indexed
/// and possibly a `sub_path` to be indexed. The `sub_path` is used when
/// we want to index just a part of a location.
#[derive(Serialize, Deserialize)]
pub struct IndexerJobInit {
pub location: location_with_indexer_rules::Data,
pub sub_path: Option<PathBuf>,
}
impl Hash for IndexerJobInit {
fn hash<H: Hasher>(&self, state: &mut H) {
self.location.id.hash(state);
if let Some(ref sub_path) = self.sub_path {
sub_path.hash(state);
}
}
}
/// `IndexerJobData` contains the state of the indexer job, which includes a `location_path` that
/// is cached and cast to `PathBuf` from the `local_path` column in the `location` table. It also
/// contains some metadata for logging purposes.
#[derive(Serialize, Deserialize)]
pub struct IndexerJobData {
db_write_start: DateTime<Utc>,
scan_read_time: Duration,
total_paths: usize,
indexed_paths: i64,
}
/// `IndexerJobStep` is a type alias, specifying that each step of the [`IndexerJob`] is a vector of
/// `IndexerJobStepEntry`. The size of this vector is given by the [`BATCH_SIZE`] constant.
pub type IndexerJobStep = Vec<IndexerJobStepEntry>;
/// `IndexerJobStepEntry` represents a single file to be indexed, given its metadata to be written
/// on the `file_path` table in the database
#[derive(Serialize, Deserialize)]
pub struct IndexerJobStepEntry {
full_path: PathBuf,
materialized_path: MaterializedPath,
created_at: DateTime<Utc>,
file_id: i32,
parent_id: Option<i32>,
}
impl IndexerJobData {
fn on_scan_progress(ctx: &WorkerContext, progress: Vec<ScanProgress>) {
ctx.progress_debounced(
progress
.iter()
.map(|p| match p.clone() {
ScanProgress::ChunkCount(c) => JobReportUpdate::TaskCount(c),
ScanProgress::SavedChunks(p) => JobReportUpdate::CompletedTaskCount(p),
ScanProgress::Message(m) => JobReportUpdate::Message(m),
})
.collect(),
)
}
}
#[derive(Clone)]
pub enum ScanProgress {
ChunkCount(usize),
SavedChunks(usize),
Message(String),
}
/// Error type for the indexer module
#[derive(Error, Debug)]
@ -22,7 +109,7 @@ pub enum IndexerError {
#[error("Invalid indexer rule kind integer: {0}")]
InvalidRuleKindInt(#[from] IntEnumError<RuleKind>),
#[error("Glob builder error: {0}")]
GlobBuilderError(#[from] Error),
GlobBuilderError(#[from] globset::Error),
// Internal Errors
#[error("Database error: {0}")]
@ -30,11 +117,13 @@ pub enum IndexerError {
#[error("I/O error: {0}")]
IOError(#[from] io::Error),
#[error("Indexer rule parameters json serialization error: {0}")]
RuleParametersSerdeJson(#[from] SerdeJsonError),
RuleParametersSerdeJson(#[from] serde_json::Error),
#[error("Indexer rule parameters encode error: {0}")]
RuleParametersRMPEncode(#[from] RMPEncodeError),
RuleParametersRMPEncode(#[from] encode::Error),
#[error("Indexer rule parameters decode error: {0}")]
RuleParametersRMPDecode(#[from] RMPDecodeError),
RuleParametersRMPDecode(#[from] decode::Error),
#[error("File path related error (error: {0})")]
FilePathError(#[from] FilePathError),
}
impl From<IndexerError> for rspc::Error {
@ -52,3 +141,104 @@ impl From<IndexerError> for rspc::Error {
}
}
}
async fn execute_indexer_step(
location: &location_with_indexer_rules::Data,
step: &[IndexerJobStepEntry],
ctx: WorkerContext,
) -> Result<i64, JobError> {
let Library { sync, db, .. } = &ctx.library;
let (sync_stuff, paths): (Vec<_>, Vec<_>) = step
.iter()
.map(|entry| {
let MaterializedPath {
materialized_path,
is_dir,
name,
extension,
..
} = entry.materialized_path.clone();
use file_path::*;
(
sync.unique_shared_create(
sync::file_path::SyncId {
id: entry.file_id,
location: sync::location::SyncId {
pub_id: location.pub_id.clone(),
},
},
[
("materialized_path", json!(materialized_path.clone())),
("name", json!(name.clone())),
("is_dir", json!(is_dir)),
("extension", json!(extension.clone())),
("parent_id", json!(entry.parent_id)),
("date_created", json!(entry.created_at)),
],
),
file_path::create_unchecked(
entry.file_id,
location.id,
materialized_path,
name,
extension,
vec![
is_dir::set(is_dir),
parent_id::set(entry.parent_id),
date_created::set(entry.created_at.into()),
],
),
)
})
.unzip();
let count = sync
.write_ops(
db,
(
sync_stuff,
db.file_path().create_many(paths).skip_duplicates(),
),
)
.await?;
info!("Inserted {count} records");
Ok(count)
}
fn finalize_indexer<SJob, Init>(
location_path: impl AsRef<Path>,
state: &JobState<SJob>,
ctx: WorkerContext,
) -> JobResult
where
SJob: StatefulJob<Init = Init, Data = IndexerJobData, Step = IndexerJobStep>,
Init: Serialize + DeserializeOwned + Send + Sync + Hash,
{
let data = state
.data
.as_ref()
.expect("critical error: missing data on job state");
tracing::info!(
"scan of {} completed in {:?}. {} new files found, \
indexed {} files in db. db write completed in {:?}",
location_path.as_ref().display(),
data.scan_read_time,
data.total_paths,
data.indexed_paths,
(Utc::now() - data.db_write_start)
.to_std()
.expect("critical error: non-negative duration"),
);
if data.indexed_paths > 0 {
invalidate_query!(ctx.library, "locations.getExplorerData");
}
Ok(Some(serde_json::to_value(state)?))
}

View file

@ -29,7 +29,7 @@ pub struct IndexerRuleCreateArgs {
}
impl IndexerRuleCreateArgs {
pub async fn create(self, ctx: &Library) -> Result<indexer_rule::Data, IndexerError> {
pub async fn create(self, library: &Library) -> Result<indexer_rule::Data, IndexerError> {
let parameters = match self.kind {
RuleKind::AcceptFilesByGlob | RuleKind::RejectFilesByGlob => rmp_serde::to_vec(
&Glob::new(&serde_json::from_slice::<String>(&self.parameters)?)?,
@ -41,7 +41,8 @@ impl IndexerRuleCreateArgs {
}
};
ctx.db
library
.db
.indexer_rule()
.create(self.kind as i32, self.name, parameters, vec![])
.exec()

View file

@ -0,0 +1,258 @@
use crate::{
job::{JobError, JobResult, JobState, StatefulJob, WorkerContext},
library::Library,
location::file_path_helper::{
ensure_sub_path_is_directory, ensure_sub_path_is_in_location,
file_path_just_id_materialized_path, find_many_file_paths_by_full_path,
get_existing_file_path_id, MaterializedPath,
},
prisma::location,
};
use std::{
collections::{HashMap, HashSet},
hash::{Hash, Hasher},
path::{Path, PathBuf},
};
use chrono::Utc;
use itertools::Itertools;
use serde::{Deserialize, Serialize};
use tokio::time::Instant;
use tracing::error;
use super::{
execute_indexer_step, finalize_indexer, location_with_indexer_rules,
rules::{IndexerRule, RuleKind},
walk::walk_single_dir,
IndexerError, IndexerJobData, IndexerJobStep, IndexerJobStepEntry, ScanProgress,
};
/// BATCH_SIZE is the number of files to index at each step, writing the chunk of file metadata to the database.
const BATCH_SIZE: usize = 1000;
pub const SHALLOW_INDEXER_JOB_NAME: &str = "shallow_indexer";
/// `ShallowIndexerJobInit` receives a `location::Data` object to be indexed
/// and possibly a `sub_path` to be indexed. The `sub_path` is used when
/// we want to index just a part of a location.
#[derive(Serialize, Deserialize)]
pub struct ShallowIndexerJobInit {
pub location: location_with_indexer_rules::Data,
pub sub_path: PathBuf,
}
impl Hash for ShallowIndexerJobInit {
fn hash<H: Hasher>(&self, state: &mut H) {
self.location.id.hash(state);
self.sub_path.hash(state);
}
}
/// A `ShallowIndexerJob` is a stateful job that indexes all files in a directory, without descending into inner directories.
/// First it checks the directory and generates a list of files to index, chunked into
/// batches of [`BATCH_SIZE`]. Then for each chunk it writes the file metadata to the database.
pub struct ShallowIndexerJob;
#[async_trait::async_trait]
impl StatefulJob for ShallowIndexerJob {
type Init = ShallowIndexerJobInit;
type Data = IndexerJobData;
type Step = IndexerJobStep;
fn name(&self) -> &'static str {
SHALLOW_INDEXER_JOB_NAME
}
/// Creates a vector of valid path buffers from a directory, chunked into batches of `BATCH_SIZE`.
async fn init(&self, ctx: WorkerContext, state: &mut JobState<Self>) -> Result<(), JobError> {
let Library {
last_file_path_id_manager,
db,
..
} = &ctx.library;
let location_id = state.init.location.id;
let location_path = Path::new(&state.init.location.path);
// grab the next id so we can increment in memory for batch inserting
let first_file_id = last_file_path_id_manager
.get_max_file_path_id(location_id, db)
.await
.map_err(IndexerError::from)?
+ 1;
let mut indexer_rules_by_kind: HashMap<RuleKind, Vec<IndexerRule>> =
HashMap::with_capacity(state.init.location.indexer_rules.len());
for location_rule in &state.init.location.indexer_rules {
let indexer_rule = IndexerRule::try_from(&location_rule.indexer_rule)?;
indexer_rules_by_kind
.entry(indexer_rule.kind)
.or_default()
.push(indexer_rule);
}
let (to_walk_path, parent_id) = if state.init.sub_path != Path::new("") {
let full_path = ensure_sub_path_is_in_location(location_path, &state.init.sub_path)
.await
.map_err(IndexerError::from)?;
ensure_sub_path_is_directory(location_path, &state.init.sub_path)
.await
.map_err(IndexerError::from)?;
(
location_path.join(&state.init.sub_path),
get_existing_file_path_id(
MaterializedPath::new(location_id, location_path, &full_path, true)
.map_err(IndexerError::from)?,
db,
)
.await
.map_err(IndexerError::from)?
.expect("Sub path should already exist in the database"),
)
} else {
(
location_path.to_path_buf(),
get_existing_file_path_id(
MaterializedPath::new(location_id, location_path, location_path, true)
.map_err(IndexerError::from)?,
db,
)
.await
.map_err(IndexerError::from)?
.expect("Location root path should already exist in the database"),
)
};
let scan_start = Instant::now();
let found_paths = walk_single_dir(
to_walk_path,
&indexer_rules_by_kind,
|path, total_entries| {
IndexerJobData::on_scan_progress(
&ctx,
vec![
ScanProgress::Message(format!("Scanning {}", path.display())),
ScanProgress::ChunkCount(total_entries / BATCH_SIZE),
],
);
},
)
.await?;
let already_existing_file_paths = find_many_file_paths_by_full_path(
&location::Data::from(&state.init.location),
&found_paths
.iter()
.map(|entry| &entry.path)
.collect::<Vec<_>>(),
db,
)
.await
.map_err(IndexerError::from)?
.select(file_path_just_id_materialized_path::select())
.exec()
.await?
.into_iter()
.map(|file_path| file_path.materialized_path)
.collect::<HashSet<_>>();
// Filter out paths that are already in the database
let mut new_paths = found_paths
.into_iter()
.filter_map(|entry| {
MaterializedPath::new(location_id, location_path, &entry.path, entry.is_dir)
.map_or_else(
|e| {
error!("Failed to create materialized path: {e}");
None
},
|materialized_path| {
(!already_existing_file_paths
.contains::<str>(materialized_path.as_ref()))
.then_some(IndexerJobStepEntry {
full_path: entry.path,
materialized_path,
created_at: entry.created_at,
file_id: 0, // To be set later
parent_id: Some(parent_id),
})
},
)
})
// Sadly we have to collect here to be able to check the length so we can set
// the max file path id later
.collect::<Vec<_>>();
let total_paths = new_paths.len();
let last_file_id = first_file_id + total_paths as i32;
// Setting our global state for file_path ids
last_file_path_id_manager
.set_max_file_path_id(location_id, last_file_id)
.await;
new_paths
.iter_mut()
.zip(first_file_id..last_file_id)
.for_each(|(entry, file_id)| {
entry.file_id = file_id;
});
let total_paths = new_paths.len();
state.data = Some(IndexerJobData {
db_write_start: Utc::now(),
scan_read_time: scan_start.elapsed(),
total_paths,
indexed_paths: 0,
});
state.steps = new_paths
.into_iter()
.chunks(BATCH_SIZE)
.into_iter()
.enumerate()
.map(|(i, chunk)| {
let chunk_steps = chunk.collect::<Vec<_>>();
IndexerJobData::on_scan_progress(
&ctx,
vec![
ScanProgress::SavedChunks(i),
ScanProgress::Message(format!(
"Writing {} of {} to db",
i * chunk_steps.len(),
total_paths,
)),
],
);
chunk_steps
})
.collect();
Ok(())
}
/// Process each chunk of entries in the indexer job, writing to the `file_path` table
async fn execute_step(
&self,
ctx: WorkerContext,
state: &mut JobState<Self>,
) -> Result<(), JobError> {
execute_indexer_step(&state.init.location, &state.steps[0], ctx)
.await
.map(|indexed_paths| {
state
.data
.as_mut()
.expect("critical error: missing data on job state")
.indexed_paths = indexed_paths;
})
}
/// Logs some metadata about the indexer job
async fn finalize(&mut self, ctx: WorkerContext, state: &mut JobState<Self>) -> JobResult {
finalize_indexer(&state.init.location.path, state, ctx)
}
}

View file

@ -6,7 +6,7 @@ use std::{
path::{Path, PathBuf},
};
use tokio::fs;
use tracing::{debug, error};
use tracing::{error, trace};
use super::{
rules::{IndexerRule, RuleKind},
@ -48,6 +48,8 @@ impl Ord for WalkEntry {
}
}
type ToWalkEntry = (PathBuf, Option<bool>);
/// This function walks through the filesystem, applying the rules to each entry and then returning
/// a list of accepted entries. There are some useful comments in the implementation of this function
/// in case anything is unclear.
@ -55,6 +57,7 @@ pub(super) async fn walk(
root: impl AsRef<Path>,
rules_per_kind: &HashMap<RuleKind, Vec<IndexerRule>>,
update_notifier: impl Fn(&Path, usize),
include_root: bool,
) -> Result<Vec<WalkEntry>, IndexerError> {
let root = root.as_ref().to_path_buf();
@ -75,204 +78,265 @@ pub(super) async fn walk(
}
};
	// Marking with a loop label here in case of rejection or errors, to continue with the next entry
'entries: loop {
let entry = match read_dir.next_entry().await {
Ok(Some(entry)) => entry,
Ok(None) => break,
Err(e) => {
error!(
"Error reading entry in {}: {:#?}",
inner_walk_single_dir(
&root,
(current_path, parent_dir_accepted_by_its_children),
&mut read_dir,
rules_per_kind,
&update_notifier,
&mut indexed_paths,
Some(&mut to_walk),
)
.await?;
}
prepared_indexed_paths(root, indexed_paths, include_root).await
}
async fn inner_walk_single_dir(
root: impl AsRef<Path>,
(current_path, parent_dir_accepted_by_its_children): ToWalkEntry,
read_dir: &mut fs::ReadDir,
rules_per_kind: &HashMap<RuleKind, Vec<IndexerRule>>,
update_notifier: &impl Fn(&Path, usize),
indexed_paths: &mut HashMap<PathBuf, WalkEntry>,
mut maybe_to_walk: Option<&mut VecDeque<(PathBuf, Option<bool>)>>,
) -> Result<(), IndexerError> {
let root = root.as_ref();
	// Marking with a loop label here in case of rejection or errors, to continue with the next entry
'entries: loop {
let entry = match read_dir.next_entry().await {
Ok(Some(entry)) => entry,
Ok(None) => break,
Err(e) => {
error!(
"Error reading entry in {}: {:#?}",
current_path.display(),
e
);
continue;
}
};
// Accept by children has three states,
	// None if we don't know yet or if this check doesn't apply
// Some(true) if this check applies and it passes
// Some(false) if this check applies and it was rejected
// and we pass the current parent state to its children
let mut accept_by_children_dir = parent_dir_accepted_by_its_children;
let current_path = entry.path();
update_notifier(&current_path, indexed_paths.len());
trace!(
"Current filesystem path: {}, accept_by_children_dir: {:#?}",
current_path.display(),
accept_by_children_dir
);
if let Some(reject_rules) = rules_per_kind.get(&RuleKind::RejectFilesByGlob) {
for reject_rule in reject_rules {
// It's ok to unwrap here, reject rules are infallible
if !reject_rule.apply(&current_path).await.unwrap() {
trace!(
"Path {} rejected by rule {}",
current_path.display(),
e
);
continue;
}
};
// Accept by children has three states,
	// None if we don't know yet or if this check doesn't apply
// Some(true) if this check applies and it passes
// Some(false) if this check applies and it was rejected
// and we pass the current parent state to its children
let mut accept_by_children_dir = parent_dir_accepted_by_its_children;
let current_path = entry.path();
update_notifier(&current_path, indexed_paths.len());
debug!(
"Current filesystem path: {}, accept_by_children_dir: {:#?}",
current_path.display(),
accept_by_children_dir
);
if let Some(reject_rules) = rules_per_kind.get(&RuleKind::RejectFilesByGlob) {
for reject_rule in reject_rules {
// It's ok to unwrap here, reject rules are infallible
if !reject_rule.apply(&current_path).await.unwrap() {
debug!(
"Path {} rejected by rule {}",
current_path.display(),
reject_rule.name
);
continue 'entries;
}
}
}
let metadata = entry.metadata().await?;
// TODO: Hard ignoring symlinks for now, but this should be configurable
if metadata.is_symlink() {
continue 'entries;
}
let is_dir = metadata.is_dir();
if is_dir {
// If it is a directory, first we check if we must reject it and its children entirely
if let Some(reject_by_children_rules) =
rules_per_kind.get(&RuleKind::RejectIfChildrenDirectoriesArePresent)
{
for reject_by_children_rule in reject_by_children_rules {
match reject_by_children_rule.apply(&current_path).await {
Ok(false) => {
debug!(
"Path {} rejected by rule {}",
current_path.display(),
reject_by_children_rule.name
);
continue 'entries;
}
Ok(true) => {}
Err(e) => {
error!(
"Error applying rule {} to path {}: {:#?}",
reject_by_children_rule.name,
current_path.display(),
e
);
continue 'entries;
}
}
}
}
// Then we check if we must accept it and its children
if let Some(accept_by_children_rules) =
rules_per_kind.get(&RuleKind::AcceptIfChildrenDirectoriesArePresent)
{
for accept_by_children_rule in accept_by_children_rules {
match accept_by_children_rule.apply(&current_path).await {
Ok(true) => {
accept_by_children_dir = Some(true);
break;
}
Ok(false) => {}
Err(e) => {
error!(
"Error applying rule {} to path {}: {:#?}",
accept_by_children_rule.name,
current_path.display(),
e
);
continue 'entries;
}
}
}
// If it wasn't accepted then we mark as rejected
if accept_by_children_dir.is_none() {
debug!(
"Path {} rejected because it didn't passed in any AcceptIfChildrenDirectoriesArePresent rule",
current_path.display()
);
accept_by_children_dir = Some(false);
}
}
	// Then we mark this directory to be walked into as well
to_walk.push_back((entry.path(), accept_by_children_dir));
}
let mut accept_by_glob = false;
if let Some(accept_rules) = rules_per_kind.get(&RuleKind::AcceptFilesByGlob) {
for accept_rule in accept_rules {
// It's ok to unwrap here, accept rules are infallible
if accept_rule.apply(&current_path).await.unwrap() {
debug!(
"Path {} accepted by rule {}",
current_path.display(),
accept_rule.name
);
accept_by_glob = true;
break;
}
}
if !accept_by_glob {
debug!(
"Path {} reject because it didn't passed in any AcceptFilesByGlob rules",
current_path.display()
reject_rule.name
);
continue 'entries;
}
} else {
// If there are no accept rules, then accept all paths
accept_by_glob = true;
}
}
let metadata = entry.metadata().await?;
// TODO: Hard ignoring symlinks for now, but this should be configurable
if metadata.is_symlink() {
continue 'entries;
}
let is_dir = metadata.is_dir();
if is_dir {
// If it is a directory, first we check if we must reject it and its children entirely
if let Some(reject_by_children_rules) =
rules_per_kind.get(&RuleKind::RejectIfChildrenDirectoriesArePresent)
{
for reject_by_children_rule in reject_by_children_rules {
match reject_by_children_rule.apply(&current_path).await {
Ok(false) => {
trace!(
"Path {} rejected by rule {}",
current_path.display(),
reject_by_children_rule.name
);
continue 'entries;
}
Ok(true) => {}
Err(e) => {
trace!(
"Error applying rule {} to path {}: {:#?}",
reject_by_children_rule.name,
current_path.display(),
e
);
continue 'entries;
}
}
}
}
if accept_by_glob
&& (accept_by_children_dir.is_none() || accept_by_children_dir.unwrap())
// Then we check if we must accept it and its children
if let Some(accept_by_children_rules) =
rules_per_kind.get(&RuleKind::AcceptIfChildrenDirectoriesArePresent)
{
indexed_paths.insert(
current_path.clone(),
WalkEntry {
path: current_path.clone(),
is_dir,
created_at: metadata.created()?.into(),
},
);
	// If the ancestor directories weren't indexed before, we do it now
for ancestor in current_path
.ancestors()
.skip(1) // Skip the current directory as it was already indexed
.take_while(|&ancestor| ancestor != root)
{
debug!("Indexing ancestor {}", ancestor.display());
if !indexed_paths.contains_key(ancestor) {
indexed_paths.insert(
ancestor.to_path_buf(),
WalkEntry {
path: ancestor.to_path_buf(),
is_dir: true,
created_at: fs::metadata(ancestor).await?.created()?.into(),
},
);
} else {
	// If indexed_paths already contains the current ancestor, then it will also
	// contain all of its ancestors, so we can stop here
break;
for accept_by_children_rule in accept_by_children_rules {
match accept_by_children_rule.apply(&current_path).await {
Ok(true) => {
accept_by_children_dir = Some(true);
break;
}
Ok(false) => {}
Err(e) => {
error!(
"Error applying rule {} to path {}: {:#?}",
accept_by_children_rule.name,
current_path.display(),
e
);
continue 'entries;
}
}
}
// If it wasn't accepted then we mark as rejected
if accept_by_children_dir.is_none() {
trace!(
"Path {} rejected because it didn't passed in any AcceptIfChildrenDirectoriesArePresent rule",
current_path.display()
);
accept_by_children_dir = Some(false);
}
}
	// Then we mark this directory to be walked into as well
if let Some(ref mut to_walk) = maybe_to_walk {
to_walk.push_back((entry.path(), accept_by_children_dir));
}
}
let mut accept_by_glob = false;
if let Some(accept_rules) = rules_per_kind.get(&RuleKind::AcceptFilesByGlob) {
for accept_rule in accept_rules {
// It's ok to unwrap here, accept rules are infallible
if accept_rule.apply(&current_path).await.unwrap() {
trace!(
"Path {} accepted by rule {}",
current_path.display(),
accept_rule.name
);
accept_by_glob = true;
break;
}
}
if !accept_by_glob {
trace!(
"Path {} reject because it didn't passed in any AcceptFilesByGlob rules",
current_path.display()
);
continue 'entries;
}
} else {
// If there are no accept rules, then accept all paths
accept_by_glob = true;
}
if accept_by_glob && (accept_by_children_dir.is_none() || accept_by_children_dir.unwrap()) {
indexed_paths.insert(
current_path.clone(),
WalkEntry {
path: current_path.clone(),
is_dir,
created_at: metadata.created()?.into(),
},
);
	// If the ancestor directories weren't indexed before, we do it now
for ancestor in current_path
.ancestors()
.skip(1) // Skip the current directory as it was already indexed
.take_while(|&ancestor| ancestor != root)
{
trace!("Indexing ancestor {}", ancestor.display());
if !indexed_paths.contains_key(ancestor) {
indexed_paths.insert(
ancestor.to_path_buf(),
WalkEntry {
path: ancestor.to_path_buf(),
is_dir: true,
created_at: fs::metadata(ancestor).await?.created()?.into(),
},
);
} else {
	// If indexed_paths already contains the current ancestor, then it will also
	// contain all of its ancestors, so we can stop here
break;
}
}
}
}
Ok(())
}
async fn prepared_indexed_paths(
root: PathBuf,
indexed_paths: HashMap<PathBuf, WalkEntry>,
include_root: bool,
) -> Result<Vec<WalkEntry>, IndexerError> {
let mut indexed_paths = indexed_paths.into_values().collect::<Vec<_>>();
// Also adding the root location path
let root_created_at = fs::metadata(&root).await?.created()?.into();
indexed_paths.push(WalkEntry {
path: root,
is_dir: true,
created_at: root_created_at,
});
if include_root {
// Also adding the root location path
let root_created_at = fs::metadata(&root).await?.created()?.into();
indexed_paths.push(WalkEntry {
path: root,
is_dir: true,
created_at: root_created_at,
});
}
	// Sorting so we can assign each path an ascending id following the filesystem hierarchy
indexed_paths.sort();
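	// Note: `WalkEntry` ordering presumably follows path ordering, so a parent
	// directory sorts before its children (e.g. `photos` before `photos/img.jpg`),
	// which is what lets the indexer later assign ids that respect the hierarchy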
Ok(indexed_paths)
}
pub(super) async fn walk_single_dir(
root: impl AsRef<Path>,
rules_per_kind: &HashMap<RuleKind, Vec<IndexerRule>>,
update_notifier: impl Fn(&Path, usize),
) -> Result<Vec<WalkEntry>, IndexerError> {
let root = root.as_ref().to_path_buf();
let mut read_dir = fs::read_dir(&root).await?;
let mut indexed_paths = HashMap::new();
inner_walk_single_dir(
&root,
(root.clone(), None),
&mut read_dir,
rules_per_kind,
&update_notifier,
&mut indexed_paths,
None,
)
.await?;
prepared_indexed_paths(root, indexed_paths, false).await
}
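// Note: unlike `walk`, `walk_single_dir` passes `None` for the `to_walk` queue, so
// directories found here are not queued for further walking and only the direct
// children of `root` are indexed; it also calls `prepared_indexed_paths` with
// `include_root = false`, so the root itself is not re-added to the results.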
#[cfg(test)]
mod tests {
use super::super::rules::ParametersPerKind;
@ -377,7 +441,7 @@ mod tests {
.into_iter()
.collect::<BTreeSet<_>>();
let actual = walk(root_path.to_path_buf(), &HashMap::new(), |_, _| {})
let actual = walk(root_path.to_path_buf(), &HashMap::new(), |_, _| {}, true)
.await
.unwrap()
.into_iter()
@ -416,7 +480,7 @@ mod tests {
.into_iter()
.collect::<HashMap<_, _>>();
let actual = walk(root_path.to_path_buf(), &only_photos_rule, |_, _| {})
let actual = walk(root_path.to_path_buf(), &only_photos_rule, |_, _| {}, true)
.await
.unwrap()
.into_iter()
@ -470,7 +534,7 @@ mod tests {
.into_iter()
.collect::<HashMap<_, _>>();
let actual = walk(root_path.to_path_buf(), &git_repos, |_, _| {})
let actual = walk(root_path.to_path_buf(), &git_repos, |_, _| {}, true)
.await
.unwrap()
.into_iter()
@ -543,6 +607,7 @@ mod tests {
root_path.to_path_buf(),
&git_repos_no_deps_no_build_dirs,
|_, _| {},
true,
)
.await
.unwrap()

View file

@ -2,7 +2,7 @@ use crate::{library::Library, prisma::location};
use std::{
collections::{HashMap, HashSet},
path::{Path, PathBuf},
path::PathBuf,
time::Duration,
};
@ -114,25 +114,6 @@ pub(super) async fn get_location(location_id: i32, library: &Library) -> Option<
})
}
pub(super) fn subtract_location_path(
location_path: impl AsRef<Path>,
current_path: impl AsRef<Path>,
) -> Option<PathBuf> {
let location_path = location_path.as_ref();
let current_path = current_path.as_ref();
if let Ok(stripped) = current_path.strip_prefix(location_path) {
Some(stripped.to_path_buf())
} else {
error!(
"Failed to strip location root path ({}) from current path ({})",
location_path.display(),
current_path.display()
);
None
}
}
pub(super) async fn handle_remove_location_request(
location_id: LocationId,
library: Library,

View file

@ -20,14 +20,14 @@ use tracing::{debug, error};
#[cfg(feature = "location-watcher")]
use tokio::sync::mpsc;
use super::{file_path_helper::FilePathError, LocationId};
#[cfg(feature = "location-watcher")]
mod watcher;
#[cfg(feature = "location-watcher")]
mod helpers;
pub type LocationId = i32;
#[derive(Clone, Copy, Debug)]
#[allow(dead_code)]
enum ManagementMessageAction {
@ -89,12 +89,12 @@ pub enum LocationManagerError {
LocationMissingLocalPath(LocationId),
#[error("Tried to update a non-existing file: <path='{0}'>")]
UpdateNonExistingFile(PathBuf),
#[error("Unable to extract materialized path from location: <id='{0}', path='{1:?}'>")]
UnableToExtractMaterializedPath(LocationId, PathBuf),
#[error("Database error: {0}")]
DatabaseError(#[from] prisma_client_rust::QueryError),
#[error("I/O error: {0}")]
IOError(#[from] io::Error),
#[error("File path related error (error: {0})")]
FilePathError(#[from] FilePathError),
}
type OnlineLocations = BTreeSet<Vec<u8>>;

View file

@ -1,6 +1,6 @@
use crate::{
library::Library,
location::{indexer::indexer_job::indexer_job_location, manager::LocationManagerError},
location::{location_with_indexer_rules, manager::LocationManagerError},
};
use async_trait::async_trait;
@ -26,7 +26,7 @@ impl EventHandler for LinuxEventHandler {
async fn handle_event(
&mut self,
location: indexer_job_location::Data,
location: location_with_indexer_rules::Data,
library: &Library,
event: Event,
) -> Result<(), LocationManagerError> {

View file

@ -1,6 +1,6 @@
use crate::{
library::Library,
location::{indexer::indexer_job::indexer_job_location, manager::LocationManagerError},
location::{location_with_indexer_rules, manager::LocationManagerError},
};
use async_trait::async_trait;
@ -32,7 +32,7 @@ impl EventHandler for MacOsEventHandler {
async fn handle_event(
&mut self,
location: indexer_job_location::Data,
location: location_with_indexer_rules::Data,
library: &Library,
event: Event,
) -> Result<(), LocationManagerError> {

View file

@ -1,6 +1,7 @@
use crate::{
library::Library,
prisma::{file_path, location},
location::{find_location, location_with_indexer_rules, LocationId},
prisma::location,
};
use std::{
@ -18,10 +19,7 @@ use tokio::{
};
use tracing::{debug, error, warn};
use super::{
super::{fetch_location, indexer::indexer_job::indexer_job_location},
LocationId, LocationManagerError,
};
use super::LocationManagerError;
mod linux;
mod macos;
@ -40,8 +38,6 @@ type Handler = macos::MacOsEventHandler;
#[cfg(target_os = "windows")]
type Handler = windows::WindowsEventHandler;
file_path::include!(file_path_with_object { object });
pub(super) type IgnorePath = (PathBuf, bool);
#[async_trait]
@ -52,7 +48,7 @@ trait EventHandler {
async fn handle_event(
&mut self,
location: indexer_job_location::Data,
location: location_with_indexer_rules::Data,
library: &Library,
event: Event,
) -> Result<(), LocationManagerError>;
@ -173,8 +169,8 @@ impl LocationWatcher {
return Ok(());
}
let Some(location) = fetch_location(library, location_id)
.include(indexer_job_location::include())
let Some(location) = find_location(library, location_id)
.include(location_with_indexer_rules::include())
.exec()
.await?
else {

View file

@ -3,12 +3,16 @@ use crate::{
library::Library,
location::{
delete_directory,
file_path_helper::create_file_path,
indexer::indexer_job::indexer_job_location,
manager::{helpers::subtract_location_path, LocationId, LocationManagerError},
file_path_helper::{
extract_materialized_path, file_path_with_object, get_existing_file_or_directory,
get_existing_file_path_with_object, get_parent_dir, MaterializedPath,
},
location_with_indexer_rules,
manager::LocationManagerError,
},
object::{
identifier_job::FileMetadata,
file_identifier::FileMetadata,
object_just_id_has_thumbnail,
preview::{
can_generate_thumbnail_for_image, generate_image_thumbnail, THUMBNAIL_CACHE_DIR_NAME,
},
@ -19,7 +23,6 @@ use crate::{
use std::{
collections::HashSet,
ffi::OsStr,
path::{Path, PathBuf},
str::FromStr,
};
@ -33,8 +36,6 @@ use tokio::{fs, io::ErrorKind};
use tracing::{error, info, trace, warn};
use uuid::Uuid;
use super::file_path_with_object;
pub(super) fn check_event(event: &Event, ignore_paths: &HashSet<PathBuf>) -> bool {
	// if the path includes .DS_Store, .spacedrive or is in the `ignore_paths` set, we ignore it
!event.paths.iter().any(|p| {
@ -47,7 +48,7 @@ pub(super) fn check_event(event: &Event, ignore_paths: &HashSet<PathBuf>) -> boo
}
pub(super) async fn create_dir(
location: &indexer_job_location::Data,
location: &location_with_indexer_rules::Data,
event: &Event,
library: &Library,
) -> Result<(), LocationManagerError> {
@ -61,11 +62,10 @@ pub(super) async fn create_dir(
event.paths[0].display()
);
let Some(subpath) = subtract_location_path(&location.path, &event.paths[0]) else {
return Ok(());
};
let materialized_path =
MaterializedPath::new(location.id, &location.path, &event.paths[0], true)?;
let parent_directory = get_parent_dir(location.id, &subpath, library).await?;
let parent_directory = get_parent_dir(&materialized_path, &library.db).await?;
trace!("parent_directory: {:?}", parent_directory);
@ -74,23 +74,10 @@ pub(super) async fn create_dir(
return Ok(())
};
let created_path = create_file_path(
library,
location.id,
subpath
.to_str()
.map(str::to_string)
.expect("Found non-UTF-8 path"),
subpath
.file_stem()
.and_then(OsStr::to_str)
.map(str::to_string)
.expect("Found non-UTF-8 path"),
"".to_string(),
Some(parent_directory.id),
true,
)
.await?;
let created_path = library
.last_file_path_id_manager
.create_file_path(&library.db, materialized_path, Some(parent_directory.id))
.await?;
info!("Created path: {}", created_path.materialized_path);
@ -100,7 +87,7 @@ pub(super) async fn create_dir(
}
pub(super) async fn create_file(
location: &indexer_job_location::Data,
location: &location_with_indexer_rules::Data,
event: &Event,
library: &Library,
) -> Result<(), LocationManagerError> {
@ -108,44 +95,29 @@ pub(super) async fn create_file(
return Ok(());
}
let full_path = &event.paths[0];
trace!(
"Location: <root_path ='{}'> creating file: {}",
&location.path,
event.paths[0].display()
full_path.display()
);
let db = &library.db;
let Some(materialized_path) = subtract_location_path(&location.path, &event.paths[0]) else { return Ok(()) };
let materialized_path = MaterializedPath::new(location.id, &location.path, full_path, false)?;
let Some(parent_directory) =
get_parent_dir(location.id, &materialized_path, library).await?
get_parent_dir(&materialized_path, &library.db).await?
else {
warn!("Watcher found a path without parent");
return Ok(())
};
let created_file = create_file_path(
library,
location.id,
materialized_path
.to_str()
.expect("Found non-UTF-8 path")
.to_string(),
materialized_path
.file_stem()
.unwrap_or_default()
.to_str()
.expect("Found non-UTF-8 path")
.to_string(),
materialized_path
.extension()
.map(|ext| ext.to_str().expect("Found non-UTF-8 path").to_string())
.unwrap_or_default(),
Some(parent_directory.id),
false,
)
.await?;
let created_file = library
.last_file_path_id_manager
.create_file_path(&library.db, materialized_path, Some(parent_directory.id))
.await?;
info!("Created path: {}", created_file.materialized_path);
@ -164,8 +136,6 @@ pub(super) async fn create_file(
.exec()
.await?;
object::select!(object_id { id has_thumbnail });
let size_str = fs_metadata.len().to_string();
let object = if let Some(object) = existing_object {
@ -179,7 +149,7 @@ pub(super) async fn create_file(
),
],
)
.select(object_id::select())
.select(object_just_id_has_thumbnail::select())
.exec()
.await?
} else {
@ -194,7 +164,7 @@ pub(super) async fn create_file(
object::size_in_bytes::set(size_str.clone()),
],
)
.select(object_id::select())
.select(object_just_id_has_thumbnail::select())
.exec()
.await?
};
@ -218,12 +188,15 @@ pub(super) async fn create_file(
}
pub(super) async fn file_creation_or_update(
location: &indexer_job_location::Data,
location: &location_with_indexer_rules::Data,
event: &Event,
library: &Library,
) -> Result<(), LocationManagerError> {
if let Some(ref file_path) =
get_existing_file_path(location, &event.paths[0], false, library).await?
if let Some(ref file_path) = get_existing_file_path_with_object(
MaterializedPath::new(location.id, &location.path, &event.paths[0], false)?,
&library.db,
)
.await?
{
inner_update_file(location, file_path, event, library).await
} else {
@ -233,13 +206,16 @@ pub(super) async fn file_creation_or_update(
}
pub(super) async fn update_file(
location: &indexer_job_location::Data,
location: &location_with_indexer_rules::Data,
event: &Event,
library: &Library,
) -> Result<(), LocationManagerError> {
if location.node_id == library.node_local_id {
if let Some(ref file_path) =
get_existing_file_path(location, &event.paths[0], false, library).await?
if let Some(ref file_path) = get_existing_file_path_with_object(
MaterializedPath::new(location.id, &location.path, &event.paths[0], false)?,
&library.db,
)
.await?
{
let ret = inner_update_file(location, file_path, event, library).await;
invalidate_query!(library, "locations.getExplorerData");
@ -255,7 +231,7 @@ pub(super) async fn update_file(
}
async fn inner_update_file(
location: &indexer_job_location::Data,
location: &location_with_indexer_rules::Data,
file_path: &file_path_with_object::Data,
event: &Event,
library: &Library,
@ -321,7 +297,7 @@ async fn inner_update_file(
}
pub(super) async fn rename_both_event(
location: &indexer_job_location::Data,
location: &location_with_indexer_rules::Data,
event: &Event,
library: &Library,
) -> Result<(), LocationManagerError> {
@ -331,21 +307,24 @@ pub(super) async fn rename_both_event(
pub(super) async fn rename(
new_path: impl AsRef<Path>,
old_path: impl AsRef<Path>,
location: &indexer_job_location::Data,
location: &location_with_indexer_rules::Data,
library: &Library,
) -> Result<(), LocationManagerError> {
let mut old_path_materialized = extract_materialized_path(location, old_path.as_ref())?
.to_str()
.expect("Found non-UTF-8 path")
.to_string();
let mut old_path_materialized =
extract_materialized_path(location.id, &location.path, old_path.as_ref())?
.to_str()
.expect("Found non-UTF-8 path")
.to_string();
let new_path_materialized = extract_materialized_path(location, new_path.as_ref())?;
let new_path_materialized =
extract_materialized_path(location.id, &location.path, new_path.as_ref())?;
let mut new_path_materialized_str = new_path_materialized
.to_str()
.expect("Found non-UTF-8 path")
.to_string();
if let Some(file_path) = get_existing_file_or_directory(location, old_path, library).await? {
if let Some(file_path) = get_existing_file_or_directory(location, old_path, &library.db).await?
{
	// If the renamed path is a directory, we have to update every descendant
if file_path.is_dir {
if !old_path_materialized.ends_with('/') {
@ -406,7 +385,7 @@ pub(super) async fn rename(
}
pub(super) async fn remove_event(
location: &indexer_job_location::Data,
location: &location_with_indexer_rules::Data,
event: &Event,
remove_kind: RemoveKind,
library: &Library,
@ -415,7 +394,7 @@ pub(super) async fn remove_event(
// if it doesn't either way, then we don't care
if let Some(file_path) =
get_existing_file_or_directory(location, &event.paths[0], library).await?
get_existing_file_or_directory(location, &event.paths[0], &library.db).await?
{
// check file still exists on disk
match fs::metadata(&event.paths[0]).await {
@ -458,91 +437,6 @@ pub(super) async fn remove_event(
Ok(())
}
fn extract_materialized_path(
location: &indexer_job_location::Data,
path: impl AsRef<Path>,
) -> Result<PathBuf, LocationManagerError> {
subtract_location_path(&location.path, &path).ok_or_else(|| {
LocationManagerError::UnableToExtractMaterializedPath(
location.id,
path.as_ref().to_path_buf(),
)
})
}
async fn get_existing_file_path(
location: &indexer_job_location::Data,
path: impl AsRef<Path>,
is_dir: bool,
library: &Library,
) -> Result<Option<file_path_with_object::Data>, LocationManagerError> {
let mut materialized_path = extract_materialized_path(location, path)?
.to_str()
.expect("Found non-UTF-8 path")
.to_string();
if is_dir && !materialized_path.ends_with('/') {
materialized_path += "/";
}
library
.db
.file_path()
.find_first(vec![file_path::materialized_path::equals(
materialized_path,
)])
// include object for orphan check
.include(file_path_with_object::include())
.exec()
.await
.map_err(Into::into)
}
async fn get_existing_file_or_directory(
location: &indexer_job_location::Data,
path: impl AsRef<Path>,
library: &Library,
) -> Result<Option<file_path_with_object::Data>, LocationManagerError> {
let mut maybe_file_path =
get_existing_file_path(location, path.as_ref(), false, library).await?;
// First we just check if this path was a file in our db, if it isn't then we check for a directory
if maybe_file_path.is_none() {
maybe_file_path = get_existing_file_path(location, path.as_ref(), true, library).await?;
}
Ok(maybe_file_path)
}
async fn get_parent_dir(
location_id: LocationId,
path: impl AsRef<Path>,
library: &Library,
) -> Result<Option<file_path::Data>, LocationManagerError> {
let mut parent_path_str = path
.as_ref()
.parent()
// We have an "/" `materialized_path` for each location_id
.unwrap_or_else(|| Path::new("/"))
.to_str()
.expect("Found non-UTF-8 path")
.to_string();
// As we're looking specifically for a parent directory, it must end with '/'
if !parent_path_str.ends_with('/') {
parent_path_str += "/";
}
library
.db
.file_path()
.find_first(vec![
file_path::location_id::equals(location_id),
file_path::materialized_path::equals(parent_path_str),
])
.exec()
.await
.map_err(Into::into)
}
async fn generate_thumbnail(
extension: &str,
cas_id: &str,

View file

@ -1,6 +1,6 @@
use crate::{
library::Library,
location::{indexer::indexer_job::indexer_job_location, manager::LocationManagerError},
location::{location_with_indexer_rules, manager::LocationManagerError},
};
use async_trait::async_trait;
@ -33,7 +33,7 @@ impl EventHandler for WindowsEventHandler {
async fn handle_event(
&mut self,
location: indexer_job_location::Data,
location: location_with_indexer_rules::Data,
library: &Library,
event: Event,
) -> Result<(), LocationManagerError> {

View file

@ -3,16 +3,19 @@ use crate::{
job::Job,
library::Library,
object::{
identifier_job::full_identifier_job::{FullFileIdentifierJob, FullFileIdentifierJobInit},
preview::{ThumbnailJob, ThumbnailJobInit},
file_identifier::{
file_identifier_job::{FileIdentifierJob, FileIdentifierJobInit},
shallow_file_identifier_job::{ShallowFileIdentifierJob, ShallowFileIdentifierJobInit},
},
preview::{
shallow_thumbnailer_job::{ShallowThumbnailerJob, ShallowThumbnailerJobInit},
thumbnailer_job::{ThumbnailerJob, ThumbnailerJobInit},
},
},
prisma::{file_path, indexer_rules_in_location, location, node, object},
sync,
};
use rspc::Type;
use serde::Deserialize;
use serde_json::json;
use std::{
collections::HashSet,
ffi::OsStr,
@ -20,6 +23,9 @@ use std::{
};
use prisma_client_rust::QueryError;
use rspc::Type;
use serde::Deserialize;
use serde_json::json;
use tokio::{fs, io};
use tracing::{debug, info};
use uuid::Uuid;
@ -31,10 +37,22 @@ mod manager;
mod metadata;
pub use error::LocationError;
use indexer::indexer_job::{indexer_job_location, IndexerJob, IndexerJobInit};
use file_path_helper::file_path_just_object_id;
use indexer::{
indexer_job::IndexerJob,
shallow_indexer_job::{ShallowIndexerJob, ShallowIndexerJobInit},
IndexerJobInit,
};
pub use manager::{LocationManager, LocationManagerError};
use metadata::SpacedriveLocationMetadataFile;
pub type LocationId = i32;
// Location includes!
location::include!(location_with_indexer_rules {
indexer_rules: select { indexer_rule }
});
/// `LocationCreateArgs` is the argument received from the client using `rspc` to create a new location.
/// It has the actual path and a vector of indexer rule ids, to create many-to-many relationships
/// between the location and indexer rules.
@ -45,7 +63,10 @@ pub struct LocationCreateArgs {
}
impl LocationCreateArgs {
pub async fn create(self, ctx: &Library) -> Result<indexer_job_location::Data, LocationError> {
pub async fn create(
self,
library: &Library,
) -> Result<location_with_indexer_rules::Data, LocationError> {
let path_metadata = match fs::metadata(&self.path).await {
Ok(metadata) => metadata,
Err(e) if e.kind() == io::ErrorKind::NotFound => {
@ -67,10 +88,10 @@ impl LocationCreateArgs {
}
if let Some(metadata) = SpacedriveLocationMetadataFile::try_load(&self.path).await? {
return if metadata.has_library(ctx.id) {
return if metadata.has_library(library.id) {
Err(LocationError::NeedRelink {
// SAFETY: This unwrap is ok as we checked that we have this library_id
old_path: metadata.location_path(ctx.id).unwrap().to_path_buf(),
old_path: metadata.location_path(library.id).unwrap().to_path_buf(),
new_path: self.path,
})
} else {
@ -84,18 +105,21 @@ impl LocationCreateArgs {
);
let uuid = Uuid::new_v4();
let location = create_location(ctx, uuid, &self.path, &self.indexer_rules_ids).await?;
let location = create_location(library, uuid, &self.path, &self.indexer_rules_ids).await?;
// Write a location metadata on a .spacedrive file
SpacedriveLocationMetadataFile::create_and_save(
ctx.id,
library.id,
uuid,
&self.path,
location.name.clone(),
)
.await?;
ctx.location_manager().add(location.id, ctx.clone()).await?;
library
.location_manager()
.add(location.id, library.clone())
.await?;
info!("Created location: {location:?}");
@ -104,39 +128,42 @@ impl LocationCreateArgs {
pub async fn add_library(
self,
ctx: &Library,
) -> Result<indexer_job_location::Data, LocationError> {
library: &Library,
) -> Result<location_with_indexer_rules::Data, LocationError> {
let mut metadata = SpacedriveLocationMetadataFile::try_load(&self.path)
.await?
.ok_or_else(|| LocationError::MetadataNotFound(self.path.clone()))?;
if metadata.has_library(ctx.id) {
if metadata.has_library(library.id) {
return Err(LocationError::NeedRelink {
// SAFETY: This unwrap is ok as we checked that we have this library_id
old_path: metadata.location_path(ctx.id).unwrap().to_path_buf(),
old_path: metadata.location_path(library.id).unwrap().to_path_buf(),
new_path: self.path,
});
}
debug!(
"Trying to add a new library (library_id = {}) to an already existing location '{}'",
ctx.id,
library.id,
self.path.display()
);
let uuid = Uuid::new_v4();
let location = create_location(ctx, uuid, &self.path, &self.indexer_rules_ids).await?;
let location = create_location(library, uuid, &self.path, &self.indexer_rules_ids).await?;
metadata
.add_library(ctx.id, uuid, &self.path, location.name.clone())
.add_library(library.id, uuid, &self.path, location.name.clone())
.await?;
ctx.location_manager().add(location.id, ctx.clone()).await?;
library
.location_manager()
.add(location.id, library.clone())
.await?;
info!(
"Added library (library_id = {}) to location: {location:?}",
ctx.id
library.id
);
Ok(location)
@ -160,11 +187,11 @@ pub struct LocationUpdateArgs {
}
impl LocationUpdateArgs {
pub async fn update(self, ctx: &Library) -> Result<(), LocationError> {
let Library { sync, db, .. } = &ctx;
pub async fn update(self, library: &Library) -> Result<(), LocationError> {
let Library { sync, db, .. } = &library;
let location = fetch_location(ctx, self.id)
.include(location::include!({ indexer_rules }))
let location = find_location(library, self.id)
.include(location_with_indexer_rules::include())
.exec()
.await?
.ok_or(LocationError::IdNotFound(self.id))?;
@ -215,11 +242,11 @@ impl LocationUpdateArgs {
)
.await?;
if location.node_id == ctx.node_local_id {
if location.node_id == library.node_local_id {
if let Some(mut metadata) =
SpacedriveLocationMetadataFile::try_load(&location.path).await?
{
metadata.update(ctx.id, self.name.unwrap()).await?;
metadata.update(library.id, self.name.unwrap()).await?;
}
}
}
@ -227,7 +254,7 @@ impl LocationUpdateArgs {
let current_rules_ids = location
.indexer_rules
.iter()
.map(|r| r.indexer_rule_id)
.map(|r| r.indexer_rule.id)
.collect::<HashSet<_>>();
let new_rules_ids = self.indexer_rules_ids.into_iter().collect::<HashSet<_>>();
@ -243,7 +270,8 @@ impl LocationUpdateArgs {
.collect::<Vec<_>>();
if !rule_ids_to_remove.is_empty() {
ctx.db
library
.db
.indexer_rules_in_location()
.delete_many(vec![
indexer_rules_in_location::location_id::equals(self.id),
@ -254,7 +282,7 @@ impl LocationUpdateArgs {
}
if !rule_ids_to_add.is_empty() {
link_location_and_indexer_rules(ctx, self.id, &rule_ids_to_add).await?;
link_location_and_indexer_rules(library, self.id, &rule_ids_to_add).await?;
}
}
@ -262,18 +290,20 @@ impl LocationUpdateArgs {
}
}
pub fn fetch_location(ctx: &Library, location_id: i32) -> location::FindUnique {
ctx.db
pub fn find_location(library: &Library, location_id: i32) -> location::FindUnique {
library
.db
.location()
.find_unique(location::id::equals(location_id))
}
async fn link_location_and_indexer_rules(
ctx: &Library,
library: &Library,
location_id: i32,
rules_ids: &[i32],
) -> Result<(), LocationError> {
ctx.db
library
.db
.indexer_rules_in_location()
.create_many(
rules_ids
@ -288,43 +318,137 @@ async fn link_location_and_indexer_rules(
}
pub async fn scan_location(
ctx: &Library,
location: indexer_job_location::Data,
library: &Library,
location: location_with_indexer_rules::Data,
) -> Result<(), LocationError> {
if location.node_id != ctx.node_local_id {
if location.node_id != library.node_local_id {
return Ok(());
}
ctx.queue_job(Job::new(
FullFileIdentifierJobInit {
location_id: location.id,
sub_path: None,
},
FullFileIdentifierJob {},
))
.await;
library
.queue_job(Job::new(
FileIdentifierJobInit {
location: location::Data::from(&location),
sub_path: None,
},
FileIdentifierJob {},
))
.await;
ctx.queue_job(Job::new(
ThumbnailJobInit {
location_id: location.id,
root_path: PathBuf::new(),
background: true,
},
ThumbnailJob {},
))
.await;
library
.queue_job(Job::new(
ThumbnailerJobInit {
location: location::Data::from(&location),
sub_path: None,
background: true,
},
ThumbnailerJob {},
))
.await;
ctx.spawn_job(Job::new(IndexerJobInit { location }, IndexerJob {}))
library
.spawn_job(Job::new(
IndexerJobInit {
location,
sub_path: None,
},
IndexerJob {},
))
.await;
Ok(())
}
#[allow(dead_code)]
pub async fn scan_location_sub_path(
library: &Library,
location: location_with_indexer_rules::Data,
sub_path: impl AsRef<Path>,
) -> Result<(), LocationError> {
let sub_path = sub_path.as_ref().to_path_buf();
if location.node_id != library.node_local_id {
return Ok(());
}
library
.queue_job(Job::new(
FileIdentifierJobInit {
location: location::Data::from(&location),
sub_path: Some(sub_path.clone()),
},
FileIdentifierJob {},
))
.await;
library
.queue_job(Job::new(
ThumbnailerJobInit {
location: location::Data::from(&location),
sub_path: Some(sub_path.clone()),
background: true,
},
ThumbnailerJob {},
))
.await;
library
.spawn_job(Job::new(
IndexerJobInit {
location,
sub_path: Some(sub_path),
},
IndexerJob {},
))
.await;
Ok(())
}
pub async fn light_scan_location(
library: &Library,
location: location_with_indexer_rules::Data,
sub_path: impl AsRef<Path>,
) -> Result<(), LocationError> {
let sub_path = sub_path.as_ref().to_path_buf();
if location.node_id != library.node_local_id {
return Ok(());
}
library
.queue_job(Job::new(
ShallowFileIdentifierJobInit {
location: location::Data::from(&location),
sub_path: sub_path.clone(),
},
ShallowFileIdentifierJob {},
))
.await;
library
.queue_job(Job::new(
ShallowThumbnailerJobInit {
location: location::Data::from(&location),
sub_path: sub_path.clone(),
},
ShallowThumbnailerJob {},
))
.await;
library
.spawn_job(Job::new(
ShallowIndexerJobInit { location, sub_path },
ShallowIndexerJob {},
))
.await;
Ok(())
}
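// A minimal usage sketch (illustrative only; the location id, sub path and function
// name below are assumptions, not part of this change): fetch a location together
// with its indexer rules and shallow-scan a single folder inside it.
#[allow(dead_code)]
async fn example_light_scan(library: &Library) -> Result<(), LocationError> {
	let Some(location) = find_location(library, 1)
		.include(location_with_indexer_rules::include())
		.exec()
		.await?
	else {
		return Ok(());
	};

	light_scan_location(library, location, "Documents").await
}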
pub async fn relink_location(
ctx: &Library,
library: &Library,
location_path: impl AsRef<Path>,
) -> Result<(), LocationError> {
let Library { db, id, sync, .. } = &ctx;
let Library { db, id, sync, .. } = &library;
let mut metadata = SpacedriveLocationMetadataFile::try_load(&location_path)
.await?
@ -332,7 +456,7 @@ pub async fn relink_location(
metadata.relink(*id, &location_path).await?;
let pub_id = metadata.location_pub_id(ctx.id)?.as_ref().to_vec();
let pub_id = metadata.location_pub_id(library.id)?.as_ref().to_vec();
let path = location_path
.as_ref()
.to_str()
@ -359,12 +483,12 @@ pub async fn relink_location(
}
async fn create_location(
ctx: &Library,
library: &Library,
location_pub_id: Uuid,
location_path: impl AsRef<Path>,
indexer_rules_ids: &[i32],
) -> Result<indexer_job_location::Data, LocationError> {
let Library { db, sync, .. } = &ctx;
) -> Result<location_with_indexer_rules::Data, LocationError> {
let Library { db, sync, .. } = &library;
let location_path = location_path.as_ref();
@ -387,7 +511,7 @@ async fn create_location(
pub_id: location_pub_id.as_bytes().to_vec(),
},
[
("node", json!({ "pub_id": ctx.id.as_bytes() })),
("node", json!({ "pub_id": library.id.as_bytes() })),
("name", json!(&name)),
("path", json!(&path)),
],
@ -397,39 +521,40 @@ async fn create_location(
location_pub_id.as_bytes().to_vec(),
name,
path,
node::id::equals(ctx.node_local_id),
node::id::equals(library.node_local_id),
vec![],
)
.include(indexer_job_location::include()),
.include(location_with_indexer_rules::include()),
)
.await?;
debug!("created in db");
if !indexer_rules_ids.is_empty() {
link_location_and_indexer_rules(ctx, location.id, indexer_rules_ids).await?;
link_location_and_indexer_rules(library, location.id, indexer_rules_ids).await?;
}
// Updating our location variable to include information about the indexer rules
let location = fetch_location(ctx, location.id)
.include(indexer_job_location::include())
let location = find_location(library, location.id)
.include(location_with_indexer_rules::include())
.exec()
.await?
.ok_or(LocationError::IdNotFound(location.id))?;
invalidate_query!(ctx, "locations.list");
invalidate_query!(library, "locations.list");
Ok(location)
}
pub async fn delete_location(ctx: &Library, location_id: i32) -> Result<(), LocationError> {
let Library { db, .. } = ctx;
pub async fn delete_location(library: &Library, location_id: i32) -> Result<(), LocationError> {
let Library { db, .. } = library;
ctx.location_manager()
.remove(location_id, ctx.clone())
library
.location_manager()
.remove(location_id, library.clone())
.await?;
delete_directory(ctx, location_id, None).await?;
delete_directory(library, location_id, None).await?;
db.indexer_rules_in_location()
.delete_many(vec![indexer_rules_in_location::location_id::equals(
@ -444,26 +569,24 @@ pub async fn delete_location(ctx: &Library, location_id: i32) -> Result<(), Loca
.exec()
.await?;
if location.node_id == ctx.node_local_id {
if location.node_id == library.node_local_id {
if let Ok(Some(mut metadata)) =
SpacedriveLocationMetadataFile::try_load(&location.path).await
{
metadata.remove_library(ctx.id).await?;
metadata.remove_library(library.id).await?;
}
}
info!("Location {} deleted", location_id);
invalidate_query!(ctx, "locations.list");
invalidate_query!(library, "locations.list");
Ok(())
}
file_path::select!(file_path_object_id_only { object_id });
/// Will delete a directory recursively, along with its Objects if they are left as orphans.
/// This function is used when deleting a location and when ingesting directory deletion events.
pub async fn delete_directory(
ctx: &Library,
library: &Library,
location_id: i32,
parent_materialized_path: Option<String>,
) -> Result<(), QueryError> {
@ -477,11 +600,11 @@ pub async fn delete_directory(
};
// Fetching all object_ids from all children file_paths
let object_ids = ctx
let object_ids = library
.db
.file_path()
.find_many(children_params.clone())
.select(file_path_object_id_only::select())
.select(file_path_just_object_id::select())
.exec()
.await?
.into_iter()
@ -490,14 +613,16 @@ pub async fn delete_directory(
// WARNING: file_paths must be deleted before objects, as they reference objects through object_id
// delete all children file_paths
ctx.db
library
.db
.file_path()
.delete_many(children_params)
.exec()
.await?;
// delete all children objects
ctx.db
library
.db
.object()
.delete_many(vec![
object::id::in_vec(object_ids),
@ -507,11 +632,55 @@ pub async fn delete_directory(
.exec()
.await?;
invalidate_query!(ctx, "locations.getExplorerData");
invalidate_query!(library, "locations.getExplorerData");
Ok(())
}
impl From<location_with_indexer_rules::Data> for location::Data {
fn from(data: location_with_indexer_rules::Data) -> Self {
Self {
id: data.id,
pub_id: data.pub_id,
path: data.path,
node_id: data.node_id,
name: data.name,
total_capacity: data.total_capacity,
available_capacity: data.available_capacity,
is_archived: data.is_archived,
generate_preview_media: data.generate_preview_media,
sync_preview_media: data.sync_preview_media,
hidden: data.hidden,
date_created: data.date_created,
node: None,
file_paths: None,
indexer_rules: None,
}
}
}
impl From<&location_with_indexer_rules::Data> for location::Data {
fn from(data: &location_with_indexer_rules::Data) -> Self {
Self {
id: data.id,
pub_id: data.pub_id.clone(),
path: data.path.clone(),
node_id: data.node_id,
name: data.name.clone(),
total_capacity: data.total_capacity,
available_capacity: data.available_capacity,
is_archived: data.is_archived,
generate_preview_media: data.generate_preview_media,
sync_preview_media: data.sync_preview_media,
hidden: data.hidden,
date_created: data.date_created,
node: None,
file_paths: None,
indexer_rules: None,
}
}
}
// check if a path exists in our database at that location
// pub async fn check_virtual_path_exists(
// library: &Library,

View file

@ -0,0 +1,254 @@
use crate::{
job::{JobError, JobReportUpdate, JobResult, JobState, StatefulJob, WorkerContext},
library::Library,
location::file_path_helper::{
ensure_sub_path_is_directory, ensure_sub_path_is_in_location,
file_path_for_file_identifier, MaterializedPath,
},
prisma::{file_path, location, PrismaClient},
};
use std::{
hash::{Hash, Hasher},
path::{Path, PathBuf},
};
use prisma_client_rust::Direction;
use serde::{Deserialize, Serialize};
use tracing::info;
use super::{
finalize_file_identifier, process_identifier_file_paths, FileIdentifierJobError,
FileIdentifierReport, FilePathIdAndLocationIdCursor, CHUNK_SIZE,
};
pub const FILE_IDENTIFIER_JOB_NAME: &str = "file_identifier";
pub struct FileIdentifierJob {}
/// `FileIdentifierJobInit` takes file_paths without a file_id from an entire location
/// or starting from a `sub_path` (getting every descendant of this `sub_path`)
/// and uniquely identifies them:
/// - first: generating the cas_id and extracting metadata
/// - finally: creating unique file records, and linking them to their file_paths
#[derive(Serialize, Deserialize, Clone)]
pub struct FileIdentifierJobInit {
pub location: location::Data,
pub sub_path: Option<PathBuf>, // subpath to start from
}
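// e.g. a `sub_path` of `Some("Photos".into())` restricts identification to the
// file_paths under `Photos` (every descendant of it, via the
// `materialized_path::starts_with` filter in `orphan_path_filters` below), while
// `None` covers the whole location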
impl Hash for FileIdentifierJobInit {
fn hash<H: Hasher>(&self, state: &mut H) {
self.location.id.hash(state);
if let Some(ref sub_path) = self.sub_path {
sub_path.hash(state);
}
}
}
#[derive(Serialize, Deserialize)]
pub struct FileIdentifierJobState {
cursor: FilePathIdAndLocationIdCursor,
report: FileIdentifierReport,
maybe_sub_materialized_path: Option<MaterializedPath>,
}
#[async_trait::async_trait]
impl StatefulJob for FileIdentifierJob {
type Init = FileIdentifierJobInit;
type Data = FileIdentifierJobState;
type Step = ();
fn name(&self) -> &'static str {
FILE_IDENTIFIER_JOB_NAME
}
async fn init(&self, ctx: WorkerContext, state: &mut JobState<Self>) -> Result<(), JobError> {
let Library { db, .. } = &ctx.library;
info!("Identifying orphan File Paths...");
let location_id = state.init.location.id;
let location_path = Path::new(&state.init.location.path);
let maybe_sub_materialized_path = if let Some(ref sub_path) = state.init.sub_path {
let full_path = ensure_sub_path_is_in_location(location_path, sub_path)
.await
.map_err(FileIdentifierJobError::from)?;
ensure_sub_path_is_directory(location_path, sub_path)
.await
.map_err(FileIdentifierJobError::from)?;
Some(
MaterializedPath::new(location_id, location_path, &full_path, true)
.map_err(FileIdentifierJobError::from)?,
)
} else {
None
};
let orphan_count =
count_orphan_file_paths(db, location_id, &maybe_sub_materialized_path).await?;
// Initializing `state.data` here because we need a complete state in case of early finish
state.data = Some(FileIdentifierJobState {
report: FileIdentifierReport {
location_path: location_path.to_path_buf(),
total_orphan_paths: orphan_count,
..Default::default()
},
cursor: FilePathIdAndLocationIdCursor {
file_path_id: -1,
location_id,
},
maybe_sub_materialized_path,
});
let data = state.data.as_mut().unwrap(); // SAFETY: We just initialized it
if orphan_count == 0 {
return Err(JobError::EarlyFinish {
name: self.name().to_string(),
reason: "Found no orphan file paths to process".to_string(),
});
}
info!("Found {} orphan file paths", orphan_count);
let task_count = (orphan_count as f64 / CHUNK_SIZE as f64).ceil() as usize;
info!(
"Found {} orphan Paths. Will execute {} tasks...",
orphan_count, task_count
);
// update job with total task count based on orphan file_paths count
ctx.progress(vec![JobReportUpdate::TaskCount(task_count)]);
let first_path_id = db
.file_path()
.find_first(orphan_path_filters(
location_id,
None,
&data.maybe_sub_materialized_path,
))
.order_by(file_path::id::order(Direction::Asc))
.select(file_path::select!({ id }))
.exec()
.await?
.map(|d| d.id)
	.unwrap(); // SAFETY: We already validated above that there are orphan `file_path`s
data.cursor.file_path_id = first_path_id;
state.steps = (0..task_count).map(|_| ()).collect();
Ok(())
}
async fn execute_step(
&self,
ctx: WorkerContext,
state: &mut JobState<Self>,
) -> Result<(), JobError> {
let FileIdentifierJobState {
ref mut cursor,
ref mut report,
ref maybe_sub_materialized_path,
} = state
.data
.as_mut()
.expect("Critical error: missing data on job state");
let location = &state.init.location;
// get chunk of orphans to process
let file_paths =
get_orphan_file_paths(&ctx.library.db, cursor, maybe_sub_materialized_path).await?;
process_identifier_file_paths(
self.name(),
location,
&file_paths,
state.step_number,
cursor,
report,
ctx,
)
.await
}
async fn finalize(&mut self, ctx: WorkerContext, state: &mut JobState<Self>) -> JobResult {
finalize_file_identifier(
&state
.data
.as_ref()
.expect("critical error: missing data on job state")
.report,
ctx,
)
}
}
fn orphan_path_filters(
location_id: i32,
file_path_id: Option<i32>,
maybe_sub_materialized_path: &Option<MaterializedPath>,
) -> Vec<file_path::WhereParam> {
let mut params = vec![
file_path::object_id::equals(None),
file_path::is_dir::equals(false),
file_path::location_id::equals(location_id),
];
// this is a workaround for the cursor not working properly
if let Some(file_path_id) = file_path_id {
params.push(file_path::id::gte(file_path_id));
}
	if let Some(ref sub_materialized_path) = maybe_sub_materialized_path {
	params.push(file_path::materialized_path::starts_with(
	sub_materialized_path.into(),
));
}
params
}
async fn count_orphan_file_paths(
db: &PrismaClient,
location_id: i32,
maybe_sub_materialized_path: &Option<MaterializedPath>,
) -> Result<usize, prisma_client_rust::QueryError> {
db.file_path()
.count(orphan_path_filters(
location_id,
None,
maybe_sub_materialized_path,
))
.exec()
.await
.map(|c| c as usize)
}
async fn get_orphan_file_paths(
db: &PrismaClient,
cursor: &FilePathIdAndLocationIdCursor,
maybe_sub_materialized_path: &Option<MaterializedPath>,
) -> Result<Vec<file_path_for_file_identifier::Data>, prisma_client_rust::QueryError> {
info!(
"Querying {} orphan Paths at cursor: {:?}",
CHUNK_SIZE, cursor
);
db.file_path()
.find_many(orphan_path_filters(
cursor.location_id,
Some(cursor.file_path_id),
maybe_sub_materialized_path,
))
.order_by(file_path::id::order(Direction::Asc))
// .cursor(cursor.into())
.take(CHUNK_SIZE as i64)
// .skip(1)
.select(file_path_for_file_identifier::select())
.exec()
.await
}
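// Note on the `id::gte` cursor workaround above: the orphan filters only match
// file_paths whose `object_id` is still `None`, so rows that were successfully
// linked to an Object in a previous chunk drop out of the next query on their own,
// even though `gte` would otherwise include the row the cursor points at again.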

View file

@ -1,7 +1,9 @@
use crate::{
job::JobError,
invalidate_query,
job::{JobError, JobReportUpdate, JobResult, WorkerContext},
library::Library,
object::cas::generate_cas_id,
location::file_path_helper::{file_path_for_file_identifier, FilePathError},
object::{cas::generate_cas_id, object_for_file_identifier},
prisma::{file_path, location, object, PrismaClient},
sync,
sync::SyncManager,
@ -12,6 +14,7 @@ use sd_sync::CRDTOperation;
use futures::future::join_all;
use int_enum::IntEnum;
use serde::{Deserialize, Serialize};
use serde_json::json;
use std::{
collections::{HashMap, HashSet},
@ -22,17 +25,16 @@ use tokio::{fs, io};
use tracing::{error, info};
use uuid::Uuid;
pub mod full_identifier_job;
pub mod file_identifier_job;
pub mod shallow_file_identifier_job;
// we break these jobs into chunks of 100 to improve performance
const CHUNK_SIZE: usize = 100;
#[derive(Error, Debug)]
pub enum IdentifierJobError {
#[error("Location not found: <id = '{0}'>")]
MissingLocation(i32),
#[error("Root file path not found: <path = '{0}'>")]
MissingRootFilePath(PathBuf),
pub enum FileIdentifierJobError {
#[error("File path related error (error: {0})")]
FilePathError(#[from] FilePathError),
}
#[derive(Debug, Clone)]
@ -75,10 +77,31 @@ impl FileMetadata {
}
}
#[derive(Serialize, Deserialize, Debug)]
struct FilePathIdAndLocationIdCursor {
file_path_id: i32,
location_id: i32,
}
impl From<&FilePathIdAndLocationIdCursor> for file_path::UniqueWhereParam {
fn from(cursor: &FilePathIdAndLocationIdCursor) -> Self {
file_path::location_id_id(cursor.location_id, cursor.file_path_id)
}
}
#[derive(Serialize, Deserialize, Debug, Default)]
pub struct FileIdentifierReport {
location_path: PathBuf,
total_orphan_paths: usize,
total_objects_created: usize,
total_objects_linked: usize,
total_objects_ignored: usize,
}
async fn identifier_job_step(
Library { db, sync, .. }: &Library,
location: &location::Data,
file_paths: &[file_path::Data],
file_paths: &[file_path_for_file_identifier::Data],
) -> Result<(usize, usize), JobError> {
let file_path_metas = join_all(file_paths.iter().map(|file_path| async move {
FileMetadata::new(&location.path, &file_path.materialized_path)
@ -89,7 +112,7 @@ async fn identifier_job_step(
.into_iter()
.flat_map(|data| {
if let Err(e) = &data {
error!("Error assembling Object metadata: {:#?}", e);
error!("Error assembling Object metadata: {e}");
}
data
@ -136,10 +159,7 @@ async fn identifier_job_step(
.find_many(vec![object::file_paths::some(vec![
file_path::cas_id::in_vec(unique_cas_ids),
])])
.select(object::select!({
pub_id
file_paths: select { id cas_id }
}))
.select(object_for_file_identifier::select())
.exec()
.await?;
@ -166,14 +186,16 @@ async fn identifier_job_step(
.map(|o| (*id, o))
})
.map(|(id, object)| {
file_path_object_connect_ops(
let (crdt_op, db_op) = file_path_object_connect_ops(
id,
// SAFETY: This pub_id is generated by the uuid lib, but we have to store bytes in sqlite
Uuid::from_slice(&object.pub_id).unwrap(),
location,
sync,
db,
)
);
(crdt_op, db_op.select(file_path::select!({ id })))
})
.unzip::<_, _, Vec<_>, Vec<_>>(),
)
@ -239,10 +261,12 @@ async fn identifier_job_step(
),
);
(
object_creation_args,
file_path_object_connect_ops(*id, pub_id, location, sync, db),
)
(object_creation_args, {
let (crdt_op, db_op) =
file_path_object_connect_ops(*id, pub_id, location, sync, db);
(crdt_op, db_op.select(file_path::select!({ id })))
})
})
.unzip();
@ -278,18 +302,13 @@ async fn identifier_job_step(
Ok((total_created, updated_file_paths.len()))
}
file_path::select!(file_path_only_id { id });
fn file_path_object_connect_ops<'db>(
file_path_id: i32,
object_id: Uuid,
location: &location::Data,
sync: &SyncManager,
db: &'db PrismaClient,
) -> (
CRDTOperation,
prisma_client_rust::Select<'db, file_path_only_id::Data>,
) {
) -> (CRDTOperation, file_path::Update<'db>) {
info!("Connecting <FilePath id={file_path_id}> to <Object pub_id={object_id}'>");
(
@ -303,13 +322,70 @@ fn file_path_object_connect_ops<'db>(
"object",
json!({ "pub_id": object_id }),
),
db.file_path()
.update(
file_path::location_id_id(location.id, file_path_id),
vec![file_path::object::connect(object::pub_id::equals(
object_id.as_bytes().to_vec(),
))],
)
.select(file_path_only_id::select()),
db.file_path().update(
file_path::location_id_id(location.id, file_path_id),
vec![file_path::object::connect(object::pub_id::equals(
object_id.as_bytes().to_vec(),
))],
),
)
}
async fn process_identifier_file_paths(
job_name: &str,
location: &location::Data,
file_paths: &[file_path_for_file_identifier::Data],
step_number: usize,
cursor: &mut FilePathIdAndLocationIdCursor,
report: &mut FileIdentifierReport,
ctx: WorkerContext,
) -> Result<(), JobError> {
	// if no file paths were found, abort the entire job early, there is nothing to do
// if we hit this error, there is something wrong with the data/query
if file_paths.is_empty() {
return Err(JobError::EarlyFinish {
name: job_name.to_string(),
reason: "Expected orphan Paths not returned from database query for this chunk"
.to_string(),
});
}
info!(
"Processing {:?} orphan Paths. ({} completed of {})",
file_paths.len(),
step_number,
report.total_orphan_paths
);
let (total_objects_created, total_objects_linked) =
identifier_job_step(&ctx.library, location, file_paths).await?;
report.total_objects_created += total_objects_created;
report.total_objects_linked += total_objects_linked;
// set the step data cursor to the last row of this chunk
if let Some(last_row) = file_paths.last() {
cursor.file_path_id = last_row.id;
}
ctx.progress(vec![
JobReportUpdate::CompletedTaskCount(step_number),
JobReportUpdate::Message(format!(
"Processed {} of {} orphan Paths",
step_number * CHUNK_SIZE,
report.total_orphan_paths
)),
]);
Ok(())
}
fn finalize_file_identifier(report: &FileIdentifierReport, ctx: WorkerContext) -> JobResult {
info!("Finalizing identifier job: {report:?}");
if report.total_orphan_paths > 0 {
invalidate_query!(ctx.library, "locations.getExplorerData");
}
Ok(Some(serde_json::to_value(report)?))
}

View file

@ -0,0 +1,246 @@
use crate::{
job::{JobError, JobReportUpdate, JobResult, JobState, StatefulJob, WorkerContext},
library::Library,
location::file_path_helper::{
ensure_sub_path_is_directory, ensure_sub_path_is_in_location,
file_path_for_file_identifier, get_existing_file_path_id, MaterializedPath,
},
prisma::{file_path, location, PrismaClient},
};
use std::{
hash::{Hash, Hasher},
path::{Path, PathBuf},
};
use prisma_client_rust::Direction;
use serde::{Deserialize, Serialize};
use tracing::info;
use super::{
finalize_file_identifier, process_identifier_file_paths, FileIdentifierJobError,
FileIdentifierReport, FilePathIdAndLocationIdCursor, CHUNK_SIZE,
};
pub const SHALLOW_FILE_IDENTIFIER_JOB_NAME: &str = "shallow_file_identifier";
pub struct ShallowFileIdentifierJob {}
/// `ShallowFileIdentifierJobInit` takes file_paths without a file_id from a specific path
/// (just direct children of this path) and uniquely identifies them:
/// - first: generating the cas_id and extracting metadata
/// - finally: creating unique file records, and linking them to their file_paths
#[derive(Serialize, Deserialize, Clone)]
pub struct ShallowFileIdentifierJobInit {
pub location: location::Data,
pub sub_path: PathBuf,
}
impl Hash for ShallowFileIdentifierJobInit {
fn hash<H: Hasher>(&self, state: &mut H) {
self.location.id.hash(state);
self.sub_path.hash(state);
}
}
#[derive(Serialize, Deserialize)]
pub struct ShallowFileIdentifierJobState {
cursor: FilePathIdAndLocationIdCursor,
report: FileIdentifierReport,
sub_path_id: i32,
}
#[async_trait::async_trait]
impl StatefulJob for ShallowFileIdentifierJob {
type Init = ShallowFileIdentifierJobInit;
type Data = ShallowFileIdentifierJobState;
type Step = ();
fn name(&self) -> &'static str {
SHALLOW_FILE_IDENTIFIER_JOB_NAME
}
async fn init(&self, ctx: WorkerContext, state: &mut JobState<Self>) -> Result<(), JobError> {
let Library { db, .. } = &ctx.library;
info!("Identifying orphan File Paths...");
let location_id = state.init.location.id;
let location_path = Path::new(&state.init.location.path);
let sub_path_id = if state.init.sub_path != Path::new("") {
let full_path = ensure_sub_path_is_in_location(location_path, &state.init.sub_path)
.await
.map_err(FileIdentifierJobError::from)?;
ensure_sub_path_is_directory(location_path, &state.init.sub_path)
.await
.map_err(FileIdentifierJobError::from)?;
get_existing_file_path_id(
MaterializedPath::new(location_id, location_path, &full_path, true)
.map_err(FileIdentifierJobError::from)?,
db,
)
.await
.map_err(FileIdentifierJobError::from)?
.expect("Sub path should already exist in the database")
} else {
get_existing_file_path_id(
MaterializedPath::new(location_id, location_path, location_path, true)
.map_err(FileIdentifierJobError::from)?,
db,
)
.await
.map_err(FileIdentifierJobError::from)?
.expect("Location root path should already exist in the database")
};
let orphan_count = count_orphan_file_paths(db, location_id, sub_path_id).await?;
// Initializing `state.data` here because we need a complete state in case of early finish
state.data = Some(ShallowFileIdentifierJobState {
report: FileIdentifierReport {
location_path: location_path.to_path_buf(),
total_orphan_paths: orphan_count,
..Default::default()
},
cursor: FilePathIdAndLocationIdCursor {
file_path_id: -1,
location_id,
},
sub_path_id,
});
if orphan_count == 0 {
return Err(JobError::EarlyFinish {
name: self.name().to_string(),
reason: "Found no orphan file paths to process".to_string(),
});
}
info!("Found {} orphan file paths", orphan_count);
let task_count = (orphan_count as f64 / CHUNK_SIZE as f64).ceil() as usize;
info!(
"Found {} orphan Paths. Will execute {} tasks...",
orphan_count, task_count
);
// update job with total task count based on orphan file_paths count
ctx.progress(vec![JobReportUpdate::TaskCount(task_count)]);
let first_path_id = db
.file_path()
.find_first(orphan_path_filters(location_id, None, sub_path_id))
.order_by(file_path::id::order(Direction::Asc))
.select(file_path::select!({ id }))
.exec()
.await?
.map(|d| d.id)
.unwrap(); // SAFETY: We already validated above that there are orphan `file_path`s
// SAFETY: We just initialized `state.data` above
state.data.as_mut().unwrap().cursor.file_path_id = first_path_id;
state.steps = (0..task_count).map(|_| ()).collect();
Ok(())
}
async fn execute_step(
&self,
ctx: WorkerContext,
state: &mut JobState<Self>,
) -> Result<(), JobError> {
let ShallowFileIdentifierJobState {
ref mut cursor,
ref mut report,
ref sub_path_id,
} = state
.data
.as_mut()
.expect("Critical error: missing data on job state");
let location = &state.init.location;
// get chunk of orphans to process
let file_paths = get_orphan_file_paths(&ctx.library.db, cursor, *sub_path_id).await?;
process_identifier_file_paths(
self.name(),
location,
&file_paths,
state.step_number,
cursor,
report,
ctx,
)
.await
}
async fn finalize(&mut self, ctx: WorkerContext, state: &mut JobState<Self>) -> JobResult {
finalize_file_identifier(
&state
.data
.as_ref()
.expect("critical error: missing data on job state")
.report,
ctx,
)
}
}
fn orphan_path_filters(
location_id: i32,
file_path_id: Option<i32>,
sub_path_id: i32,
) -> Vec<file_path::WhereParam> {
let mut params = vec![
file_path::object_id::equals(None),
file_path::is_dir::equals(false),
file_path::location_id::equals(location_id),
file_path::parent_id::equals(Some(sub_path_id)),
];
// this is a workaround for the cursor not working properly
if let Some(file_path_id) = file_path_id {
params.push(file_path::id::gte(file_path_id));
}
params
}
async fn count_orphan_file_paths(
db: &PrismaClient,
location_id: i32,
sub_path_id: i32,
) -> Result<usize, prisma_client_rust::QueryError> {
db.file_path()
.count(orphan_path_filters(location_id, None, sub_path_id))
.exec()
.await
.map(|c| c as usize)
}
async fn get_orphan_file_paths(
db: &PrismaClient,
cursor: &FilePathIdAndLocationIdCursor,
sub_path_id: i32,
) -> Result<Vec<file_path_for_file_identifier::Data>, prisma_client_rust::QueryError> {
info!(
"Querying {} orphan Paths at cursor: {:?}",
CHUNK_SIZE, cursor
);
db.file_path()
.find_many(orphan_path_filters(
cursor.location_id,
Some(cursor.file_path_id),
sub_path_id,
))
.order_by(file_path::id::order(Direction::Asc))
// .cursor(cursor.into())
.take(CHUNK_SIZE as i64)
// .skip(1)
.select(file_path_for_file_identifier::select())
.exec()
.await
}
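A minimal, std-only sketch of the sub_path handling in `init` above. The helpers `ensure_sub_path_is_in_location` and `ensure_sub_path_is_directory` are replaced here by plain path checks, so this is an approximation under those assumptions, not the real validation or the database lookup: an empty sub_path scopes the job to the location root, otherwise the sub path must resolve to a directory inside the location before its `file_path` id would be looked up.
use std::path::{Path, PathBuf};
fn resolve_scope(location_path: &Path, sub_path: &Path) -> Result<PathBuf, String> {
	if sub_path == Path::new("") {
		// No sub_path given: the shallow job operates on the location root
		return Ok(location_path.to_path_buf());
	}
	let full_path = location_path.join(sub_path);
	// Stand-in for ensure_sub_path_is_in_location: the joined path must stay under the root
	if !full_path.starts_with(location_path) {
		return Err(format!("{sub_path:?} escapes the location"));
	}
	// Stand-in for ensure_sub_path_is_directory: in the real job this is an async check
	Ok(full_path)
}
fn main() {
	let root = Path::new("/library/photos");
	assert_eq!(resolve_scope(root, Path::new("")).unwrap(), root);
	assert_eq!(
		resolve_scope(root, Path::new("2023/trip")).unwrap(),
		Path::new("/library/photos/2023/trip")
	);
}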

View file

@ -1,7 +1,6 @@
pub mod create;
use crate::{
job::JobError,
location::file_path_helper::file_path_with_object,
prisma::{file_path, location, PrismaClient},
};
@ -9,7 +8,7 @@ use std::{ffi::OsStr, path::PathBuf};
use serde::{Deserialize, Serialize};
use super::preview::file_path_with_object;
pub mod create;
pub mod copy;
pub mod cut;

View file

@ -1,234 +0,0 @@
use crate::{
invalidate_query,
job::{JobError, JobReportUpdate, JobResult, JobState, StatefulJob, WorkerContext},
library::Library,
prisma::{file_path, location},
};
use std::path::PathBuf;
use prisma_client_rust::Direction;
use serde::{Deserialize, Serialize};
use tracing::info;
use super::{identifier_job_step, IdentifierJobError, CHUNK_SIZE};
pub const FULL_IDENTIFIER_JOB_NAME: &str = "file_identifier";
pub struct FullFileIdentifierJob {}
// FileIdentifierJobInit takes file_paths without a file_id and uniquely identifies them
// first: generating the cas_id and extracting metadata
// finally: creating unique file records, and linking them to their file_paths
#[derive(Serialize, Deserialize, Clone, Hash)]
pub struct FullFileIdentifierJobInit {
pub location_id: i32,
pub sub_path: Option<PathBuf>, // subpath to start from
}
#[derive(Serialize, Deserialize, Debug)]
struct FilePathIdAndLocationIdCursor {
file_path_id: i32,
location_id: i32,
}
impl From<&FilePathIdAndLocationIdCursor> for file_path::UniqueWhereParam {
fn from(cursor: &FilePathIdAndLocationIdCursor) -> Self {
file_path::location_id_id(cursor.location_id, cursor.file_path_id)
}
}
#[derive(Serialize, Deserialize)]
pub struct FullFileIdentifierJobState {
location: location::Data,
cursor: FilePathIdAndLocationIdCursor,
report: FileIdentifierReport,
}
#[derive(Serialize, Deserialize, Debug, Default)]
pub struct FileIdentifierReport {
location_path: PathBuf,
total_orphan_paths: usize,
total_objects_created: usize,
total_objects_linked: usize,
total_objects_ignored: usize,
}
#[async_trait::async_trait]
impl StatefulJob for FullFileIdentifierJob {
type Init = FullFileIdentifierJobInit;
type Data = FullFileIdentifierJobState;
type Step = ();
fn name(&self) -> &'static str {
FULL_IDENTIFIER_JOB_NAME
}
async fn init(&self, ctx: WorkerContext, state: &mut JobState<Self>) -> Result<(), JobError> {
info!("Identifying orphan File Paths...");
let location_id = state.init.location_id;
let db = &ctx.library.db;
let location = db
.location()
.find_unique(location::id::equals(location_id))
.exec()
.await?
.ok_or(IdentifierJobError::MissingLocation(state.init.location_id))?;
let orphan_count = count_orphan_file_paths(&ctx.library, location_id).await?;
info!("Found {} orphan file paths", orphan_count);
let task_count = (orphan_count as f64 / CHUNK_SIZE as f64).ceil() as usize;
info!(
"Found {} orphan Paths. Will execute {} tasks...",
orphan_count, task_count
);
// update job with total task count based on orphan file_paths count
ctx.progress(vec![JobReportUpdate::TaskCount(task_count)]);
let first_path_id = db
.file_path()
.find_first(orphan_path_filters(location_id, None))
.exec()
.await?
.map(|d| d.id)
.unwrap_or(1);
state.data = Some(FullFileIdentifierJobState {
report: FileIdentifierReport {
location_path: location.path.clone().into(),
total_orphan_paths: orphan_count,
..Default::default()
},
location,
cursor: FilePathIdAndLocationIdCursor {
file_path_id: first_path_id,
location_id: state.init.location_id,
},
});
state.steps = (0..task_count).map(|_| ()).collect();
Ok(())
}
async fn execute_step(
&self,
ctx: WorkerContext,
state: &mut JobState<Self>,
) -> Result<(), JobError> {
let data = state
.data
.as_mut()
.expect("Critical error: missing data on job state");
// get chunk of orphans to process
let file_paths =
get_orphan_file_paths(&ctx.library, &data.cursor, data.location.id).await?;
// if no file paths found, abort entire job early, there is nothing to do
// if we hit this error, there is something wrong with the data/query
if file_paths.is_empty() {
return Err(JobError::EarlyFinish {
name: self.name().to_string(),
reason: "Expected orphan Paths not returned from database query for this chunk"
.to_string(),
});
}
info!(
"Processing {:?} orphan Paths. ({} completed of {})",
file_paths.len(),
state.step_number,
data.report.total_orphan_paths
);
let (total_objects_created, total_objects_linked) =
identifier_job_step(&ctx.library, &data.location, &file_paths).await?;
data.report.total_objects_created += total_objects_created;
data.report.total_objects_linked += total_objects_linked;
// set the step data cursor to the last row of this chunk
if let Some(last_row) = file_paths.last() {
data.cursor.file_path_id = last_row.id;
}
ctx.progress(vec![
JobReportUpdate::CompletedTaskCount(state.step_number),
JobReportUpdate::Message(format!(
"Processed {} of {} orphan Paths",
state.step_number * CHUNK_SIZE,
data.report.total_orphan_paths
)),
]);
invalidate_query!(ctx.library, "locations.getExplorerData");
// let _remaining = count_orphan_file_paths(&ctx.core_ctx, location_id.into()).await?;
Ok(())
}
async fn finalize(&mut self, _ctx: WorkerContext, state: &mut JobState<Self>) -> JobResult {
let data = state
.data
.as_ref()
.expect("critical error: missing data on job state");
info!("Finalizing identifier job: {:#?}", data.report);
Ok(Some(serde_json::to_value(&data.report)?))
}
}
fn orphan_path_filters(location_id: i32, file_path_id: Option<i32>) -> Vec<file_path::WhereParam> {
let mut params = vec![
file_path::object_id::equals(None),
file_path::is_dir::equals(false),
file_path::location_id::equals(location_id),
];
// this is a workaround for the cursor not working properly
if let Some(file_path_id) = file_path_id {
params.push(file_path::id::gte(file_path_id));
}
params
}
async fn count_orphan_file_paths(
ctx: &Library,
location_id: i32,
) -> Result<usize, prisma_client_rust::QueryError> {
Ok(ctx
.db
.file_path()
.count(vec![
file_path::object_id::equals(None),
file_path::is_dir::equals(false),
file_path::location_id::equals(location_id),
])
.exec()
.await? as usize)
}
async fn get_orphan_file_paths(
ctx: &Library,
cursor: &FilePathIdAndLocationIdCursor,
location_id: i32,
) -> Result<Vec<file_path::Data>, prisma_client_rust::QueryError> {
info!(
"Querying {} orphan Paths at cursor: {:?}",
CHUNK_SIZE, cursor
);
ctx.db
.file_path()
.find_many(orphan_path_filters(location_id, Some(cursor.file_path_id)))
.order_by(file_path::id::order(Direction::Asc))
// .cursor(cursor.into())
.take(CHUNK_SIZE as i64)
// .skip(1)
.exec()
.await
}

View file

@ -1,6 +1,11 @@
use crate::prisma::{file_path, object};
use rspc::Type;
use serde::{Deserialize, Serialize};
pub mod cas;
pub mod file_identifier;
pub mod fs;
pub mod identifier_job;
pub mod preview;
pub mod tag;
pub mod validation;
@ -9,10 +14,12 @@ pub mod validation;
// Some Objects are purely virtual, unless they have one or more associated Paths, which refer to a file found in a Location
// Objects are what can be added to Spaces
use rspc::Type;
use serde::{Deserialize, Serialize};
use crate::prisma;
// Object selectables!
object::select!(object_just_id_has_thumbnail { id has_thumbnail });
object::select!(object_for_file_identifier {
pub_id
file_paths: select { id cas_id }
});
// The response to provide the Explorer when looking at Objects
#[derive(Debug, Serialize, Deserialize, Type)]
@ -23,14 +30,14 @@ pub struct ObjectsForExplorer {
// #[derive(Debug, Serialize, Deserialize, Type)]
// pub enum ExplorerContext {
// Location(Box<prisma::file_path::Data>),
// Space(Box<prisma::file::Data>),
// Tag(Box<prisma::file::Data>),
// // Search(Box<prisma::file_path::Data>),
// Location(Box<file_path::Data>),
// Space(Box<space::Data>),
// Tag(Box<tag::Data>),
// // Search(Box<file_path::Data>),
// }
#[derive(Debug, Serialize, Deserialize, Type)]
pub enum ObjectData {
Object(Box<prisma::object::Data>),
Path(Box<prisma::file_path::Data>),
Object(Box<object::Data>),
Path(Box<file_path::Data>),
}

View file

@ -1,5 +1,5 @@
mod media_data;
mod thumb;
mod thumbnail;
pub use media_data::*;
pub use thumb::*;
pub use thumbnail::*;

View file

@ -1,385 +0,0 @@
use crate::{
api::CoreEvent,
invalidate_query,
job::{JobError, JobReportUpdate, JobResult, JobState, StatefulJob, WorkerContext},
library::Library,
prisma::{file_path, location},
};
use std::{
collections::VecDeque,
error::Error,
ops::Deref,
path::{Path, PathBuf},
};
use image::{self, imageops, DynamicImage, GenericImageView};
use sd_file_ext::extensions::{Extension, ImageExtension, VideoExtension};
use serde::{Deserialize, Serialize};
use thiserror::Error;
use tokio::{fs, task::block_in_place};
use tracing::{error, info, trace, warn};
use webp::Encoder;
static THUMBNAIL_SIZE_FACTOR: f32 = 0.2;
static THUMBNAIL_QUALITY: f32 = 30.0;
pub static THUMBNAIL_CACHE_DIR_NAME: &str = "thumbnails";
pub const THUMBNAIL_JOB_NAME: &str = "thumbnailer";
pub struct ThumbnailJob {}
#[derive(Serialize, Deserialize, Clone, Hash)]
pub struct ThumbnailJobInit {
pub location_id: i32,
pub root_path: PathBuf,
pub background: bool,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct ThumbnailJobState {
thumbnail_dir: PathBuf,
root_path: PathBuf,
}
#[derive(Error, Debug)]
pub enum ThumbnailError {
#[error("Location not found: <id = '{0}'>")]
MissingLocation(i32),
#[error("Root file path not found: <path = '{0}'>")]
MissingRootFilePath(PathBuf),
}
file_path::include!(file_path_with_object { object });
#[derive(Debug, Serialize, Deserialize, Clone, Copy)]
enum ThumbnailJobStepKind {
Image,
#[cfg(feature = "ffmpeg")]
Video,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct ThumbnailJobStep {
file_path: file_path_with_object::Data,
object_id: i32,
kind: ThumbnailJobStepKind,
}
#[async_trait::async_trait]
impl StatefulJob for ThumbnailJob {
type Init = ThumbnailJobInit;
type Data = ThumbnailJobState;
type Step = ThumbnailJobStep;
fn name(&self) -> &'static str {
THUMBNAIL_JOB_NAME
}
async fn init(&self, ctx: WorkerContext, state: &mut JobState<Self>) -> Result<(), JobError> {
let Library { db, .. } = &ctx.library;
let thumbnail_dir = ctx
.library
.config()
.data_directory()
.join(THUMBNAIL_CACHE_DIR_NAME);
let location = db
.location()
.find_unique(location::id::equals(state.init.location_id))
.exec()
.await?
.ok_or(ThumbnailError::MissingLocation(state.init.location_id))?;
let root_path_str = state
.init
.root_path
.to_str()
.expect("Found non-UTF-8 path")
.to_string();
let parent_directory_id = db
.file_path()
.find_first(vec![
file_path::location_id::equals(state.init.location_id),
file_path::materialized_path::equals(if !root_path_str.is_empty() {
root_path_str
} else {
"/".to_string()
}),
file_path::is_dir::equals(true),
])
.select(file_path::select!({ id }))
.exec()
.await?
.ok_or_else(|| ThumbnailError::MissingRootFilePath(state.init.root_path.clone()))?
.id;
info!(
"Searching for images in location {} at directory {}",
location.id, parent_directory_id
);
// create all necessary directories if they don't exist
fs::create_dir_all(&thumbnail_dir).await?;
let root_path = location.path.into();
// query database for all image files in this location that need thumbnails
let image_files = get_files_by_extensions(
&ctx.library,
state.init.location_id,
parent_directory_id,
&sd_file_ext::extensions::ALL_IMAGE_EXTENSIONS
.iter()
.map(Clone::clone)
.filter(can_generate_thumbnail_for_image)
.map(Extension::Image)
.collect::<Vec<_>>(),
ThumbnailJobStepKind::Image,
)
.await?;
info!("Found {:?} image files", image_files.len());
#[cfg(feature = "ffmpeg")]
let all_files = {
// query database for all video files in this location that need thumbnails
let video_files = get_files_by_extensions(
&ctx.library,
state.init.location_id,
parent_directory_id,
&sd_file_ext::extensions::ALL_VIDEO_EXTENSIONS
.iter()
.map(Clone::clone)
.filter(can_generate_thumbnail_for_video)
.map(Extension::Video)
.collect::<Vec<_>>(),
ThumbnailJobStepKind::Video,
)
.await?;
info!("Found {:?} video files", video_files.len());
image_files
.into_iter()
.chain(video_files.into_iter())
.collect::<VecDeque<_>>()
};
#[cfg(not(feature = "ffmpeg"))]
let all_files = { image_files.into_iter().collect::<VecDeque<_>>() };
ctx.progress(vec![
JobReportUpdate::TaskCount(all_files.len()),
JobReportUpdate::Message(format!("Preparing to process {} files", all_files.len())),
]);
state.data = Some(ThumbnailJobState {
thumbnail_dir,
root_path,
});
state.steps = all_files;
Ok(())
}
async fn execute_step(
&self,
ctx: WorkerContext,
state: &mut JobState<Self>,
) -> Result<(), JobError> {
let step = &state.steps[0];
ctx.progress(vec![JobReportUpdate::Message(format!(
"Processing {}",
step.file_path.materialized_path
))]);
let data = state
.data
.as_ref()
.expect("critical error: missing data on job state");
// assemble the file path
let path = data.root_path.join(&step.file_path.materialized_path);
trace!("image_file {:?}", step);
// get cas_id, if none found skip
let Some(cas_id) = &step.file_path.cas_id else {
warn!(
"skipping thumbnail generation for {}",
step.file_path.materialized_path
);
return Ok(());
};
// Define and write the WebP-encoded file to a given path
let output_path = data.thumbnail_dir.join(cas_id).with_extension("webp");
// check if file exists at output path
if !output_path.try_exists().unwrap() {
info!("Writing {:?} to {:?}", path, output_path);
match step.kind {
ThumbnailJobStepKind::Image => {
if let Err(e) = generate_image_thumbnail(&path, &output_path).await {
error!("Error generating thumb for image {:#?}", e);
}
}
#[cfg(feature = "ffmpeg")]
ThumbnailJobStepKind::Video => {
// use crate::{
// object::preview::{extract_media_data, StreamKind},
// prisma::media_data,
// };
// use
if let Err(e) = generate_video_thumbnail(&path, &output_path).await {
error!("Error generating thumb for video: {:?} {:#?}", &path, e);
}
// extract MediaData from video and put in the database
// TODO: this is bad here, maybe give it its own job?
// if let Ok(media_data) = extract_media_data(&path) {
// info!(
// "Extracted media data for object {}: {:?}",
// step.object_id, media_data
// );
// // let primary_video_stream = media_data
// // .steams
// // .iter()
// // .find(|s| s.kind == Some(StreamKind::Video(_)));
// let params = vec![
// media_data::duration_seconds::set(Some(media_data.duration_seconds)),
// // media_data::pixel_width::set(Some(media_data.width)),
// // media_data::pixel_height::set(Some(media_data.height)),
// ];
// let _ = ctx
// .library()
// .db
// .media_data()
// .upsert(
// media_data::id::equals(step.object_id),
// params.clone(),
// params,
// )
// .exec()
// .await?;
// }
}
}
if !state.init.background {
ctx.library.emit(CoreEvent::NewThumbnail {
cas_id: cas_id.clone(),
});
};
// With this invalidate query, we update the user interface to show each new thumbnail
invalidate_query!(ctx.library, "locations.getExplorerData");
} else {
info!("Thumb exists, skipping... {}", output_path.display());
}
ctx.progress(vec![JobReportUpdate::CompletedTaskCount(
state.step_number + 1,
)]);
Ok(())
}
async fn finalize(&mut self, _ctx: WorkerContext, state: &mut JobState<Self>) -> JobResult {
let data = state
.data
.as_ref()
.expect("critical error: missing data on job state");
info!(
"Finished thumbnail generation for location {} at {}",
state.init.location_id,
data.root_path.display()
);
// TODO: Serialize and return metadata here
Ok(None)
}
}
pub async fn generate_image_thumbnail<P: AsRef<Path>>(
file_path: P,
output_path: P,
) -> Result<(), Box<dyn Error>> {
// Webp creation has blocking code
let webp = block_in_place(|| -> Result<Vec<u8>, Box<dyn Error>> {
// Using `image` crate, open the included .jpg file
let img = image::open(file_path)?;
let (w, h) = img.dimensions();
// Optionally, resize the existing photo and convert back into DynamicImage
let img = DynamicImage::ImageRgba8(imageops::resize(
&img,
// FIXME : Think of a better heuristic to get the thumbnail size
(w as f32 * THUMBNAIL_SIZE_FACTOR) as u32,
(h as f32 * THUMBNAIL_SIZE_FACTOR) as u32,
imageops::FilterType::Triangle,
));
// Create the WebP encoder for the above image
let encoder = Encoder::from_image(&img)?;
// Encode the image at a specified quality 0-100
// Type WebPMemory is !Send, which makes the Future in this function !Send,
// this make us `deref` to have a `&[u8]` and then `to_owned` to make a Vec<u8>
// which implies on a unwanted clone...
Ok(encoder.encode(THUMBNAIL_QUALITY).deref().to_owned())
})?;
fs::write(output_path, &webp).await.map_err(Into::into)
}
#[cfg(feature = "ffmpeg")]
pub async fn generate_video_thumbnail<P: AsRef<Path>>(
file_path: P,
output_path: P,
) -> Result<(), Box<dyn Error>> {
use sd_ffmpeg::to_thumbnail;
to_thumbnail(file_path, output_path, 256, THUMBNAIL_QUALITY).await?;
Ok(())
}
async fn get_files_by_extensions(
ctx: &Library,
location_id: i32,
_parent_file_path_id: i32,
extensions: &[Extension],
kind: ThumbnailJobStepKind,
) -> Result<Vec<ThumbnailJobStep>, JobError> {
Ok(ctx
.db
.file_path()
.find_many(vec![
file_path::location_id::equals(location_id),
file_path::extension::in_vec(extensions.iter().map(ToString::to_string).collect()),
// file_path::parent_id::equals(Some(parent_file_path_id)),
])
.include(file_path_with_object::include())
.exec()
.await?
.into_iter()
.map(|file_path| ThumbnailJobStep {
object_id: file_path.object.as_ref().unwrap().id,
file_path,
kind,
})
.collect())
}
#[allow(unused)]
pub fn can_generate_thumbnail_for_video(video_extension: &VideoExtension) -> bool {
use VideoExtension::*;
// File extensions that are specifically not supported by the thumbnailer
!matches!(video_extension, Mpg | Swf | M2v | Hevc)
}
#[allow(unused)]
pub fn can_generate_thumbnail_for_image(image_extension: &ImageExtension) -> bool {
use ImageExtension::*;
matches!(image_extension, Jpg | Jpeg | Png | Webp | Gif)
}

View file

@ -0,0 +1,239 @@
use crate::{
api::CoreEvent,
invalidate_query,
job::{JobError, JobReportUpdate, JobResult, WorkerContext},
location::{
file_path_helper::{file_path_just_materialized_path_cas_id, FilePathError},
LocationId,
},
};
use std::{
error::Error,
ops::Deref,
path::{Path, PathBuf},
};
use sd_file_ext::extensions::{Extension, ImageExtension};
#[cfg(feature = "ffmpeg")]
use sd_file_ext::extensions::VideoExtension;
use image::{self, imageops, DynamicImage, GenericImageView};
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use tokio::{fs, io, task::block_in_place};
use tracing::{error, info, trace, warn};
use webp::Encoder;
pub mod shallow_thumbnailer_job;
pub mod thumbnailer_job;
static THUMBNAIL_SIZE_FACTOR: f32 = 0.2;
static THUMBNAIL_QUALITY: f32 = 30.0;
pub static THUMBNAIL_CACHE_DIR_NAME: &str = "thumbnails";
#[cfg(feature = "ffmpeg")]
static FILTERED_VIDEO_EXTENSIONS: Lazy<Vec<Extension>> = Lazy::new(|| {
sd_file_ext::extensions::ALL_VIDEO_EXTENSIONS
.iter()
.map(Clone::clone)
.filter(can_generate_thumbnail_for_video)
.map(Extension::Video)
.collect()
});
static FILTERED_IMAGE_EXTENSIONS: Lazy<Vec<Extension>> = Lazy::new(|| {
sd_file_ext::extensions::ALL_IMAGE_EXTENSIONS
.iter()
.map(Clone::clone)
.filter(can_generate_thumbnail_for_image)
.map(Extension::Image)
.collect()
});
#[derive(Debug, Serialize, Deserialize)]
pub struct ThumbnailerJobState {
thumbnail_dir: PathBuf,
location_path: PathBuf,
report: ThumbnailerJobReport,
}
#[derive(Error, Debug)]
pub enum ThumbnailerError {
#[error("File path related error (error: {0})")]
FilePathError(#[from] FilePathError),
#[error("IO error (error: {0})")]
IOError(#[from] io::Error),
}
#[derive(Debug, Serialize, Deserialize)]
pub struct ThumbnailerJobReport {
location_id: LocationId,
materialized_path: String,
thumbnails_created: u32,
}
#[derive(Debug, Serialize, Deserialize, Clone, Copy)]
enum ThumbnailerJobStepKind {
Image,
#[cfg(feature = "ffmpeg")]
Video,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct ThumbnailerJobStep {
file_path: file_path_just_materialized_path_cas_id::Data,
kind: ThumbnailerJobStepKind,
}
pub async fn generate_image_thumbnail<P: AsRef<Path>>(
file_path: P,
output_path: P,
) -> Result<(), Box<dyn Error>> {
// Webp creation has blocking code
let webp = block_in_place(|| -> Result<Vec<u8>, Box<dyn Error>> {
// Open the source image file with the `image` crate
let img = image::open(file_path)?;
let (w, h) = img.dimensions();
// Resize the image and convert it back into a DynamicImage
let img = DynamicImage::ImageRgba8(imageops::resize(
&img,
// FIXME : Think of a better heuristic to get the thumbnail size
(w as f32 * THUMBNAIL_SIZE_FACTOR) as u32,
(h as f32 * THUMBNAIL_SIZE_FACTOR) as u32,
imageops::FilterType::Triangle,
));
// Create the WebP encoder for the above image
let encoder = Encoder::from_image(&img)?;
// Encode the image at a specified quality 0-100
// Type WebPMemory is !Send, which would make the Future in this function !Send too,
// so we `deref` to get a `&[u8]` and then call `to_owned` to get a Vec<u8>,
// which unfortunately implies an unwanted clone...
Ok(encoder.encode(THUMBNAIL_QUALITY).deref().to_owned())
})?;
fs::write(output_path, &webp).await.map_err(Into::into)
}
#[cfg(feature = "ffmpeg")]
pub async fn generate_video_thumbnail<P: AsRef<Path>>(
file_path: P,
output_path: P,
) -> Result<(), Box<dyn Error>> {
use sd_ffmpeg::to_thumbnail;
to_thumbnail(file_path, output_path, 256, THUMBNAIL_QUALITY).await?;
Ok(())
}
#[cfg(feature = "ffmpeg")]
pub const fn can_generate_thumbnail_for_video(video_extension: &VideoExtension) -> bool {
use VideoExtension::*;
// File extensions that are specifically not supported by the thumbnailer
!matches!(video_extension, Mpg | Swf | M2v | Hevc)
}
pub const fn can_generate_thumbnail_for_image(image_extension: &ImageExtension) -> bool {
use ImageExtension::*;
matches!(image_extension, Jpg | Jpeg | Png | Webp | Gif)
}
fn finalize_thumbnailer(data: &ThumbnailerJobState, ctx: WorkerContext) -> JobResult {
info!(
"Finished thumbnail generation for location {} at {}",
data.report.location_id,
data.location_path
.join(&data.report.materialized_path)
.display()
);
if data.report.thumbnails_created > 0 {
invalidate_query!(ctx.library, "locations.getExplorerData");
}
Ok(Some(serde_json::to_value(&data.report)?))
}
async fn process_step(
is_background: bool,
step_number: usize,
step: &ThumbnailerJobStep,
data: &mut ThumbnailerJobState,
ctx: WorkerContext,
) -> Result<(), JobError> {
ctx.progress(vec![JobReportUpdate::Message(format!(
"Processing {}",
step.file_path.materialized_path
))]);
let step_result = inner_process_step(is_background, step, data, &ctx).await;
ctx.progress(vec![JobReportUpdate::CompletedTaskCount(step_number + 1)]);
step_result
}
async fn inner_process_step(
is_background: bool,
step: &ThumbnailerJobStep,
data: &mut ThumbnailerJobState,
ctx: &WorkerContext,
) -> Result<(), JobError> {
// assemble the file path
let path = data.location_path.join(&step.file_path.materialized_path);
trace!("image_file {:?}", step);
// get cas_id, if none found skip
let Some(cas_id) = &step.file_path.cas_id else {
warn!(
"skipping thumbnail generation for {}",
step.file_path.materialized_path
);
return Ok(());
};
// Define and write the WebP-encoded file to a given path
let output_path = data.thumbnail_dir.join(format!("{cas_id}.webp"));
match fs::metadata(&output_path).await {
Ok(_) => {
info!("Thumb exists, skipping... {}", output_path.display());
}
Err(e) if e.kind() == io::ErrorKind::NotFound => {
info!("Writing {:?} to {:?}", path, output_path);
match step.kind {
ThumbnailerJobStepKind::Image => {
if let Err(e) = generate_image_thumbnail(&path, &output_path).await {
error!("Error generating thumb for image {:#?}", e);
}
}
#[cfg(feature = "ffmpeg")]
ThumbnailerJobStepKind::Video => {
if let Err(e) = generate_video_thumbnail(&path, &output_path).await {
error!("Error generating thumb for video: {:?} {:#?}", &path, e);
}
}
}
if !is_background {
ctx.library.emit(CoreEvent::NewThumbnail {
cas_id: cas_id.clone(),
});
// With this invalidate query, we update the user interface to show each new thumbnail
invalidate_query!(ctx.library, "locations.getExplorerData");
};
data.report.thumbnails_created += 1;
}
Err(e) => return Err(ThumbnailerError::from(e).into()),
}
Ok(())
}
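To illustrate the !Send workaround noted in `generate_image_thumbnail`, here is a sketch in which `Rc<Vec<u8>>` stands in for the webp crate's `WebPMemory` (both are !Send) and the output path is a throwaway temp file; it is not the real encoder. The blocking encode runs inside `block_in_place`, and only an owned `Vec<u8>` leaves the closure, so nothing !Send is held across an `.await` and the async function's future stays Send.
use std::rc::Rc;
use tokio::task::block_in_place;
async fn encode_and_write(path: &std::path::Path) -> std::io::Result<()> {
	let bytes: Vec<u8> = block_in_place(|| {
		// pretend this is the blocking WebP encode returning a !Send buffer
		let non_send_buffer: Rc<Vec<u8>> = Rc::new(vec![1, 2, 3]);
		non_send_buffer.as_ref().clone() // copy out, so only a plain Vec<u8> escapes the closure
	});
	// the only value held across this .await is the Send `bytes`
	tokio::fs::write(path, &bytes).await
}
#[tokio::main(flavor = "multi_thread")] // block_in_place needs the multi-thread runtime
async fn main() -> std::io::Result<()> {
	encode_and_write(&std::env::temp_dir().join("thumb.webp")).await
}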

View file

@ -0,0 +1,211 @@
use crate::{
job::{JobError, JobReportUpdate, JobResult, JobState, StatefulJob, WorkerContext},
library::Library,
location::{
file_path_helper::{
ensure_sub_path_is_directory, ensure_sub_path_is_in_location,
file_path_just_materialized_path_cas_id, get_existing_file_path_id, MaterializedPath,
},
LocationId,
},
prisma::{file_path, location, PrismaClient},
};
use std::{
collections::VecDeque,
hash::Hash,
path::{Path, PathBuf},
};
use sd_file_ext::extensions::Extension;
use serde::{Deserialize, Serialize};
use tokio::fs;
use tracing::info;
use super::{
finalize_thumbnailer, process_step, ThumbnailerError, ThumbnailerJobReport,
ThumbnailerJobState, ThumbnailerJobStep, ThumbnailerJobStepKind, FILTERED_IMAGE_EXTENSIONS,
THUMBNAIL_CACHE_DIR_NAME,
};
#[cfg(feature = "ffmpeg")]
use super::FILTERED_VIDEO_EXTENSIONS;
pub const SHALLOW_THUMBNAILER_JOB_NAME: &str = "shallow_thumbnailer";
pub struct ShallowThumbnailerJob {}
#[derive(Serialize, Deserialize, Clone)]
pub struct ShallowThumbnailerJobInit {
pub location: location::Data,
pub sub_path: PathBuf,
}
impl Hash for ShallowThumbnailerJobInit {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.location.id.hash(state);
self.sub_path.hash(state);
}
}
#[async_trait::async_trait]
impl StatefulJob for ShallowThumbnailerJob {
type Init = ShallowThumbnailerJobInit;
type Data = ThumbnailerJobState;
type Step = ThumbnailerJobStep;
fn name(&self) -> &'static str {
SHALLOW_THUMBNAILER_JOB_NAME
}
async fn init(&self, ctx: WorkerContext, state: &mut JobState<Self>) -> Result<(), JobError> {
let Library { db, .. } = &ctx.library;
let thumbnail_dir = ctx
.library
.config()
.data_directory()
.join(THUMBNAIL_CACHE_DIR_NAME);
let location_id = state.init.location.id;
let location_path = PathBuf::from(&state.init.location.path);
let sub_path_id = if state.init.sub_path != Path::new("") {
let full_path = ensure_sub_path_is_in_location(&location_path, &state.init.sub_path)
.await
.map_err(ThumbnailerError::from)?;
ensure_sub_path_is_directory(&location_path, &state.init.sub_path)
.await
.map_err(ThumbnailerError::from)?;
get_existing_file_path_id(
MaterializedPath::new(location_id, &location_path, &full_path, true)
.map_err(ThumbnailerError::from)?,
db,
)
.await
.map_err(ThumbnailerError::from)?
.expect("Sub path should already exist in the database")
} else {
get_existing_file_path_id(
MaterializedPath::new(location_id, &location_path, &location_path, true)
.map_err(ThumbnailerError::from)?,
db,
)
.await
.map_err(ThumbnailerError::from)?
.expect("Location root path should already exist in the database")
};
info!("Searching for images in location {location_id} at parent directory with id {sub_path_id}");
// create all necessary directories if they don't exist
fs::create_dir_all(&thumbnail_dir).await?;
// query database for all image files in this location that need thumbnails
let image_files = get_files_by_extensions(
db,
location_id,
sub_path_id,
&FILTERED_IMAGE_EXTENSIONS,
ThumbnailerJobStepKind::Image,
)
.await?;
info!("Found {:?} image files", image_files.len());
#[cfg(feature = "ffmpeg")]
let all_files = {
// query database for all video files in this location that need thumbnails
let video_files = get_files_by_extensions(
db,
location_id,
sub_path_id,
&FILTERED_VIDEO_EXTENSIONS,
ThumbnailerJobStepKind::Video,
)
.await?;
info!("Found {:?} video files", video_files.len());
image_files
.into_iter()
.chain(video_files.into_iter())
.collect::<VecDeque<_>>()
};
#[cfg(not(feature = "ffmpeg"))]
let all_files = { image_files.into_iter().collect::<VecDeque<_>>() };
ctx.progress(vec![
JobReportUpdate::TaskCount(all_files.len()),
JobReportUpdate::Message(format!("Preparing to process {} files", all_files.len())),
]);
state.data = Some(ThumbnailerJobState {
thumbnail_dir,
location_path,
report: ThumbnailerJobReport {
location_id,
materialized_path: if state.init.sub_path != Path::new("") {
// SAFETY: We know that the sub_path is a valid UTF-8 string because we validated it before
state.init.sub_path.to_str().unwrap().to_string()
} else {
"".to_string()
},
thumbnails_created: 0,
},
});
state.steps = all_files;
Ok(())
}
async fn execute_step(
&self,
ctx: WorkerContext,
state: &mut JobState<Self>,
) -> Result<(), JobError> {
process_step(
false, // On shallow thumbnailer, we want to show thumbnails ASAP
state.step_number,
&state.steps[0],
state
.data
.as_mut()
.expect("critical error: missing data on job state"),
ctx,
)
.await
}
async fn finalize(&mut self, ctx: WorkerContext, state: &mut JobState<Self>) -> JobResult {
finalize_thumbnailer(
state
.data
.as_ref()
.expect("critical error: missing data on job state"),
ctx,
)
}
}
async fn get_files_by_extensions(
db: &PrismaClient,
location_id: LocationId,
parent_id: i32,
extensions: &[Extension],
kind: ThumbnailerJobStepKind,
) -> Result<Vec<ThumbnailerJobStep>, JobError> {
Ok(db
.file_path()
.find_many(vec![
file_path::location_id::equals(location_id),
file_path::extension::in_vec(extensions.iter().map(ToString::to_string).collect()),
file_path::parent_id::equals(Some(parent_id)),
])
.select(file_path_just_materialized_path_cas_id::select())
.exec()
.await?
.into_iter()
.map(|file_path| ThumbnailerJobStep { file_path, kind })
.collect())
}
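A small self-contained sketch of the manual `Hash` impl used by both shallow job inits above; `LocationData` here is a hypothetical stand-in for `location::Data`. Only the location id and the sub_path feed the hash, so two inits pointing at the same directory hash identically regardless of any other location fields.
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use std::path::PathBuf;
#[allow(dead_code)]
struct LocationData {
	id: i32,
	name: String,
}
struct ShallowJobInit {
	location: LocationData,
	sub_path: PathBuf,
}
impl Hash for ShallowJobInit {
	fn hash<H: Hasher>(&self, state: &mut H) {
		// only the stable identifiers participate in the hash
		self.location.id.hash(state);
		self.sub_path.hash(state);
	}
}
fn hash_of(init: &ShallowJobInit) -> u64 {
	let mut hasher = DefaultHasher::new();
	init.hash(&mut hasher);
	hasher.finish()
}
fn main() {
	let a = ShallowJobInit {
		location: LocationData { id: 1, name: "Photos".into() },
		sub_path: PathBuf::from("2023/trip"),
	};
	let b = ShallowJobInit {
		location: LocationData { id: 1, name: "Photos (renamed)".into() },
		sub_path: PathBuf::from("2023/trip"),
	};
	assert_eq!(hash_of(&a), hash_of(&b)); // same scope, same hash
}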

View file

@ -0,0 +1,187 @@
use crate::{
job::{JobError, JobReportUpdate, JobResult, JobState, StatefulJob, WorkerContext},
library::Library,
location::file_path_helper::{
ensure_sub_path_is_directory, ensure_sub_path_is_in_location,
file_path_just_materialized_path_cas_id, MaterializedPath,
},
prisma::{file_path, location, PrismaClient},
};
use std::{collections::VecDeque, hash::Hash, path::PathBuf};
use sd_file_ext::extensions::Extension;
use serde::{Deserialize, Serialize};
use tokio::fs;
use tracing::info;
use super::{
finalize_thumbnailer, process_step, ThumbnailerError, ThumbnailerJobReport,
ThumbnailerJobState, ThumbnailerJobStep, ThumbnailerJobStepKind, FILTERED_IMAGE_EXTENSIONS,
THUMBNAIL_CACHE_DIR_NAME,
};
#[cfg(feature = "ffmpeg")]
use super::FILTERED_VIDEO_EXTENSIONS;
pub const THUMBNAILER_JOB_NAME: &str = "thumbnailer";
pub struct ThumbnailerJob {}
#[derive(Serialize, Deserialize, Clone)]
pub struct ThumbnailerJobInit {
pub location: location::Data,
pub sub_path: Option<PathBuf>,
pub background: bool,
}
impl Hash for ThumbnailerJobInit {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.location.id.hash(state);
if let Some(ref sub_path) = self.sub_path {
sub_path.hash(state);
}
}
}
#[async_trait::async_trait]
impl StatefulJob for ThumbnailerJob {
type Init = ThumbnailerJobInit;
type Data = ThumbnailerJobState;
type Step = ThumbnailerJobStep;
fn name(&self) -> &'static str {
THUMBNAILER_JOB_NAME
}
async fn init(&self, ctx: WorkerContext, state: &mut JobState<Self>) -> Result<(), JobError> {
let Library { db, .. } = &ctx.library;
let thumbnail_dir = ctx
.library
.config()
.data_directory()
.join(THUMBNAIL_CACHE_DIR_NAME);
let location_id = state.init.location.id;
let location_path = PathBuf::from(&state.init.location.path);
let materialized_path = if let Some(ref sub_path) = state.init.sub_path {
let full_path = ensure_sub_path_is_in_location(&location_path, sub_path)
.await
.map_err(ThumbnailerError::from)?;
ensure_sub_path_is_directory(&location_path, sub_path)
.await
.map_err(ThumbnailerError::from)?;
MaterializedPath::new(location_id, &location_path, &full_path, true)
.map_err(ThumbnailerError::from)?
} else {
MaterializedPath::new(location_id, &location_path, &location_path, true)
.map_err(ThumbnailerError::from)?
};
info!("Searching for images in location {location_id} at directory {materialized_path}");
// create all necessary directories if they don't exist
fs::create_dir_all(&thumbnail_dir).await?;
// query database for all image files in this location that need thumbnails
let image_files = get_files_by_extensions(
db,
&materialized_path,
&FILTERED_IMAGE_EXTENSIONS,
ThumbnailerJobStepKind::Image,
)
.await?;
info!("Found {:?} image files", image_files.len());
#[cfg(feature = "ffmpeg")]
let all_files = {
// query database for all video files in this location that need thumbnails
let video_files = get_files_by_extensions(
db,
&materialized_path,
&FILTERED_VIDEO_EXTENSIONS,
ThumbnailerJobStepKind::Video,
)
.await?;
info!("Found {:?} video files", video_files.len());
image_files
.into_iter()
.chain(video_files.into_iter())
.collect::<VecDeque<_>>()
};
#[cfg(not(feature = "ffmpeg"))]
let all_files = { image_files.into_iter().collect::<VecDeque<_>>() };
ctx.progress(vec![
JobReportUpdate::TaskCount(all_files.len()),
JobReportUpdate::Message(format!("Preparing to process {} files", all_files.len())),
]);
state.data = Some(ThumbnailerJobState {
thumbnail_dir,
location_path,
report: ThumbnailerJobReport {
location_id,
materialized_path: materialized_path.into(),
thumbnails_created: 0,
},
});
state.steps = all_files;
Ok(())
}
async fn execute_step(
&self,
ctx: WorkerContext,
state: &mut JobState<Self>,
) -> Result<(), JobError> {
process_step(
state.init.background,
state.step_number,
&state.steps[0],
state
.data
.as_mut()
.expect("critical error: missing data on job state"),
ctx,
)
.await
}
async fn finalize(&mut self, ctx: WorkerContext, state: &mut JobState<Self>) -> JobResult {
finalize_thumbnailer(
state
.data
.as_ref()
.expect("critical error: missing data on job state"),
ctx,
)
}
}
async fn get_files_by_extensions(
db: &PrismaClient,
materialized_path: &MaterializedPath,
extensions: &[Extension],
kind: ThumbnailerJobStepKind,
) -> Result<Vec<ThumbnailerJobStep>, JobError> {
Ok(db
.file_path()
.find_many(vec![
file_path::location_id::equals(materialized_path.location_id()),
file_path::extension::in_vec(extensions.iter().map(ToString::to_string).collect()),
file_path::materialized_path::starts_with(materialized_path.into()),
])
.select(file_path_just_materialized_path_cas_id::select())
.exec()
.await?
.into_iter()
.map(|file_path| ThumbnailerJobStep { file_path, kind })
.collect())
}
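To contrast the two scoping strategies, here is a sketch with in-memory rows standing in for the file_path table; it is not the Prisma queries themselves. The shallow thumbnailer selects only direct children via `parent_id`, while this job selects a whole subtree via a `materialized_path` prefix.
#[allow(dead_code)]
#[derive(Debug)]
struct Row {
	id: i32,
	parent_id: Option<i32>,
	materialized_path: &'static str,
}
// shallow scope: direct children of the given directory only
fn shallow_scope<'a>(rows: &'a [Row], sub_path_id: i32) -> Vec<&'a Row> {
	rows.iter().filter(|r| r.parent_id == Some(sub_path_id)).collect()
}
// recursive scope: everything whose materialized path starts with the given prefix
fn recursive_scope<'a>(rows: &'a [Row], prefix: &str) -> Vec<&'a Row> {
	rows.iter()
		.filter(|r| r.materialized_path.starts_with(prefix))
		.collect()
}
fn main() {
	let rows = [
		Row { id: 1, parent_id: None, materialized_path: "2023/" },
		Row { id: 2, parent_id: Some(1), materialized_path: "2023/a.png" },
		Row { id: 3, parent_id: Some(1), materialized_path: "2023/trip/" },
		Row { id: 4, parent_id: Some(3), materialized_path: "2023/trip/b.png" },
	];
	assert_eq!(shallow_scope(&rows, 1).len(), 2); // only direct children of id 1
	assert_eq!(recursive_scope(&rows, "2023/").len(), 4); // the whole subtree
}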

View file

@ -1,15 +1,15 @@
use serde::{Deserialize, Serialize};
use serde_json::json;
use std::{collections::VecDeque, path::PathBuf};
use crate::{
job::{JobError, JobReportUpdate, JobResult, JobState, StatefulJob, WorkerContext},
library::Library,
location::file_path_helper::file_path_for_object_validator,
prisma::{file_path, location},
sync,
};
use std::{collections::VecDeque, path::PathBuf};
use serde::{Deserialize, Serialize};
use serde_json::json;
use tracing::info;
use super::hash::file_checksum;
@ -36,31 +36,18 @@ pub struct ObjectValidatorJobInit {
pub background: bool,
}
file_path::select!(file_path_and_object {
id
materialized_path
integrity_checksum
location: select {
id
pub_id
}
object: select {
id
}
});
#[async_trait::async_trait]
impl StatefulJob for ObjectValidatorJob {
type Init = ObjectValidatorJobInit;
type Data = ObjectValidatorJobState;
type Step = file_path_and_object::Data;
type Step = file_path_for_object_validator::Data;
fn name(&self) -> &'static str {
VALIDATOR_JOB_NAME
}
async fn init(&self, ctx: WorkerContext, state: &mut JobState<Self>) -> Result<(), JobError> {
let db = &ctx.library.db;
let Library { db, .. } = &ctx.library;
state.steps = db
.file_path()
@ -69,7 +56,7 @@ impl StatefulJob for ObjectValidatorJob {
file_path::is_dir::equals(false),
file_path::integrity_checksum::equals(None),
])
.select(file_path_and_object::select())
.select(file_path_for_object_validator::select())
.exec()
.await?
.into_iter()

View file

@ -82,7 +82,7 @@ impl P2PManager {
events_tx
.send(P2PEvent::DiscoveredPeer {
peer_id: event.peer_id.clone(),
peer_id: event.peer_id,
metadata: event.metadata.clone(),
})
.map_err(|_| error!("Failed to send event to p2p event stream!"))

View file

@ -38,21 +38,22 @@ impl From<VolumeError> for rspc::Error {
}
}
pub async fn save_volume(ctx: &Library) -> Result<(), VolumeError> {
pub async fn save_volume(library: &Library) -> Result<(), VolumeError> {
let volumes = get_volumes()?;
// upsert all volumes associated with this client into the db
for volume in volumes {
ctx.db
library
.db
.volume()
.upsert(
node_id_mount_point_name(
ctx.node_local_id,
library.node_local_id,
volume.mount_point.to_string(),
volume.name.to_string(),
),
(
ctx.node_local_id,
library.node_local_id,
volume.name,
volume.mount_point,
vec![

View file

@ -195,7 +195,6 @@ impl From<SecretKey> for SecretKeyString {
c.to_string()
}
})
.into_iter()
.collect();
Self::new(hex_string)

View file

@ -1,6 +1,6 @@
import { useEffect } from 'react';
import { useParams, useSearchParams } from 'react-router-dom';
import { useLibraryQuery } from '@sd/client';
import { useLibraryMutation, useLibraryQuery } from '@sd/client';
import { getExplorerStore } from '~/hooks/useExplorerStore';
import Explorer from '../Explorer';
@ -16,11 +16,15 @@ export function useExplorerParams() {
}
export default () => {
const { location_id, path } = useExplorerParams();
const { location_id, path, limit } = useExplorerParams();
const quickRescan = useLibraryMutation('locations.quickRescan');
const explorerState = getExplorerStore();
useEffect(() => {
getExplorerStore().locationId = location_id;
}, [location_id]);
explorerState.locationId = location_id;
if (location_id !== null) quickRescan.mutate({ location_id, sub_path: path });
}, [location_id, path]);
if (location_id === null) throw new Error(`location_id is null!`);
@ -28,8 +32,8 @@ export default () => {
'locations.getExplorerData',
{
location_id,
path: path,
limit: 100,
path,
limit,
cursor: null
}
]);

View file

@ -36,7 +36,7 @@ export default function EditLocation() {
form.reset({
displayName: data.name,
localPath: data.path,
indexer_rules_ids: data.indexer_rules.map((i) => i.indexer_rule_id.toString()),
indexer_rules_ids: data.indexer_rules.map((i) => i.indexer_rule.id.toString()),
generatePreviewMedia: data.generate_preview_media,
syncPreviewMedia: data.sync_preview_media,
hidden: data.hidden

View file

@ -17,7 +17,7 @@ export type Procedures = {
{ key: "keys.listMounted", input: LibraryArgs<null>, result: string[] } |
{ key: "library.getStatistics", input: LibraryArgs<null>, result: Statistics } |
{ key: "library.list", input: never, result: LibraryConfigWrapped[] } |
{ key: "locations.getById", input: LibraryArgs<number>, result: { id: number, pub_id: number[], node_id: number, name: string, path: string, total_capacity: number | null, available_capacity: number | null, is_archived: boolean, generate_preview_media: boolean, sync_preview_media: boolean, hidden: boolean, date_created: string, indexer_rules: IndexerRulesInLocation[] } | null } |
{ key: "locations.getById", input: LibraryArgs<number>, result: location_with_indexer_rules | null } |
{ key: "locations.getExplorerData", input: LibraryArgs<LocationExplorerArgs>, result: ExplorerData } |
{ key: "locations.indexer_rules.get", input: LibraryArgs<number>, result: IndexerRule } |
{ key: "locations.indexer_rules.list", input: LibraryArgs<null>, result: IndexerRule[] } |
@ -66,7 +66,7 @@ export type Procedures = {
{ key: "locations.fullRescan", input: LibraryArgs<number>, result: null } |
{ key: "locations.indexer_rules.create", input: LibraryArgs<IndexerRuleCreateArgs>, result: IndexerRule } |
{ key: "locations.indexer_rules.delete", input: LibraryArgs<number>, result: null } |
{ key: "locations.quickRescan", input: LibraryArgs<null>, result: null } |
{ key: "locations.quickRescan", input: LibraryArgs<LightScanArgs>, result: null } |
{ key: "locations.relink", input: LibraryArgs<string>, result: null } |
{ key: "locations.update", input: LibraryArgs<LocationUpdateArgs>, result: null } |
{ key: "nodes.tokenizeSensitiveKey", input: TokenizeKeyArgs, result: TokenizeResponse } |
@ -155,8 +155,6 @@ export type IndexerRule = { id: number, kind: number, name: string, parameters:
*/
export type IndexerRuleCreateArgs = { kind: RuleKind, name: string, parameters: number[] }
export type IndexerRulesInLocation = { date_created: string, location_id: number, indexer_rule_id: number }
export type InvalidateOperationEvent = { key: string, arg: any }
export type JobReport = { id: string, name: string, data: number[] | null, metadata: any | null, date_created: string, date_modified: string, status: JobStatus, task_count: number, completed_task_count: number, message: string, seconds_elapsed: number }
@ -177,6 +175,8 @@ export type LibraryConfig = ({ version: string | null }) & { name: string, descr
export type LibraryConfigWrapped = { uuid: string, config: LibraryConfig }
export type LightScanArgs = { location_id: number, sub_path: string }
export type Location = { id: number, pub_id: number[], node_id: number, name: string, path: string, total_capacity: number | null, available_capacity: number | null, is_archived: boolean, generate_preview_media: boolean, sync_preview_media: boolean, hidden: boolean, date_created: string }
/**
@ -296,4 +296,6 @@ export type Volume = { name: string, mount_point: string, total_capacity: string
export type file_path_with_object = { id: number, is_dir: boolean, cas_id: string | null, integrity_checksum: string | null, location_id: number, materialized_path: string, name: string, extension: string, object_id: number | null, parent_id: number | null, key_id: number | null, date_created: string, date_modified: string, date_indexed: string, object: Object | null }
export type location_with_indexer_rules = { id: number, pub_id: number[], node_id: number, name: string, path: string, total_capacity: number | null, available_capacity: number | null, is_archived: boolean, generate_preview_media: boolean, sync_preview_media: boolean, hidden: boolean, date_created: string, indexer_rules: { indexer_rule: IndexerRule }[] }
export type object_with_file_paths = { id: number, pub_id: number[], name: string | null, extension: string | null, kind: number, size_in_bytes: string, key_id: number | null, hidden: boolean, favorite: boolean, important: boolean, has_thumbnail: boolean, has_thumbstrip: boolean, has_video_preview: boolean, ipfs_id: string | null, note: string | null, date_created: string, date_modified: string, date_indexed: string, file_paths: FilePath[] }