mirror of
https://github.com/spacedriveapp/spacedrive
synced 2024-07-02 10:03:28 +00:00
Extend indexer rules with .gitignore
when available (#2459)
* Extend indexer rules with `.gitignore` when available * Make indexer accounts for valid git repos when reading ignore files * Ignore poorly formatted gitignore rules * Reduce allocation in gitignore indexer rules * Fix tests for gitignore indexer rules * Remove code duplication * Remove double call to git rules * Improve doc for todo * rustfmt * Skip star rules that matches a negated pattern when parsing gitignore * Skip conflicting glob rules * Rename and remove print * Make git ignore indexer consider multiple paths * Use gix_ignore to handle gitignore rules * Fix gitignore patterns relative to git repo * Git indexer also consider nested repos * Create a dedicated type for rules coming from git * Consider every gitignore file inside a git repo * Add IgnoredByGit to interface * Add Gitignore system IndexerRule to allow front-end to toggle GitIgnoreRules - Make IndexerRules names unique - CLippy fmt * Fix migration concat --------- Co-authored-by: Vítor Vasconcellos <vasconcellos.dev@gmail.com>
This commit is contained in:
parent
f847b76154
commit
26b6baffb6
82
Cargo.lock
generated
82
Cargo.lock
generated
|
@ -3012,6 +3012,12 @@ version = "0.1.9"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
|
||||
|
||||
[[package]]
|
||||
name = "faster-hex"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a2a2b11eda1d40935b26cf18f6833c526845ae8c41e58d09af6adeb6f0269183"
|
||||
|
||||
[[package]]
|
||||
name = "fastrand"
|
||||
version = "1.9.0"
|
||||
|
@ -3673,6 +3679,73 @@ dependencies = [
|
|||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-features"
|
||||
version = "0.38.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "db4254037d20a247a0367aa79333750146a369719f0c6617fec4f5752cc62b37"
|
||||
dependencies = [
|
||||
"gix-hash",
|
||||
"gix-trace",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-glob"
|
||||
version = "0.16.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "682bdc43cb3c00dbedfcc366de2a849b582efd8d886215dbad2ea662ec156bb5"
|
||||
dependencies = [
|
||||
"bitflags 2.4.1",
|
||||
"bstr",
|
||||
"gix-features",
|
||||
"gix-path",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-hash"
|
||||
version = "0.14.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f93d7df7366121b5018f947a04d37f034717e113dcf9ccd85c34b58e57a74d5e"
|
||||
dependencies = [
|
||||
"faster-hex",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-ignore"
|
||||
version = "0.11.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "640dbeb4f5829f9fc14d31f654a34a0350e43a24e32d551ad130d99bf01f63f1"
|
||||
dependencies = [
|
||||
"bstr",
|
||||
"gix-glob",
|
||||
"gix-path",
|
||||
"gix-trace",
|
||||
"serde",
|
||||
"unicode-bom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-path"
|
||||
version = "0.10.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "23623cf0f475691a6d943f898c4d0b89f5c1a2a64d0f92bce0e0322ee6528783"
|
||||
dependencies = [
|
||||
"bstr",
|
||||
"gix-trace",
|
||||
"home",
|
||||
"once_cell",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-trace"
|
||||
version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f924267408915fddcd558e3f37295cc7d6a3e50f8bd8b606cee0808c3915157e"
|
||||
|
||||
[[package]]
|
||||
name = "glib"
|
||||
version = "0.18.5"
|
||||
|
@ -8983,6 +9056,7 @@ dependencies = [
|
|||
"flate2",
|
||||
"futures",
|
||||
"futures-concurrency",
|
||||
"gix-ignore",
|
||||
"globset",
|
||||
"hostname",
|
||||
"http-body 0.4.6",
|
||||
|
@ -9124,7 +9198,9 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"chrono",
|
||||
"futures-concurrency",
|
||||
"gix-ignore",
|
||||
"globset",
|
||||
"once_cell",
|
||||
"prisma-client-rust",
|
||||
"rmp-serde",
|
||||
"rspc",
|
||||
|
@ -11650,6 +11726,12 @@ version = "0.1.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56d12260fb92d52f9008be7e4bca09f584780eb2266dc8fecc6a192bec561694"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-bom"
|
||||
version = "2.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7eec5d1121208364f6793f7d2e222bf75a915c19557537745b195b253dd64217"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ccc"
|
||||
version = "0.1.2"
|
||||
|
|
|
@ -18,7 +18,7 @@ use hyper::server::{accept::Accept, conn::AddrIncoming};
|
|||
use rand::{distributions::Alphanumeric, Rng};
|
||||
use sd_core::{custom_uri, Node, NodeError};
|
||||
use serde::Deserialize;
|
||||
use tauri::{async_runtime::block_on, plugin::TauriPlugin, AppHandle, RunEvent, Runtime};
|
||||
use tauri::{async_runtime::block_on, plugin::TauriPlugin, RunEvent, Runtime};
|
||||
use thiserror::Error;
|
||||
use tokio::{net::TcpListener, task::block_in_place};
|
||||
use tracing::info;
|
||||
|
|
|
@ -129,6 +129,7 @@ slotmap = "1.0.6"
|
|||
sysinfo = "0.29.10"
|
||||
tar = "0.4.40"
|
||||
tower-service = "0.3.2"
|
||||
gix-ignore = "0.11.2"
|
||||
|
||||
# Override features of transitive dependencies
|
||||
[dependencies.openssl]
|
||||
|
|
|
@ -1,7 +1,10 @@
|
|||
use crate::{indexer, Error, NonCriticalError};
|
||||
|
||||
use sd_core_file_path_helper::{FilePathError, FilePathMetadata, IsolatedFilePathData};
|
||||
use sd_core_indexer_rules::{IndexerRuler, MetadataForIndexerRules, RuleKind};
|
||||
use sd_core_indexer_rules::{
|
||||
seed::{GitIgnoreRules, GITIGNORE},
|
||||
IndexerRuler, MetadataForIndexerRules, RuleKind,
|
||||
};
|
||||
use sd_core_prisma_helpers::{file_path_pub_and_cas_ids, file_path_walker};
|
||||
|
||||
use sd_prisma::prisma::file_path;
|
||||
|
@ -540,6 +543,14 @@ where
|
|||
let (to_create, to_update, total_size, to_remove, accepted_ancestors, handles) = loop {
|
||||
match stage {
|
||||
WalkerStage::Start => {
|
||||
if indexer_ruler.has_system(&GITIGNORE).await {
|
||||
if let Some(rules) =
|
||||
GitIgnoreRules::get_rules_if_in_git_repo(root.as_ref(), path).await
|
||||
{
|
||||
indexer_ruler.extend(rules.map(Into::into)).await;
|
||||
}
|
||||
}
|
||||
|
||||
*stage = WalkerStage::Walking {
|
||||
read_dir_stream: ReadDirStream::new(fs::read_dir(&path).await.map_err(
|
||||
|e| {
|
||||
|
@ -1130,7 +1141,7 @@ mod tests {
|
|||
use globset::{Glob, GlobSetBuilder};
|
||||
use lending_stream::{LendingStream, StreamExt};
|
||||
use tempfile::{tempdir, TempDir};
|
||||
use tokio::fs;
|
||||
use tokio::{fs, io::AsyncWriteExt};
|
||||
use tracing::debug;
|
||||
use tracing_test::traced_test;
|
||||
|
||||
|
@ -1188,23 +1199,30 @@ mod tests {
|
|||
// root
|
||||
// |__ rust_project
|
||||
// | |__ .git
|
||||
// | |__ <empty>
|
||||
// | | |__ <empty>
|
||||
// | |__ .gitignore
|
||||
// | |__ ignorable.file
|
||||
// | |__ Cargo.toml
|
||||
// | |__ src
|
||||
// | | |__ main.rs
|
||||
// | |__ target
|
||||
// | |__ debug
|
||||
// | |__ main
|
||||
// |__ inner
|
||||
// | |__ node_project
|
||||
// | |__ .git
|
||||
// | |__ <empty>
|
||||
// | |__ package.json
|
||||
// | |__ src
|
||||
// | | |__ App.tsx
|
||||
// | |__ node_modules
|
||||
// | |__ react
|
||||
// | |__ package.json
|
||||
// | | |__ debug
|
||||
// | | |__ main
|
||||
// | |__ partial
|
||||
// | | |__ ignoreme
|
||||
// | | |__ readme
|
||||
// | |__ inner
|
||||
// | |__ node_project
|
||||
// | |__ .git
|
||||
// | | |__ <empty>
|
||||
// | |__ .gitignore
|
||||
// | |__ ignorable.file
|
||||
// | |__ package.json
|
||||
// | |__ src
|
||||
// | | |__ App.tsx
|
||||
// | |__ node_modules
|
||||
// | |__ react
|
||||
// | |__ package.json
|
||||
// |__ photos
|
||||
// |__ photo1.png
|
||||
// |__ photo2.jpg
|
||||
|
@ -1223,14 +1241,33 @@ mod tests {
|
|||
fs::create_dir(&node_project).await.unwrap();
|
||||
fs::create_dir(&photos).await.unwrap();
|
||||
|
||||
// Inner directory partially ignored by git
|
||||
let partial_dir = rust_project.join("partial");
|
||||
fs::create_dir(&partial_dir).await.unwrap();
|
||||
fs::File::create(partial_dir.join("ignoreme"))
|
||||
.await
|
||||
.unwrap();
|
||||
fs::File::create(partial_dir.join("readme")).await.unwrap();
|
||||
|
||||
// Making rust and node projects a git repository
|
||||
fs::create_dir(rust_project.join(".git")).await.unwrap();
|
||||
let gitignore = rust_project.join(".gitignore");
|
||||
let mut file = fs::File::create(gitignore).await.unwrap();
|
||||
file.write_all(b"*.file\n/target\npartial/ignoreme")
|
||||
.await
|
||||
.unwrap();
|
||||
fs::create_dir(node_project.join(".git")).await.unwrap();
|
||||
let gitignore = node_project.join(".gitignore");
|
||||
let mut file = fs::File::create(gitignore).await.unwrap();
|
||||
file.write_all(b"ignorable.file").await.unwrap();
|
||||
|
||||
// Populating rust project
|
||||
fs::File::create(rust_project.join("Cargo.toml"))
|
||||
.await
|
||||
.unwrap();
|
||||
fs::File::create(rust_project.join("ignorable.file"))
|
||||
.await
|
||||
.unwrap();
|
||||
let rust_src_dir = rust_project.join("src");
|
||||
fs::create_dir(&rust_src_dir).await.unwrap();
|
||||
fs::File::create(rust_src_dir.join("main.rs"))
|
||||
|
@ -1246,6 +1283,9 @@ mod tests {
|
|||
fs::File::create(node_project.join("package.json"))
|
||||
.await
|
||||
.unwrap();
|
||||
fs::File::create(node_project.join("ignorable.file"))
|
||||
.await
|
||||
.unwrap();
|
||||
let node_src_dir = node_project.join("src");
|
||||
fs::create_dir(&node_src_dir).await.unwrap();
|
||||
fs::File::create(node_src_dir.join("App.tsx"))
|
||||
|
@ -1363,15 +1403,16 @@ mod tests {
|
|||
let expected = [
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/.git"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/.gitignore"), false), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/Cargo.toml"), false), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/partial"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/partial/readme"), false), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src/main.rs"), false), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target/debug"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target/debug/main"), false), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.git"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.gitignore"), false), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/package.json"), false), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src/App.tsx"), false), metadata },
|
||||
|
@ -1458,16 +1499,17 @@ mod tests {
|
|||
let expected = [
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/.git"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/.gitignore"), false), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/Cargo.toml"), false), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src/main.rs"), false), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target/debug"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target/debug/main"), false), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/partial"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/partial/readme"), false), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.git"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/package.json"), false), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.gitignore"), false), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src/App.tsx"), false), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/node_modules"), true), metadata },
|
||||
|
@ -1513,12 +1555,16 @@ mod tests {
|
|||
let expected = [
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/.git"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/.gitignore"), false), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/Cargo.toml"), false), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/partial"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/partial/readme"), false), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src/main.rs"), false), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.git"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.gitignore"), false), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/package.json"), false), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src/App.tsx"), false), metadata },
|
||||
|
|
|
@ -19,6 +19,7 @@ sd-utils = { path = "../../../crates/utils" }
|
|||
chrono = { workspace = true }
|
||||
futures-concurrency = { workspace = true }
|
||||
globset = { workspace = true, features = ["serde1"] }
|
||||
once_cell = { workspace = true }
|
||||
prisma-client-rust = { workspace = true }
|
||||
rmp-serde = { workspace = true }
|
||||
rspc = { workspace = true }
|
||||
|
@ -28,6 +29,7 @@ thiserror = { workspace = true }
|
|||
tokio = { workspace = true, features = ["fs"] }
|
||||
tracing = { workspace = true }
|
||||
uuid = { workspace = true, features = ["v4", "serde"] }
|
||||
gix-ignore = { version = "0.11.2", features = ["serde"] }
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = { workspace = true }
|
||||
|
|
|
@ -32,17 +32,19 @@ use sd_utils::{
|
|||
db::{maybe_missing, MissingFieldError},
|
||||
error::{FileIOError, NonUtf8PathError},
|
||||
};
|
||||
use seed::SystemIndexerRule;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use std::{
|
||||
collections::{HashMap, HashSet},
|
||||
fs::Metadata,
|
||||
path::Path,
|
||||
path::{Path, PathBuf},
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use futures_concurrency::future::TryJoin;
|
||||
use gix_ignore::{glob::pattern::Case, Search};
|
||||
use globset::{Glob, GlobSet, GlobSetBuilder};
|
||||
use rmp_serde::{decode, encode};
|
||||
use rspc::ErrorCode;
|
||||
|
@ -149,6 +151,9 @@ impl IndexerRuleCreateArgs {
|
|||
parameters.into_iter().collect(),
|
||||
))
|
||||
}
|
||||
RuleKind::IgnoredByGit => {
|
||||
Ok(RulePerKind::IgnoredByGit(PathBuf::new(), Search::default()))
|
||||
}
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()?,
|
||||
)?;
|
||||
|
@ -184,13 +189,14 @@ pub enum RuleKind {
|
|||
RejectFilesByGlob = 1,
|
||||
AcceptIfChildrenDirectoriesArePresent = 2,
|
||||
RejectIfChildrenDirectoriesArePresent = 3,
|
||||
IgnoredByGit = 4,
|
||||
}
|
||||
|
||||
impl RuleKind {
|
||||
#[must_use]
|
||||
pub const fn variant_count() -> usize {
|
||||
// TODO: Use https://doc.rust-lang.org/std/mem/fn.variant_count.html if it ever gets stabilized
|
||||
4
|
||||
5
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -202,7 +208,7 @@ impl RuleKind {
|
|||
/// In case of `ParametersPerKind::AcceptIfChildrenDirectoriesArePresent` or
|
||||
/// `ParametersPerKind::RejectIfChildrenDirectoriesArePresent`
|
||||
/// first we change the data structure to a vector, then we serialize it.
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum RulePerKind {
|
||||
// TODO: Add an indexer rule that filter files based on their extended attributes
|
||||
// https://learn.microsoft.com/en-us/windows/win32/fileio/file-attribute-constants
|
||||
|
@ -211,6 +217,7 @@ pub enum RulePerKind {
|
|||
RejectFilesByGlob(Vec<Glob>, GlobSet),
|
||||
AcceptIfChildrenDirectoriesArePresent(HashSet<String>),
|
||||
RejectIfChildrenDirectoriesArePresent(HashSet<String>),
|
||||
IgnoredByGit(PathBuf, Search),
|
||||
}
|
||||
|
||||
impl RulePerKind {
|
||||
|
@ -285,6 +292,10 @@ impl RulePerKind {
|
|||
RuleKind::RejectFilesByGlob,
|
||||
reject_by_glob(source, reject_glob_set),
|
||||
)),
|
||||
Self::IgnoredByGit(git_repo, patterns) => Ok((
|
||||
RuleKind::IgnoredByGit,
|
||||
accept_by_gitpattern(source.as_ref(), git_repo, patterns),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -313,11 +324,29 @@ impl RulePerKind {
|
|||
RuleKind::RejectFilesByGlob,
|
||||
reject_by_glob(source, reject_glob_set),
|
||||
)),
|
||||
Self::IgnoredByGit(base_dir, patterns) => Ok((
|
||||
RuleKind::IgnoredByGit,
|
||||
accept_by_gitpattern(source.as_ref(), base_dir, patterns),
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
fn accept_by_gitpattern(source: &Path, base_dir: &Path, search: &Search) -> bool {
|
||||
let relative = source
|
||||
.strip_prefix(base_dir)
|
||||
.expect("`base_dir` should be our git repo, and `source` should be inside of it");
|
||||
|
||||
let Some(src) = relative.to_str().map(|s| s.as_bytes().into()) else {
|
||||
return false;
|
||||
};
|
||||
|
||||
search
|
||||
.pattern_matching_relative_path(src, Some(source.is_dir()), Case::Fold)
|
||||
.map_or(true, |rule| rule.pattern.is_negative())
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct IndexerRule {
|
||||
pub id: Option<i32>,
|
||||
pub name: String,
|
||||
|
@ -387,7 +416,6 @@ impl IndexerRule {
|
|||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct IndexerRuler {
|
||||
// TODO(fogodev): Use this RwLock later to acquire new rules while applying rules, like from a .gitignore file
|
||||
rules: Arc<RwLock<Vec<IndexerRule>>>,
|
||||
}
|
||||
|
||||
|
@ -436,6 +464,22 @@ impl IndexerRuler {
|
|||
|
||||
inner(&self.rules.read().await, source.as_ref(), metadata).await
|
||||
}
|
||||
|
||||
/// Extend the indexer rules with the contents from an iterator of rules
|
||||
pub async fn extend(&self, iter: impl IntoIterator<Item = IndexerRule> + Send) {
|
||||
let mut indexer = self.rules.write().await;
|
||||
indexer.extend(iter);
|
||||
}
|
||||
|
||||
/// Returns `true` when at least one of the rules currently held by this ruler compares
/// equal to the given system `rule` (using `SystemIndexerRule`'s `PartialEq<IndexerRule>`).
pub async fn has_system(&self, rule: &SystemIndexerRule) -> bool {
    let rules = self.rules.read().await;

    rules.iter().any(|inner_rule| rule == inner_rule)
}
|
||||
}
|
||||
|
||||
impl TryFrom<&indexer_rule::Data> for IndexerRule {
|
||||
|
|
|
@ -1,10 +1,16 @@
|
|||
use std::path::{Path, PathBuf};
|
||||
|
||||
use futures_concurrency::future::Join;
|
||||
use gix_ignore::{glob::search::pattern::List, search::Ignore, Search};
|
||||
use sd_prisma::prisma::{indexer_rule, PrismaClient};
|
||||
|
||||
use chrono::Utc;
|
||||
use thiserror::Error;
|
||||
use tokio::fs;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::{IndexerRule, IndexerRuleError, RulePerKind};
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum SeederError {
|
||||
|
@ -12,14 +18,126 @@ pub enum SeederError {
|
|||
IndexerRules(#[from] IndexerRuleError),
|
||||
#[error("An error occurred with the database while applying migrations: {0}")]
|
||||
DatabaseError(#[from] prisma_client_rust::QueryError),
|
||||
#[error("Failed to parse indexer rules based on external system")]
|
||||
InhirentedExternalRules,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct GitIgnoreRules {
|
||||
rules: RulePerKind,
|
||||
}
|
||||
|
||||
impl GitIgnoreRules {
|
||||
pub async fn get_rules_if_in_git_repo(
|
||||
library_root: &Path,
|
||||
current: &Path,
|
||||
) -> Option<Result<Self, SeederError>> {
|
||||
let mut git_repo = None;
|
||||
|
||||
let mut ignores = Vec::new();
|
||||
|
||||
for ancestor in current
|
||||
.ancestors()
|
||||
.take_while(|&path| path.starts_with(library_root))
|
||||
{
|
||||
let git_ignore = ancestor.join(".gitignore");
|
||||
|
||||
// consider any `.gitignore` files that are inside a git repo
|
||||
if matches!(fs::try_exists(&git_ignore).await, Ok(true)) {
|
||||
ignores.push(git_ignore);
|
||||
}
|
||||
|
||||
if Self::is_git_repo(ancestor).await {
|
||||
git_repo.replace(ancestor);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let git_repo = git_repo?;
|
||||
Some(Self::parse_gitrepo(git_repo, ignores).await)
|
||||
}
|
||||
|
||||
async fn parse_gitrepo(git_repo: &Path, gitignores: Vec<PathBuf>) -> Result<Self, SeederError> {
|
||||
let mut search = Search::default();
|
||||
|
||||
let gitignores = gitignores
|
||||
.into_iter()
|
||||
.map(Self::parse_git_ignore)
|
||||
.collect::<Vec<_>>()
|
||||
.join()
|
||||
.await;
|
||||
search
|
||||
.patterns
|
||||
.extend(gitignores.into_iter().filter_map(Result::ok));
|
||||
|
||||
let git_exclude_rules = Self::parse_git_exclude(git_repo.join(".git")).await;
|
||||
if let Ok(rules) = git_exclude_rules {
|
||||
search.patterns.extend(rules);
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
rules: RulePerKind::IgnoredByGit(git_repo.to_owned(), search),
|
||||
})
|
||||
}
|
||||
|
||||
async fn parse_git_ignore(gitignore: PathBuf) -> Result<List<Ignore>, SeederError> {
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let mut buf = Vec::with_capacity(30);
|
||||
if let Ok(Some(patterns)) = List::from_file(gitignore, None, true, &mut buf) {
|
||||
Ok(patterns)
|
||||
} else {
|
||||
Err(SeederError::InhirentedExternalRules)
|
||||
}
|
||||
})
|
||||
.await
|
||||
.map_err(|_| SeederError::InhirentedExternalRules)?
|
||||
}
|
||||
|
||||
async fn parse_git_exclude(dot_git: PathBuf) -> Result<Vec<List<Ignore>>, SeederError> {
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let mut buf = Vec::new();
|
||||
Search::from_git_dir(dot_git.as_ref(), None, &mut buf)
|
||||
.map(|search| search.patterns)
|
||||
.map_err(|_| SeederError::InhirentedExternalRules)
|
||||
})
|
||||
.await
|
||||
.map_err(|_| SeederError::InhirentedExternalRules)?
|
||||
}
|
||||
|
||||
async fn is_git_repo(path: &Path) -> bool {
|
||||
let path = path.join(".git");
|
||||
tokio::task::spawn_blocking(move || path.is_dir())
|
||||
.await
|
||||
.unwrap_or_default()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<GitIgnoreRules> for IndexerRule {
|
||||
fn from(git: GitIgnoreRules) -> Self {
|
||||
Self {
|
||||
id: None,
|
||||
name: ".gitignore'd".to_owned(),
|
||||
default: true,
|
||||
date_created: Utc::now(),
|
||||
date_modified: Utc::now(),
|
||||
rules: vec![git.rules],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct SystemIndexerRule {
|
||||
name: &'static str,
|
||||
rules: Vec<RulePerKind>,
|
||||
default: bool,
|
||||
}
|
||||
|
||||
impl PartialEq<IndexerRule> for SystemIndexerRule {
    /// A system rule is considered equal to an `IndexerRule` when both have the same name;
    /// no other field takes part in the comparison.
    fn eq(&self, other: &IndexerRule) -> bool {
        let other_name: &str = other.name.as_str();

        self.name == other_name
    }
}
|
||||
|
||||
impl From<SystemIndexerRule> for IndexerRule {
|
||||
fn from(rule: SystemIndexerRule) -> Self {
|
||||
Self {
|
||||
|
@ -33,14 +151,33 @@ impl From<SystemIndexerRule> for IndexerRule {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<&SystemIndexerRule> for IndexerRule {
|
||||
fn from(rule: &SystemIndexerRule) -> Self {
|
||||
Self {
|
||||
id: None,
|
||||
name: rule.name.to_string(),
|
||||
default: rule.default,
|
||||
rules: rule.rules.clone(),
|
||||
date_created: Utc::now(),
|
||||
date_modified: Utc::now(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Seeds system indexer rules into a new or existing library,
|
||||
pub async fn new_or_existing_library(db: &PrismaClient) -> Result<(), SeederError> {
|
||||
use indexer_rule::{date_created, date_modified, default, name, rules_per_kind};
|
||||
|
||||
// DO NOT REORDER THIS ARRAY!
|
||||
for (i, rule) in [no_os_protected(), no_hidden(), no_git(), only_images()]
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
for (i, rule) in [
|
||||
&NO_SYSTEM_FILES,
|
||||
&NO_HIDDEN,
|
||||
&NO_GIT,
|
||||
&GITIGNORE,
|
||||
&ONLY_IMAGES,
|
||||
]
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
{
|
||||
let pub_id = sd_utils::uuid_to_bytes(Uuid::from_u128(i as u128));
|
||||
let rules = rmp_serde::to_vec_named(&rule.rules).map_err(IndexerRuleError::from)?;
|
||||
|
@ -66,147 +203,140 @@ pub async fn new_or_existing_library(db: &PrismaClient) -> Result<(), SeederErro
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
#[allow(clippy::missing_panics_doc)]
|
||||
pub fn no_os_protected() -> SystemIndexerRule {
|
||||
pub static NO_SYSTEM_FILES: Lazy<SystemIndexerRule> = Lazy::new(|| {
|
||||
SystemIndexerRule {
|
||||
// TODO: On windows, beside the listed files, any file with the FILE_ATTRIBUTE_SYSTEM should be considered a system file
|
||||
// https://learn.microsoft.com/en-us/windows/win32/fileio/file-attribute-constants#FILE_ATTRIBUTE_SYSTEM
|
||||
name: "No OS protected",
|
||||
default: true,
|
||||
rules: vec![
|
||||
RulePerKind::new_reject_files_by_globs_str(
|
||||
[
|
||||
vec![
|
||||
"**/.spacedrive",
|
||||
],
|
||||
// Globset, even on Windows, requires the use of / as a separator
|
||||
// https://github.com/github/gitignore/blob/main/Global/Windows.gitignore
|
||||
// https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file
|
||||
#[cfg(target_os = "windows")]
|
||||
vec![
|
||||
// Windows thumbnail cache files
|
||||
"**/{Thumbs.db,Thumbs.db:encryptable,ehthumbs.db,ehthumbs_vista.db}",
|
||||
// Dump file
|
||||
"**/*.stackdump",
|
||||
// Folder config file
|
||||
"**/[Dd]esktop.ini",
|
||||
// Recycle Bin used on file shares
|
||||
"**/$RECYCLE.BIN",
|
||||
// Chkdsk recovery directory
|
||||
"**/FOUND.[0-9][0-9][0-9]",
|
||||
// Reserved names
|
||||
"**/{CON,PRN,AUX,NUL,COM0,COM1,COM2,COM3,COM4,COM5,COM6,COM7,COM8,COM9,LPT0,LPT1,LPT2,LPT3,LPT4,LPT5,LPT6,LPT7,LPT8,LPT9}",
|
||||
"**/{CON,PRN,AUX,NUL,COM0,COM1,COM2,COM3,COM4,COM5,COM6,COM7,COM8,COM9,LPT0,LPT1,LPT2,LPT3,LPT4,LPT5,LPT6,LPT7,LPT8,LPT9}.*",
|
||||
// User special files
|
||||
"C:/Users/*/NTUSER.DAT*",
|
||||
"C:/Users/*/ntuser.dat*",
|
||||
"C:/Users/*/{ntuser.ini,ntuser.dat,NTUSER.DAT}",
|
||||
// User special folders (most of these the user don't even have permission to access)
|
||||
"C:/Users/*/{Cookies,AppData,NetHood,Recent,PrintHood,SendTo,Templates,Start Menu,Application Data,Local Settings,My Documents}",
|
||||
// System special folders
|
||||
"C:/{$Recycle.Bin,$WinREAgent,Documents and Settings,Program Files,Program Files (x86),ProgramData,Recovery,PerfLogs,Windows,Windows.old}",
|
||||
// NTFS internal dir, can exists on any drive
|
||||
"[A-Z]:/System Volume Information",
|
||||
// System special files
|
||||
"C:/{config,pagefile,hiberfil}.sys",
|
||||
// Windows can create a swapfile on any drive
|
||||
"[A-Z]:/swapfile.sys",
|
||||
"C:/DumpStack.log.tmp",
|
||||
],
|
||||
// https://github.com/github/gitignore/blob/main/Global/macOS.gitignore
|
||||
// https://developer.apple.com/library/archive/documentation/FileManagement/Conceptual/FileSystemProgrammingGuide/FileSystemOverview/FileSystemOverview.html#//apple_ref/doc/uid/TP40010672-CH2-SW14
|
||||
#[cfg(any(target_os = "ios", target_os = "macos"))]
|
||||
vec![
|
||||
"**/.{DS_Store,AppleDouble,LSOverride}",
|
||||
// Icon must end with two \r
|
||||
"**/Icon\r\r",
|
||||
// Thumbnails
|
||||
"**/._*",
|
||||
],
|
||||
#[cfg(target_os = "macos")]
|
||||
vec![
|
||||
"/{System,Network,Library,Applications,.PreviousSystemInformation,.com.apple.templatemigration.boot-install}",
|
||||
"/System/Volumes/Data/{System,Network,Library,Applications,.PreviousSystemInformation,.com.apple.templatemigration.boot-install}",
|
||||
"/Users/*/{Library,Applications}",
|
||||
"/System/Volumes/Data/Users/*/{Library,Applications}",
|
||||
"**/*.photoslibrary/{database,external,private,resources,scope}",
|
||||
// Files that might appear in the root of a volume
|
||||
"**/.{DocumentRevisions-V100,fseventsd,Spotlight-V100,TemporaryItems,Trashes,VolumeIcon.icns,com.apple.timemachine.donotpresent}",
|
||||
// Directories potentially created on remote AFP share
|
||||
"**/.{AppleDB,AppleDesktop,apdisk}",
|
||||
"**/{Network Trash Folder,Temporary Items}",
|
||||
],
|
||||
// https://github.com/github/gitignore/blob/main/Global/Linux.gitignore
|
||||
#[cfg(target_os = "linux")]
|
||||
vec![
|
||||
"**/*~",
|
||||
// temporary files which can be created if a process still has a handle open of a deleted file
|
||||
"**/.fuse_hidden*",
|
||||
// KDE directory preferences
|
||||
"**/.directory",
|
||||
// Linux trash folder which might appear on any partition or disk
|
||||
"**/.Trash-*",
|
||||
// .nfs files are created when an open file is removed but is still being accessed
|
||||
"**/.nfs*",
|
||||
],
|
||||
#[cfg(target_os = "android")]
|
||||
vec![
|
||||
"**/.nomedia",
|
||||
"**/.thumbnails",
|
||||
],
|
||||
// https://en.wikipedia.org/wiki/Unix_filesystem#Conventional_directory_layout
|
||||
// https://en.wikipedia.org/wiki/Filesystem_Hierarchy_Standard
|
||||
#[cfg(target_family = "unix")]
|
||||
vec![
|
||||
// Directories containing unix memory/device mapped files/dirs
|
||||
"/{dev,sys,proc}",
|
||||
// Directories containing special files for current running programs
|
||||
"/{run,var,boot}",
|
||||
// ext2-4 recovery directory
|
||||
"**/lost+found",
|
||||
],
|
||||
]
|
||||
.into_iter()
|
||||
.flatten()
|
||||
).expect("this is hardcoded and should always work"),
|
||||
],
|
||||
}
|
||||
// TODO: On Windows, besides the listed files, any file with the FILE_ATTRIBUTE_SYSTEM attribute should be considered a system file
|
||||
// https://learn.microsoft.com/en-us/windows/win32/fileio/file-attribute-constants#FILE_ATTRIBUTE_SYSTEM
|
||||
name: "No System files",
|
||||
default: true,
|
||||
rules: vec![
|
||||
RulePerKind::new_reject_files_by_globs_str(
|
||||
[
|
||||
vec![
|
||||
"**/.spacedrive",
|
||||
],
|
||||
// Globset, even on Windows, requires the use of / as a separator
|
||||
// https://github.com/github/gitignore/blob/main/Global/Windows.gitignore
|
||||
// https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file
|
||||
#[cfg(target_os = "windows")]
|
||||
vec![
|
||||
// Windows thumbnail cache files
|
||||
"**/{Thumbs.db,Thumbs.db:encryptable,ehthumbs.db,ehthumbs_vista.db}",
|
||||
// Dump file
|
||||
"**/*.stackdump",
|
||||
// Folder config file
|
||||
"**/[Dd]esktop.ini",
|
||||
// Recycle Bin used on file shares
|
||||
"**/$RECYCLE.BIN",
|
||||
// Chkdsk recovery directory
|
||||
"**/FOUND.[0-9][0-9][0-9]",
|
||||
// Reserved names
|
||||
"**/{CON,PRN,AUX,NUL,COM0,COM1,COM2,COM3,COM4,COM5,COM6,COM7,COM8,COM9,LPT0,LPT1,LPT2,LPT3,LPT4,LPT5,LPT6,LPT7,LPT8,LPT9}",
|
||||
"**/{CON,PRN,AUX,NUL,COM0,COM1,COM2,COM3,COM4,COM5,COM6,COM7,COM8,COM9,LPT0,LPT1,LPT2,LPT3,LPT4,LPT5,LPT6,LPT7,LPT8,LPT9}.*",
|
||||
// User special files
|
||||
"C:/Users/*/NTUSER.DAT*",
|
||||
"C:/Users/*/ntuser.dat*",
|
||||
"C:/Users/*/{ntuser.ini,ntuser.dat,NTUSER.DAT}",
|
||||
// User special folders (most of these the user doesn't even have permission to access)
|
||||
"C:/Users/*/{Cookies,AppData,NetHood,Recent,PrintHood,SendTo,Templates,Start Menu,Application Data,Local Settings,My Documents}",
|
||||
// System special folders
|
||||
"C:/{$Recycle.Bin,$WinREAgent,Documents and Settings,Program Files,Program Files (x86),ProgramData,Recovery,PerfLogs,Windows,Windows.old}",
|
||||
// NTFS internal dir, can exist on any drive
|
||||
"[A-Z]:/System Volume Information",
|
||||
// System special files
|
||||
"C:/{config,pagefile,hiberfil}.sys",
|
||||
// Windows can create a swapfile on any drive
|
||||
"[A-Z]:/swapfile.sys",
|
||||
"C:/DumpStack.log.tmp",
|
||||
],
|
||||
// https://github.com/github/gitignore/blob/main/Global/macOS.gitignore
|
||||
// https://developer.apple.com/library/archive/documentation/FileManagement/Conceptual/FileSystemProgrammingGuide/FileSystemOverview/FileSystemOverview.html#//apple_ref/doc/uid/TP40010672-CH2-SW14
|
||||
#[cfg(any(target_os = "ios", target_os = "macos"))]
|
||||
vec![
|
||||
"**/.{DS_Store,AppleDouble,LSOverride}",
|
||||
// Icon must end with two \r
|
||||
"**/Icon\r\r",
|
||||
// Thumbnails
|
||||
"**/._*",
|
||||
],
|
||||
#[cfg(target_os = "macos")]
|
||||
vec![
|
||||
"/{System,Network,Library,Applications,.PreviousSystemInformation,.com.apple.templatemigration.boot-install}",
|
||||
"/System/Volumes/Data/{System,Network,Library,Applications,.PreviousSystemInformation,.com.apple.templatemigration.boot-install}",
|
||||
"/Users/*/{Library,Applications}",
|
||||
"/System/Volumes/Data/Users/*/{Library,Applications}",
|
||||
"**/*.photoslibrary/{database,external,private,resources,scope}",
|
||||
// Files that might appear in the root of a volume
|
||||
"**/.{DocumentRevisions-V100,fseventsd,Spotlight-V100,TemporaryItems,Trashes,VolumeIcon.icns,com.apple.timemachine.donotpresent}",
|
||||
// Directories potentially created on remote AFP share
|
||||
"**/.{AppleDB,AppleDesktop,apdisk}",
|
||||
"**/{Network Trash Folder,Temporary Items}",
|
||||
],
|
||||
// https://github.com/github/gitignore/blob/main/Global/Linux.gitignore
|
||||
#[cfg(target_os = "linux")]
|
||||
vec![
|
||||
"**/*~",
|
||||
// temporary files which can be created if a process still has a handle open of a deleted file
|
||||
"**/.fuse_hidden*",
|
||||
// KDE directory preferences
|
||||
"**/.directory",
|
||||
// Linux trash folder which might appear on any partition or disk
|
||||
"**/.Trash-*",
|
||||
// .nfs files are created when an open file is removed but is still being accessed
|
||||
"**/.nfs*",
|
||||
],
|
||||
#[cfg(target_os = "android")]
|
||||
vec![
|
||||
"**/.nomedia",
|
||||
"**/.thumbnails",
|
||||
],
|
||||
// https://en.wikipedia.org/wiki/Unix_filesystem#Conventional_directory_layout
|
||||
// https://en.wikipedia.org/wiki/Filesystem_Hierarchy_Standard
|
||||
#[cfg(target_family = "unix")]
|
||||
vec![
|
||||
// Directories containing unix memory/device mapped files/dirs
|
||||
"/{dev,sys,proc}",
|
||||
// Directories containing special files for current running programs
|
||||
"/{run,var,boot}",
|
||||
// ext2-4 recovery directory
|
||||
"**/lost+found",
|
||||
],
|
||||
]
|
||||
.into_iter()
|
||||
.flatten()
|
||||
).expect("this is hardcoded and should always work"),
|
||||
],
|
||||
}
|
||||
});
|
||||
|
||||
/// Builds the "No Hidden" system indexer rule.
///
/// The returned `SystemIndexerRule` has `default: false` and carries a single
/// `RulePerKind` created by `new_reject_files_by_globs_str` from the glob `**/.*`
/// (presumably entries whose name starts with a dot, at any depth — confirm
/// against the glob implementation in use).
///
/// # Panics
///
/// Panics through `expect` if the rule cannot be built from the hardcoded glob.
#[must_use]
|
||||
#[allow(clippy::missing_panics_doc)]
|
||||
pub fn no_hidden() -> SystemIndexerRule {
|
||||
SystemIndexerRule {
|
||||
name: "No Hidden",
|
||||
default: false,
|
||||
// Single reject rule; the glob is a compile-time literal, hence the `expect`.
rules: vec![RulePerKind::new_reject_files_by_globs_str(["**/.*"])
|
||||
.expect("this is hardcoded and should always work")],
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
#[allow(clippy::missing_panics_doc)]
|
||||
fn no_git() -> SystemIndexerRule {
|
||||
SystemIndexerRule {
|
||||
name: "No Git",
|
||||
default: false,
|
||||
rules: vec![RulePerKind::new_reject_files_by_globs_str([
|
||||
"**/{.git,.gitignore,.gitattributes,.gitkeep,.gitconfig,.gitmodules}",
|
||||
])
|
||||
pub static NO_HIDDEN: Lazy<SystemIndexerRule> = Lazy::new(|| SystemIndexerRule {
|
||||
name: "No Hidden files",
|
||||
default: false,
|
||||
rules: vec![RulePerKind::new_reject_files_by_globs_str(["**/.*"])
|
||||
.expect("this is hardcoded and should always work")],
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
/// Builds the "Only Images" system indexer rule.
///
/// The returned `SystemIndexerRule` has `default: false` and carries a single
/// `RulePerKind` created by `new_accept_files_by_globs_str` from one glob that
/// lists the extensions avif, bmp, gif, ico, jpeg, jpg, png, svg, tif, tiff and webp.
///
/// # Panics
///
/// Panics through `expect` if the rule cannot be built from the hardcoded glob.
#[must_use]
|
||||
#[allow(clippy::missing_panics_doc)]
|
||||
fn only_images() -> SystemIndexerRule {
|
||||
SystemIndexerRule {
|
||||
name: "Only Images",
|
||||
default: false,
|
||||
// Accept rule (not reject): built from a brace-alternation of image extensions.
rules: vec![RulePerKind::new_accept_files_by_globs_str([
|
||||
"*.{avif,bmp,gif,ico,jpeg,jpg,png,svg,tif,tiff,webp}",
|
||||
])
|
||||
.expect("this is hardcoded and should always work")],
|
||||
}
|
||||
}
|
||||
/// Lazily initialized "No Git files" system indexer rule.
///
/// `default` is `true` (presumably meaning the rule is selected by default —
/// confirm against `SystemIndexerRule`). The rule list holds a single
/// `RulePerKind` created by `new_reject_files_by_globs_str` from one glob naming
/// `.git`, `.gitignore`, `.gitattributes`, `.gitkeep`, `.gitconfig` and
/// `.gitmodules` under any directory prefix (`**/`).
///
/// The initializer panics through `expect` if the rule cannot be built from the
/// hardcoded glob.
pub static NO_GIT: Lazy<SystemIndexerRule> = Lazy::new(|| SystemIndexerRule {
|
||||
name: "No Git files",
|
||||
default: true,
|
||||
rules: vec![RulePerKind::new_reject_files_by_globs_str([
|
||||
"**/{.git,.gitignore,.gitattributes,.gitkeep,.gitconfig,.gitmodules}",
|
||||
])
|
||||
.expect("this is hardcoded and should always work")],
|
||||
});
|
||||
|
||||
/// Lazily initialized "Gitignore" system indexer rule.
///
/// `default` is `true` and the rule list is intentionally empty: the rule acts
/// as a marker whose presence can be checked by other code rather than as a
/// set of globs to apply.
pub static GITIGNORE: Lazy<SystemIndexerRule> = Lazy::new(|| SystemIndexerRule {
|
||||
name: "Gitignore",
|
||||
default: true,
|
||||
// Empty rules because this rule is only used to allow the frontend to toggle GitIgnoreRules
|
||||
rules: vec![],
|
||||
});
|
||||
|
||||
/// Lazily initialized "Only Images" system indexer rule.
///
/// `default` is `false`. The rule list holds a single `RulePerKind` created by
/// `new_accept_files_by_globs_str` from one glob listing the extensions avif,
/// bmp, gif, ico, jpeg, jpg, png, svg, tif, tiff and webp.
///
/// The initializer panics through `expect` if the rule cannot be built from the
/// hardcoded glob.
pub static ONLY_IMAGES: Lazy<SystemIndexerRule> = Lazy::new(|| SystemIndexerRule {
|
||||
name: "Only Images",
|
||||
default: false,
|
||||
rules: vec![RulePerKind::new_accept_files_by_globs_str([
|
||||
"*.{avif,bmp,gif,ico,jpeg,jpg,png,svg,tif,tiff,webp}",
|
||||
])
|
||||
.expect("this is hardcoded and should always work")],
|
||||
});
|
||||
|
|
|
@ -31,6 +31,9 @@ impl Serialize for RulePerKind {
|
|||
"RejectIfChildrenDirectoriesArePresent",
|
||||
children,
|
||||
),
|
||||
Self::IgnoredByGit(_, _) => {
|
||||
unreachable!("git ignore rules are dynamic and not serialized")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
-- Update duplicate names to make them unique
-- Every row whose "name" occurs more than once in "indexer_rule" is renamed to
-- <name>_<id>, so that the unique index created at the end of this migration
-- can be built without conflicts.
-- NOTE(review): assumes no existing row is already named like <name>_<id> of
-- another row; verify before running against real data.
|
||||
UPDATE "indexer_rule"
|
||||
SET "name" = "name" || '_' || "id"
|
||||
WHERE "name" IN (
|
||||
SELECT "name"
|
||||
FROM "indexer_rule"
|
||||
GROUP BY "name"
|
||||
HAVING COUNT(*) > 1
|
||||
);
|
||||
|
||||
-- CreateIndex
-- Enforces uniqueness of "name" on "indexer_rule" from this migration onwards.
|
||||
CREATE UNIQUE INDEX "indexer_rule_name_key" ON "indexer_rule"("name");
|
|
@ -624,7 +624,7 @@ model IndexerRule {
|
|||
id Int @id @default(autoincrement())
|
||||
pub_id Bytes @unique
|
||||
|
||||
name String?
|
||||
name String? @unique
|
||||
default Boolean?
|
||||
rules_per_kind Bytes?
|
||||
date_created DateTime?
|
||||
|
|
|
@ -64,6 +64,7 @@ impl Hash for OldIndexerJobInit {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// `IndexerJobData` contains the state of the indexer job, which includes a `location_path` that
|
||||
/// is cached and cast to a `PathBuf` from the `local_path` column in the `location` table. It also
|
||||
/// contains some metadata for logging purposes.
|
||||
|
@ -207,6 +208,7 @@ impl StatefulJob for OldIndexerJobInit {
|
|||
errors,
|
||||
paths_and_sizes,
|
||||
} = walk(
|
||||
&location_path,
|
||||
&to_walk_path,
|
||||
&indexer_rules,
|
||||
update_notifier_fn(ctx),
|
||||
|
@ -395,6 +397,7 @@ impl StatefulJob for OldIndexerJobInit {
|
|||
errors,
|
||||
paths_and_sizes,
|
||||
} = keep_walking(
|
||||
location_path,
|
||||
to_walk_entry,
|
||||
&data.indexer_rules,
|
||||
update_notifier_fn(ctx),
|
||||
|
|
|
@ -79,6 +79,7 @@ pub async fn old_shallow(
|
|||
|
||||
let (walked, to_update, to_remove, errors, _s) = {
|
||||
walk_single_dir(
|
||||
location_path,
|
||||
&to_walk_path,
|
||||
&indexer_rules,
|
||||
file_paths_db_fetcher_fn!(&db),
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
use sd_core_file_path_helper::{FilePathMetadata, IsolatedFilePathData};
|
||||
use sd_core_indexer_rules::{IndexerRule, RuleKind};
|
||||
use sd_core_indexer_rules::{
|
||||
seed::{GitIgnoreRules, GITIGNORE},
|
||||
IndexerRule, RuleKind,
|
||||
};
|
||||
use sd_core_prisma_helpers::{file_path_pub_and_cas_ids, file_path_walker};
|
||||
|
||||
use sd_prisma::prisma::file_path;
|
||||
|
@ -9,6 +12,7 @@ use std::{
|
|||
collections::{HashMap, HashSet, VecDeque},
|
||||
future::Future,
|
||||
hash::{Hash, Hasher},
|
||||
ops::Deref,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
|
@ -115,7 +119,8 @@ where
|
|||
/// a list of accepted entries. There are some useful comments in the implementation of this function
|
||||
/// in case of doubts.
|
||||
pub(super) async fn walk<FilePathDBFetcherFut, ToRemoveDbFetcherFut>(
|
||||
root: impl AsRef<Path>,
|
||||
library_root: impl AsRef<Path>,
|
||||
current_dir: impl AsRef<Path>,
|
||||
indexer_rules: &[IndexerRule],
|
||||
mut update_notifier: impl FnMut(&Path, usize),
|
||||
file_paths_db_fetcher: impl Fn(Vec<file_path::WhereParam>) -> FilePathDBFetcherFut,
|
||||
|
@ -138,11 +143,11 @@ where
|
|||
ToRemoveDbFetcherFut:
|
||||
Future<Output = Result<Vec<file_path_pub_and_cas_ids::Data>, IndexerError>>,
|
||||
{
|
||||
let root = root.as_ref();
|
||||
let current_dir = current_dir.as_ref();
|
||||
|
||||
let mut to_walk = VecDeque::with_capacity(TO_WALK_QUEUE_INITIAL_CAPACITY);
|
||||
to_walk.push_back(ToWalkEntry {
|
||||
path: root.to_path_buf(),
|
||||
path: current_dir.to_path_buf(),
|
||||
parent_dir_accepted_by_its_children: None,
|
||||
maybe_parent: None,
|
||||
});
|
||||
|
@ -156,7 +161,8 @@ where
|
|||
let last_indexed_count = indexed_paths.len();
|
||||
|
||||
let (entry_size, current_to_remove) = inner_walk_single_dir(
|
||||
root,
|
||||
library_root.as_ref(),
|
||||
current_dir,
|
||||
&entry,
|
||||
indexer_rules,
|
||||
&to_remove_db_fetcher,
|
||||
|
@ -199,6 +205,7 @@ where
|
|||
}
|
||||
|
||||
pub(super) async fn keep_walking<FilePathDBFetcherFut, ToRemoveDbFetcherFut>(
|
||||
location_path: impl AsRef<Path>,
|
||||
to_walk_entry: &ToWalkEntry,
|
||||
indexer_rules: &[IndexerRule],
|
||||
mut update_notifier: impl FnMut(&Path, usize),
|
||||
|
@ -227,6 +234,7 @@ where
|
|||
let mut errors = vec![];
|
||||
|
||||
let (to_walk_entry_size, to_remove) = inner_walk_single_dir(
|
||||
location_path,
|
||||
to_walk_entry.path.clone(),
|
||||
to_walk_entry,
|
||||
indexer_rules,
|
||||
|
@ -265,7 +273,8 @@ where
|
|||
}
|
||||
|
||||
pub(super) async fn walk_single_dir<FilePathDBFetcherFut, ToRemoveDbFetcherFut>(
|
||||
root: impl AsRef<Path>,
|
||||
location_path: impl AsRef<Path>,
|
||||
current_dir: impl AsRef<Path>,
|
||||
indexer_rules: &[IndexerRule],
|
||||
file_paths_db_fetcher: impl Fn(Vec<file_path::WhereParam>) -> FilePathDBFetcherFut,
|
||||
to_remove_db_fetcher: impl Fn(
|
||||
|
@ -289,18 +298,18 @@ where
|
|||
ToRemoveDbFetcherFut:
|
||||
Future<Output = Result<Vec<file_path_pub_and_cas_ids::Data>, IndexerError>>,
|
||||
{
|
||||
let root = root.as_ref();
|
||||
let current_directory = current_dir.as_ref();
|
||||
|
||||
let mut indexed_paths = HashSet::with_capacity(WALK_SINGLE_DIR_PATHS_BUFFER_INITIAL_CAPACITY);
|
||||
|
||||
if add_root {
|
||||
let metadata = fs::metadata(root)
|
||||
let metadata = fs::metadata(current_directory)
|
||||
.await
|
||||
.map_err(|e| FileIOError::from((root, e)))?;
|
||||
.map_err(|e| FileIOError::from((current_directory, e)))?;
|
||||
|
||||
indexed_paths.insert(WalkingEntry {
|
||||
iso_file_path: iso_file_path_factory(root, true)?,
|
||||
maybe_metadata: Some(FilePathMetadata::from_path(root, &metadata)?),
|
||||
iso_file_path: iso_file_path_factory(current_directory, true)?,
|
||||
maybe_metadata: Some(FilePathMetadata::from_path(current_directory, &metadata)?),
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -308,9 +317,10 @@ where
|
|||
let mut errors = vec![];
|
||||
|
||||
let (root_size, to_remove) = inner_walk_single_dir(
|
||||
root,
|
||||
location_path,
|
||||
current_directory,
|
||||
&ToWalkEntry {
|
||||
path: root.to_path_buf(),
|
||||
path: current_directory.to_path_buf(),
|
||||
parent_dir_accepted_by_its_children: None,
|
||||
maybe_parent: None,
|
||||
},
|
||||
|
@ -430,7 +440,8 @@ struct WorkingTable<'a> {
|
|||
}
|
||||
|
||||
async fn inner_walk_single_dir<ToRemoveDbFetcherFut>(
|
||||
root: impl AsRef<Path>,
|
||||
library_root: impl AsRef<Path>,
|
||||
current_dir: impl AsRef<Path>,
|
||||
ToWalkEntry {
|
||||
path,
|
||||
parent_dir_accepted_by_its_children,
|
||||
|
@ -465,7 +476,17 @@ where
|
|||
return (0, vec![]);
|
||||
};
|
||||
|
||||
let root = root.as_ref();
|
||||
let mut rules = indexer_rules.to_owned();
|
||||
|
||||
if rules.iter().any(|rule| GITIGNORE.deref() == rule) {
|
||||
if let Some(pat) =
|
||||
GitIgnoreRules::get_rules_if_in_git_repo(library_root.as_ref(), path).await
|
||||
{
|
||||
rules.extend(pat.into_iter().map(Into::into));
|
||||
}
|
||||
}
|
||||
|
||||
let current_dir = current_dir.as_ref();
|
||||
|
||||
// Just to make sure...
|
||||
paths_buffer.clear();
|
||||
|
@ -496,7 +517,7 @@ where
|
|||
accept_by_children_dir
|
||||
);
|
||||
|
||||
let Ok(rules_per_kind) = IndexerRule::apply_all(indexer_rules, ¤t_path)
|
||||
let Ok(rules_per_kind) = IndexerRule::apply_all(&rules, ¤t_path)
|
||||
.await
|
||||
.map_err(|e| errors.push(e.into()))
|
||||
else {
|
||||
|
@ -515,6 +536,12 @@ where
|
|||
continue 'entries;
|
||||
}
|
||||
|
||||
if let Some(f) = rules_per_kind.get(&RuleKind::IgnoredByGit) {
|
||||
if f.iter().any(|s| !s) {
|
||||
continue 'entries;
|
||||
}
|
||||
}
|
||||
|
||||
let Ok(metadata) = entry
|
||||
.metadata()
|
||||
.await
|
||||
|
@ -562,6 +589,14 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
// Then we check if there's a git ignore rule for it
|
||||
if let Some(accept) = rules_per_kind.get(&RuleKind::IgnoredByGit) {
|
||||
if !accept.iter().any(|&r| r) {
|
||||
trace!(dir=?current_path, "ignoring files because of git ignore");
|
||||
continue 'entries;
|
||||
}
|
||||
}
|
||||
|
||||
// Then we mark this directory to be walked in too
|
||||
if let Some(ref mut to_walk) = maybe_to_walk {
|
||||
to_walk.push_back(ToWalkEntry {
|
||||
|
@ -606,7 +641,7 @@ where
|
|||
for ancestor in current_path
|
||||
.ancestors()
|
||||
.skip(1) // Skip the current directory as it was already indexed
|
||||
.take_while(|&ancestor| ancestor != root)
|
||||
.take_while(|&ancestor| ancestor != current_dir)
|
||||
{
|
||||
let Ok(iso_file_path) =
|
||||
iso_file_path_factory(ancestor, true).map_err(|e| errors.push(e))
|
||||
|
@ -821,7 +856,8 @@ mod tests {
|
|||
|
||||
let walk_result = walk(
|
||||
root_path.to_path_buf(),
|
||||
&[],
|
||||
root_path.to_path_buf(),
|
||||
&mut [],
|
||||
|_, _| {},
|
||||
|_| async { Ok(vec![]) },
|
||||
|_, _| async { Ok(vec![]) },
|
||||
|
@ -872,7 +908,7 @@ mod tests {
|
|||
.into_iter()
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
let only_photos_rule = &[new_indexer_rule(
|
||||
let mut only_photos_rule = vec![new_indexer_rule(
|
||||
"only photos".to_string(),
|
||||
false,
|
||||
vec![RulePerKind::AcceptFilesByGlob(
|
||||
|
@ -886,7 +922,8 @@ mod tests {
|
|||
|
||||
let walk_result = walk(
|
||||
root_path.to_path_buf(),
|
||||
only_photos_rule,
|
||||
root_path.to_path_buf(),
|
||||
&mut only_photos_rule,
|
||||
|_, _| {},
|
||||
|_| async { Ok(vec![]) },
|
||||
|_, _| async { Ok(vec![]) },
|
||||
|
@ -934,7 +971,7 @@ mod tests {
|
|||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/Cargo.toml"), false), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src/main.rs"), false), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target/"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target/debug"), true), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target/debug/main"), false), metadata },
|
||||
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner"), true), metadata },
|
||||
|
@ -950,7 +987,7 @@ mod tests {
|
|||
.into_iter()
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
let git_repos = &[new_indexer_rule(
|
||||
let mut git_repos = vec![new_indexer_rule(
|
||||
"git repos".to_string(),
|
||||
false,
|
||||
vec![RulePerKind::AcceptIfChildrenDirectoriesArePresent(
|
||||
|
@ -960,7 +997,8 @@ mod tests {
|
|||
|
||||
let walk_result = walk(
|
||||
root_path.to_path_buf(),
|
||||
git_repos,
|
||||
root_path.to_path_buf(),
|
||||
&mut git_repos,
|
||||
|_, _| {},
|
||||
|_| async { Ok(vec![]) },
|
||||
|_, _| async { Ok(vec![]) },
|
||||
|
@ -979,7 +1017,9 @@ mod tests {
|
|||
let actual = walk_result.walked.collect::<HashSet<_>>();
|
||||
|
||||
if actual != expected {
|
||||
panic!("difference: {:#?}", expected.difference(&actual));
|
||||
let not_found = expected.difference(&actual);
|
||||
let not_expected = actual.difference(&expected);
|
||||
panic!("difference:\nexpected, but not found: {not_found:#?}\nfound, but not expected:{not_expected:#?}");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1018,7 +1058,7 @@ mod tests {
|
|||
.into_iter()
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
let git_repos_no_deps_no_build_dirs = &[
|
||||
let mut git_repos_no_deps_no_build_dirs = vec![
|
||||
new_indexer_rule(
|
||||
"git repos".to_string(),
|
||||
false,
|
||||
|
@ -1052,7 +1092,8 @@ mod tests {
|
|||
|
||||
let walk_result = walk(
|
||||
root_path.to_path_buf(),
|
||||
git_repos_no_deps_no_build_dirs,
|
||||
root_path.to_path_buf(),
|
||||
&mut git_repos_no_deps_no_build_dirs,
|
||||
|_, _| {},
|
||||
|_| async { Ok(vec![]) },
|
||||
|_, _| async { Ok(vec![]) },
|
||||
|
@ -1071,7 +1112,9 @@ mod tests {
|
|||
let actual = walk_result.walked.collect::<HashSet<_>>();
|
||||
|
||||
if actual != expected {
|
||||
panic!("difference: {:#?}", expected.difference(&actual));
|
||||
let not_found = expected.difference(&actual);
|
||||
let not_expected = actual.difference(&expected);
|
||||
panic!("difference:\nexpected, but not found: {not_found:#?}\nfound, but not expected:{not_expected:#?}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,7 +10,7 @@ use crate::{
|
|||
|
||||
use sd_core_file_path_helper::{path_is_hidden, MetadataExt};
|
||||
use sd_core_indexer_rules::{
|
||||
seed::{no_hidden, no_os_protected},
|
||||
seed::{NO_HIDDEN, NO_SYSTEM_FILES},
|
||||
IndexerRule, RuleKind,
|
||||
};
|
||||
|
||||
|
@ -21,6 +21,7 @@ use sd_utils::{chain_optional_iter, error::FileIOError};
|
|||
use std::{
|
||||
collections::HashMap,
|
||||
io::ErrorKind,
|
||||
ops::Deref,
|
||||
path::{Path, PathBuf},
|
||||
sync::Arc,
|
||||
};
|
||||
|
@ -123,8 +124,8 @@ pub async fn walk(
|
|||
let task = tokio::spawn(async move {
|
||||
let path = &path;
|
||||
let rules = chain_optional_iter(
|
||||
[IndexerRule::from(no_os_protected())],
|
||||
[(!with_hidden_files).then(|| IndexerRule::from(no_hidden()))],
|
||||
[IndexerRule::from(NO_SYSTEM_FILES.deref())],
|
||||
[(!with_hidden_files).then(|| IndexerRule::from(NO_HIDDEN.deref()))],
|
||||
);
|
||||
|
||||
let mut thumbnails_to_generate = vec![];
|
||||
|
|
|
@ -204,9 +204,9 @@ impl QuicTransport {
|
|||
addr.set_port(
|
||||
TcpListener::bind(addr)
|
||||
.await
|
||||
.map_err(|e| QuicTransportError::ListenerSetup(e))?
|
||||
.map_err(QuicTransportError::ListenerSetup)?
|
||||
.local_addr()
|
||||
.map_err(|e| QuicTransportError::ListenerSetup(e))?
|
||||
.map_err(QuicTransportError::ListenerSetup)?
|
||||
.port(),
|
||||
);
|
||||
}
|
||||
|
@ -230,8 +230,8 @@ impl QuicTransport {
|
|||
.map_err(|e| QuicTransportError::SendChannelClosed(e.to_string()))?;
|
||||
|
||||
rx.await
|
||||
.map_err(|e| QuicTransportError::ReceiveChannelClosed(e))
|
||||
.and_then(|r| r.map_err(|e| QuicTransportError::InternalEvent(e)))
|
||||
.map_err(QuicTransportError::ReceiveChannelClosed)
|
||||
.and_then(|r| r.map_err(QuicTransportError::InternalEvent))
|
||||
}
|
||||
|
||||
pub async fn shutdown(self) {
|
||||
|
|
|
@ -21,7 +21,8 @@ const ruleKinds: UnionToTuple<RuleKind> = [
|
|||
'AcceptFilesByGlob',
|
||||
'RejectFilesByGlob',
|
||||
'AcceptIfChildrenDirectoriesArePresent',
|
||||
'RejectIfChildrenDirectoriesArePresent'
|
||||
'RejectIfChildrenDirectoriesArePresent',
|
||||
'IgnoredByGit'
|
||||
];
|
||||
const ruleKindEnum = z.enum(ruleKinds);
|
||||
|
||||
|
|
|
@ -571,7 +571,7 @@ export type Resolution = { width: number; height: number }
|
|||
|
||||
export type Response = { Start: { user_code: string; verification_url: string; verification_url_complete: string } } | "Complete" | { Error: string }
|
||||
|
||||
export type RuleKind = "AcceptFilesByGlob" | "RejectFilesByGlob" | "AcceptIfChildrenDirectoriesArePresent" | "RejectIfChildrenDirectoriesArePresent"
|
||||
export type RuleKind = "AcceptFilesByGlob" | "RejectFilesByGlob" | "AcceptIfChildrenDirectoriesArePresent" | "RejectIfChildrenDirectoriesArePresent" | "IgnoredByGit"
|
||||
|
||||
export type SavedSearch = { id: number; pub_id: number[]; target: string | null; search: string | null; filters: string | null; name: string | null; icon: string | null; description: string | null; date_created: string | null; date_modified: string | null }
|
||||
|
||||
|
|
Loading…
Reference in a new issue