Extend indexer rules with .gitignore when available (#2459)

* Extend indexer rules with `.gitignore` when available

* Make indexer account for valid git repos when reading ignore files

* Ignore poorly formatted gitignore rules

* Reduce allocation in gitignore indexer rules

* Fix tests for gitignore indexer rules

* Remove code duplication

* Remove double call to git rules

* Improve doc for todo

* rustfmt

* Skip star rules that match a negated pattern when parsing gitignore

* Skip conflicting glob rules

* Rename and remove print

* Make git ignore indexer consider multiple paths

* Use gix_ignore to handle gitignore rules

* Fix gitignore patterns relative to git repo

* Git indexer also consider nested repos

* Create a dedicated type for rules coming from git

* Consider every gitignore file inside a git repo

* Add IgnoredByGit to interface

* Add Gitignore system IndexerRule to allow front-end to toggle GitIgnoreRules
 - Make IndexerRules names unique
 - Clippy fmt

* Fix migration concat

---------

Co-authored-by: Vítor Vasconcellos <vasconcellos.dev@gmail.com>
This commit is contained in:
Consoli 2024-05-17 03:28:12 -03:00 committed by GitHub
parent f847b76154
commit 26b6baffb6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 575 additions and 206 deletions

82
Cargo.lock generated
View file

@ -3012,6 +3012,12 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
[[package]]
name = "faster-hex"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2a2b11eda1d40935b26cf18f6833c526845ae8c41e58d09af6adeb6f0269183"
[[package]]
name = "fastrand"
version = "1.9.0"
@ -3673,6 +3679,73 @@ dependencies = [
"winapi",
]
[[package]]
name = "gix-features"
version = "0.38.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db4254037d20a247a0367aa79333750146a369719f0c6617fec4f5752cc62b37"
dependencies = [
"gix-hash",
"gix-trace",
"libc",
]
[[package]]
name = "gix-glob"
version = "0.16.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "682bdc43cb3c00dbedfcc366de2a849b582efd8d886215dbad2ea662ec156bb5"
dependencies = [
"bitflags 2.4.1",
"bstr",
"gix-features",
"gix-path",
"serde",
]
[[package]]
name = "gix-hash"
version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f93d7df7366121b5018f947a04d37f034717e113dcf9ccd85c34b58e57a74d5e"
dependencies = [
"faster-hex",
"thiserror",
]
[[package]]
name = "gix-ignore"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "640dbeb4f5829f9fc14d31f654a34a0350e43a24e32d551ad130d99bf01f63f1"
dependencies = [
"bstr",
"gix-glob",
"gix-path",
"gix-trace",
"serde",
"unicode-bom",
]
[[package]]
name = "gix-path"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23623cf0f475691a6d943f898c4d0b89f5c1a2a64d0f92bce0e0322ee6528783"
dependencies = [
"bstr",
"gix-trace",
"home",
"once_cell",
"thiserror",
]
[[package]]
name = "gix-trace"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f924267408915fddcd558e3f37295cc7d6a3e50f8bd8b606cee0808c3915157e"
[[package]]
name = "glib"
version = "0.18.5"
@ -8983,6 +9056,7 @@ dependencies = [
"flate2",
"futures",
"futures-concurrency",
"gix-ignore",
"globset",
"hostname",
"http-body 0.4.6",
@ -9124,7 +9198,9 @@ version = "0.1.0"
dependencies = [
"chrono",
"futures-concurrency",
"gix-ignore",
"globset",
"once_cell",
"prisma-client-rust",
"rmp-serde",
"rspc",
@ -11650,6 +11726,12 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56d12260fb92d52f9008be7e4bca09f584780eb2266dc8fecc6a192bec561694"
[[package]]
name = "unicode-bom"
version = "2.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7eec5d1121208364f6793f7d2e222bf75a915c19557537745b195b253dd64217"
[[package]]
name = "unicode-ccc"
version = "0.1.2"

View file

@ -18,7 +18,7 @@ use hyper::server::{accept::Accept, conn::AddrIncoming};
use rand::{distributions::Alphanumeric, Rng};
use sd_core::{custom_uri, Node, NodeError};
use serde::Deserialize;
use tauri::{async_runtime::block_on, plugin::TauriPlugin, AppHandle, RunEvent, Runtime};
use tauri::{async_runtime::block_on, plugin::TauriPlugin, RunEvent, Runtime};
use thiserror::Error;
use tokio::{net::TcpListener, task::block_in_place};
use tracing::info;

View file

@ -129,6 +129,7 @@ slotmap = "1.0.6"
sysinfo = "0.29.10"
tar = "0.4.40"
tower-service = "0.3.2"
gix-ignore = "0.11.2"
# Override features of transitive dependencies
[dependencies.openssl]

View file

@ -1,7 +1,10 @@
use crate::{indexer, Error, NonCriticalError};
use sd_core_file_path_helper::{FilePathError, FilePathMetadata, IsolatedFilePathData};
use sd_core_indexer_rules::{IndexerRuler, MetadataForIndexerRules, RuleKind};
use sd_core_indexer_rules::{
seed::{GitIgnoreRules, GITIGNORE},
IndexerRuler, MetadataForIndexerRules, RuleKind,
};
use sd_core_prisma_helpers::{file_path_pub_and_cas_ids, file_path_walker};
use sd_prisma::prisma::file_path;
@ -540,6 +543,14 @@ where
let (to_create, to_update, total_size, to_remove, accepted_ancestors, handles) = loop {
match stage {
WalkerStage::Start => {
if indexer_ruler.has_system(&GITIGNORE).await {
if let Some(rules) =
GitIgnoreRules::get_rules_if_in_git_repo(root.as_ref(), path).await
{
indexer_ruler.extend(rules.map(Into::into)).await;
}
}
*stage = WalkerStage::Walking {
read_dir_stream: ReadDirStream::new(fs::read_dir(&path).await.map_err(
|e| {
@ -1130,7 +1141,7 @@ mod tests {
use globset::{Glob, GlobSetBuilder};
use lending_stream::{LendingStream, StreamExt};
use tempfile::{tempdir, TempDir};
use tokio::fs;
use tokio::{fs, io::AsyncWriteExt};
use tracing::debug;
use tracing_test::traced_test;
@ -1188,23 +1199,30 @@ mod tests {
// root
// |__ rust_project
// | |__ .git
// | |__ <empty>
// | | |__ <empty>
// | |__ .gitignore
// | |__ ignorable.file
// | |__ Cargo.toml
// | |__ src
// | | |__ main.rs
// | |__ target
// | |__ debug
// | |__ main
// |__ inner
// | |__ node_project
// | |__ .git
// | |__ <empty>
// | |__ package.json
// | |__ src
// | | |__ App.tsx
// | |__ node_modules
// | |__ react
// | |__ package.json
// | | |__ debug
// | | |__ main
// | |__ partial
// | | |__ ignoreme
// | | |__ readme
// | |__ inner
// | |__ node_project
// | |__ .git
// | | |__ <empty>
// | |__ .gitignore
// | |__ ignorable.file
// | |__ package.json
// | |__ src
// | | |__ App.tsx
// | |__ node_modules
// | |__ react
// | |__ package.json
// |__ photos
// |__ photo1.png
// |__ photo2.jpg
@ -1223,14 +1241,33 @@ mod tests {
fs::create_dir(&node_project).await.unwrap();
fs::create_dir(&photos).await.unwrap();
// Inner directory partially ignored by git
let partial_dir = rust_project.join("partial");
fs::create_dir(&partial_dir).await.unwrap();
fs::File::create(partial_dir.join("ignoreme"))
.await
.unwrap();
fs::File::create(partial_dir.join("readme")).await.unwrap();
// Making rust and node projects a git repository
fs::create_dir(rust_project.join(".git")).await.unwrap();
let gitignore = rust_project.join(".gitignore");
let mut file = fs::File::create(gitignore).await.unwrap();
file.write_all(b"*.file\n/target\npartial/ignoreme")
.await
.unwrap();
fs::create_dir(node_project.join(".git")).await.unwrap();
let gitignore = node_project.join(".gitignore");
let mut file = fs::File::create(gitignore).await.unwrap();
file.write_all(b"ignorable.file").await.unwrap();
// Populating rust project
fs::File::create(rust_project.join("Cargo.toml"))
.await
.unwrap();
fs::File::create(rust_project.join("ignorable.file"))
.await
.unwrap();
let rust_src_dir = rust_project.join("src");
fs::create_dir(&rust_src_dir).await.unwrap();
fs::File::create(rust_src_dir.join("main.rs"))
@ -1246,6 +1283,9 @@ mod tests {
fs::File::create(node_project.join("package.json"))
.await
.unwrap();
fs::File::create(node_project.join("ignorable.file"))
.await
.unwrap();
let node_src_dir = node_project.join("src");
fs::create_dir(&node_src_dir).await.unwrap();
fs::File::create(node_src_dir.join("App.tsx"))
@ -1363,15 +1403,16 @@ mod tests {
let expected = [
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/.git"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/.gitignore"), false), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/Cargo.toml"), false), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/partial"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/partial/readme"), false), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src/main.rs"), false), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target/debug"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target/debug/main"), false), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.git"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.gitignore"), false), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/package.json"), false), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src/App.tsx"), false), metadata },
@ -1458,16 +1499,17 @@ mod tests {
let expected = [
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/.git"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/.gitignore"), false), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/Cargo.toml"), false), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src/main.rs"), false), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target/debug"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target/debug/main"), false), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/partial"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/partial/readme"), false), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.git"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/package.json"), false), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.gitignore"), false), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src/App.tsx"), false), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/node_modules"), true), metadata },
@ -1513,12 +1555,16 @@ mod tests {
let expected = [
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/.git"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/.gitignore"), false), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/Cargo.toml"), false), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/partial"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/partial/readme"), false), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src/main.rs"), false), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.git"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.gitignore"), false), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/package.json"), false), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src/App.tsx"), false), metadata },

View file

@ -19,6 +19,7 @@ sd-utils = { path = "../../../crates/utils" }
chrono = { workspace = true }
futures-concurrency = { workspace = true }
globset = { workspace = true, features = ["serde1"] }
once_cell = { workspace = true }
prisma-client-rust = { workspace = true }
rmp-serde = { workspace = true }
rspc = { workspace = true }
@ -28,6 +29,7 @@ thiserror = { workspace = true }
tokio = { workspace = true, features = ["fs"] }
tracing = { workspace = true }
uuid = { workspace = true, features = ["v4", "serde"] }
gix-ignore = { version = "0.11.2", features = ["serde"] }
[dev-dependencies]
tempfile = { workspace = true }

View file

@ -32,17 +32,19 @@ use sd_utils::{
db::{maybe_missing, MissingFieldError},
error::{FileIOError, NonUtf8PathError},
};
use seed::SystemIndexerRule;
use serde::{Deserialize, Serialize};
use std::{
collections::{HashMap, HashSet},
fs::Metadata,
path::Path,
path::{Path, PathBuf},
sync::Arc,
};
use chrono::{DateTime, Utc};
use futures_concurrency::future::TryJoin;
use gix_ignore::{glob::pattern::Case, Search};
use globset::{Glob, GlobSet, GlobSetBuilder};
use rmp_serde::{decode, encode};
use rspc::ErrorCode;
@ -149,6 +151,9 @@ impl IndexerRuleCreateArgs {
parameters.into_iter().collect(),
))
}
RuleKind::IgnoredByGit => {
Ok(RulePerKind::IgnoredByGit(PathBuf::new(), Search::default()))
}
})
.collect::<Result<Vec<_>, _>>()?,
)?;
@ -184,13 +189,14 @@ pub enum RuleKind {
RejectFilesByGlob = 1,
AcceptIfChildrenDirectoriesArePresent = 2,
RejectIfChildrenDirectoriesArePresent = 3,
IgnoredByGit = 4,
}
impl RuleKind {
#[must_use]
pub const fn variant_count() -> usize {
// TODO: Use https://doc.rust-lang.org/std/mem/fn.variant_count.html if it ever gets stabilized
4
5
}
}
@ -202,7 +208,7 @@ impl RuleKind {
/// In case of `ParametersPerKind::AcceptIfChildrenDirectoriesArePresent` or
/// `ParametersPerKind::RejectIfChildrenDirectoriesArePresent`
/// first we change the data structure to a vector, then we serialize it.
#[derive(Debug)]
#[derive(Debug, Clone)]
pub enum RulePerKind {
// TODO: Add an indexer rule that filter files based on their extended attributes
// https://learn.microsoft.com/en-us/windows/win32/fileio/file-attribute-constants
@ -211,6 +217,7 @@ pub enum RulePerKind {
RejectFilesByGlob(Vec<Glob>, GlobSet),
AcceptIfChildrenDirectoriesArePresent(HashSet<String>),
RejectIfChildrenDirectoriesArePresent(HashSet<String>),
IgnoredByGit(PathBuf, Search),
}
impl RulePerKind {
@ -285,6 +292,10 @@ impl RulePerKind {
RuleKind::RejectFilesByGlob,
reject_by_glob(source, reject_glob_set),
)),
Self::IgnoredByGit(git_repo, patterns) => Ok((
RuleKind::IgnoredByGit,
accept_by_gitpattern(source.as_ref(), git_repo, patterns),
)),
}
}
@ -313,11 +324,29 @@ impl RulePerKind {
RuleKind::RejectFilesByGlob,
reject_by_glob(source, reject_glob_set),
)),
Self::IgnoredByGit(base_dir, patterns) => Ok((
RuleKind::IgnoredByGit,
accept_by_gitpattern(source.as_ref(), base_dir, patterns),
)),
}
}
}
#[derive(Debug, Serialize, Deserialize)]
/// Decides whether `source` is accepted by the indexer under the gitignore
/// rules collected in `search`.
///
/// Returns `true` (accept) when no gitignore pattern matches `source`, or when
/// the best match is a negated pattern (`!pattern`, i.e. explicitly un-ignored).
/// Returns `false` (reject) when a positive ignore pattern matches, or when the
/// path relative to the repo is not valid UTF-8 and therefore cannot be matched.
///
/// # Panics
/// Panics if `source` is not located under `base_dir` — callers must uphold
/// the invariant that `base_dir` is the git repo containing `source`.
fn accept_by_gitpattern(source: &Path, base_dir: &Path, search: &Search) -> bool {
// gitignore patterns are relative to the repo root, so strip the repo prefix first
let relative = source
.strip_prefix(base_dir)
.expect("`base_dir` should be our git repo, and `source` should be inside of it")
// gix_ignore matches on byte strings; non-UTF-8 paths cannot be converted,
// so we conservatively reject them
let Some(src) = relative.to_str().map(|s| s.as_bytes().into()) else {
return false;
};
// Case::Fold performs case-insensitive matching; `None` from the search means
// no pattern matched at all, which we treat as "accepted"
search
.pattern_matching_relative_path(src, Some(source.is_dir()), Case::Fold)
.map_or(true, |rule| rule.pattern.is_negative())
}
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct IndexerRule {
pub id: Option<i32>,
pub name: String,
@ -387,7 +416,6 @@ impl IndexerRule {
/// Shared, cheaply-cloneable collection of indexer rules.
///
/// Cloning an `IndexerRuler` clones the `Arc`, so all clones observe the same
/// rule list; the `RwLock` allows new rules (e.g. parsed from a `.gitignore`)
/// to be appended via `extend` while other tasks are reading.
#[derive(Debug, Clone, Default)]
pub struct IndexerRuler {
// NOTE(review): the original TODO about "using this RwLock later" appears to be
// done now — the walker extends these rules at runtime with GitIgnoreRules.
rules: Arc<RwLock<Vec<IndexerRule>>>,
}
@ -436,6 +464,22 @@ impl IndexerRuler {
inner(&self.rules.read().await, source.as_ref(), metadata).await
}
/// Appends every rule produced by `iter` to the shared rule list.
pub async fn extend(&self, iter: impl IntoIterator<Item = IndexerRule> + Send) {
    self.rules.write().await.extend(iter);
}
/// Returns `true` if `rule` (compared by name, via `PartialEq<IndexerRule>`)
/// is present in the current rule list.
pub async fn has_system(&self, rule: &SystemIndexerRule) -> bool {
    // The original body built a Vec<(Option<i32>, String)> of every rule's id
    // and cloned name and immediately discarded it — leftover debug code that
    // allocated on every call. Only the membership check below is needed.
    self.rules
        .read()
        .await
        .iter()
        .any(|inner_rule| rule == inner_rule)
}
}
impl TryFrom<&indexer_rule::Data> for IndexerRule {

View file

@ -1,10 +1,16 @@
use std::path::{Path, PathBuf};
use futures_concurrency::future::Join;
use gix_ignore::{glob::search::pattern::List, search::Ignore, Search};
use sd_prisma::prisma::{indexer_rule, PrismaClient};
use chrono::Utc;
use thiserror::Error;
use tokio::fs;
use uuid::Uuid;
use super::{IndexerRule, IndexerRuleError, RulePerKind};
use once_cell::sync::Lazy;
#[derive(Error, Debug)]
pub enum SeederError {
@ -12,14 +18,126 @@ pub enum SeederError {
IndexerRules(#[from] IndexerRuleError),
#[error("An error occurred with the database while applying migrations: {0}")]
DatabaseError(#[from] prisma_client_rust::QueryError),
#[error("Failed to parse indexer rules based on external system")]
InhirentedExternalRules,
}
/// Indexer rules derived at runtime from a git repository's ignore files
/// (`.gitignore` files plus `.git/info/exclude`), wrapped as a single
/// `RulePerKind::IgnoredByGit` rule.
#[derive(Debug)]
pub struct GitIgnoreRules {
// The parsed gitignore search, always the `IgnoredByGit` variant
rules: RulePerKind,
}
impl GitIgnoreRules {
/// Walks from `current` up towards `library_root`, collecting every
/// `.gitignore` file along the way. Returns `None` if no ancestor within the
/// library root is a git repository; otherwise returns the parsed rules
/// (or a parse error) for the nearest enclosing repo.
pub async fn get_rules_if_in_git_repo(
library_root: &Path,
current: &Path,
) -> Option<Result<Self, SeederError>> {
let mut git_repo = None;
let mut ignores = Vec::new();
// Ancestors are visited nearest-first; stop at the library root boundary
for ancestor in current
.ancestors()
.take_while(|&path| path.starts_with(library_root))
{
let git_ignore = ancestor.join(".gitignore");
// consider any `.gitignore` files that are inside a git repo
if matches!(fs::try_exists(&git_ignore).await, Ok(true)) {
ignores.push(git_ignore);
}
// Stop at the first (innermost) git repo; nested repos shadow outer ones
if Self::is_git_repo(ancestor).await {
git_repo.replace(ancestor);
break;
}
}
let git_repo = git_repo?;
Some(Self::parse_gitrepo(git_repo, ignores).await)
}
/// Parses all collected `.gitignore` files concurrently, merges in the repo's
/// `.git/info/exclude` patterns (best-effort), and wraps everything in a
/// single `IgnoredByGit` rule anchored at `git_repo`.
async fn parse_gitrepo(git_repo: &Path, gitignores: Vec<PathBuf>) -> Result<Self, SeederError> {
let mut search = Search::default();
// Parse every .gitignore in parallel; individual failures are dropped so one
// malformed file doesn't discard the rest of the rules
let gitignores = gitignores
.into_iter()
.map(Self::parse_git_ignore)
.collect::<Vec<_>>()
.join()
.await;
search
.patterns
.extend(gitignores.into_iter().filter_map(Result::ok));
// `.git/info/exclude` is optional; ignore errors reading it
let git_exclude_rules = Self::parse_git_exclude(git_repo.join(".git")).await;
if let Ok(rules) = git_exclude_rules {
search.patterns.extend(rules);
}
Ok(Self {
rules: RulePerKind::IgnoredByGit(git_repo.to_owned(), search),
})
}
/// Reads and parses one `.gitignore` file on the blocking thread pool
/// (file parsing is synchronous in gix_ignore).
async fn parse_git_ignore(gitignore: PathBuf) -> Result<List<Ignore>, SeederError> {
tokio::task::spawn_blocking(move || {
// NOTE(review): capacity 30 looks like an arbitrary pre-size for the read
// buffer — confirm against typical .gitignore sizes
let mut buf = Vec::with_capacity(30);
if let Ok(Some(patterns)) = List::from_file(gitignore, None, true, &mut buf) {
Ok(patterns)
} else {
Err(SeederError::InhirentedExternalRules)
}
})
.await
// A JoinError (panicked/cancelled task) is mapped to the same parse error
// NOTE(review): `InhirentedExternalRules` looks like a typo of
// `InheritedExternalRules`; renaming requires touching the enum definition too
.map_err(|_| SeederError::InhirentedExternalRules)?
}
/// Loads the repo-local exclude patterns (`.git/info/exclude`) from `dot_git`
/// on the blocking thread pool.
async fn parse_git_exclude(dot_git: PathBuf) -> Result<Vec<List<Ignore>>, SeederError> {
tokio::task::spawn_blocking(move || {
let mut buf = Vec::new();
Search::from_git_dir(dot_git.as_ref(), None, &mut buf)
.map(|search| search.patterns)
.map_err(|_| SeederError::InhirentedExternalRules)
})
.await
.map_err(|_| SeederError::InhirentedExternalRules)?
}
/// Heuristic git-repo check: a directory containing a `.git` directory.
/// (Worktrees/submodules use a `.git` *file*, which this returns `false` for.)
async fn is_git_repo(path: &Path) -> bool {
let path = path.join(".git");
// `is_dir` hits the filesystem, so run it off the async runtime
tokio::task::spawn_blocking(move || path.is_dir())
.await
.unwrap_or_default()
}
}
/// Converts parsed gitignore rules into a regular (non-persisted) `IndexerRule`
/// so the walker can apply them alongside the database-seeded rules.
impl From<GitIgnoreRules> for IndexerRule {
    fn from(git: GitIgnoreRules) -> Self {
        // Capture one timestamp so date_created and date_modified are identical;
        // two separate Utc::now() calls could yield slightly different instants.
        let now = Utc::now();
        Self {
            // No database id: these rules are derived at runtime, never persisted
            id: None,
            name: ".gitignore'd".to_owned(),
            default: true,
            date_created: now,
            date_modified: now,
            rules: vec![git.rules],
        }
    }
}
/// A built-in indexer rule shipped with the application (as opposed to rules
/// created by the user and stored in the database).
#[derive(Debug)]
pub struct SystemIndexerRule {
// Display name; also used as the identity key (see PartialEq below)
name: &'static str,
// The actual matching rules; may be empty for toggle-only rules like GITIGNORE
rules: Vec<RulePerKind>,
// Whether the rule is enabled by default for new locations
default: bool,
}
/// Equality against a stored `IndexerRule` is by name only — this relies on
/// rule names being unique (enforced by the accompanying migration).
impl PartialEq<IndexerRule> for SystemIndexerRule {
fn eq(&self, other: &IndexerRule) -> bool {
self.name == other.name
}
}
impl From<SystemIndexerRule> for IndexerRule {
fn from(rule: SystemIndexerRule) -> Self {
Self {
@ -33,14 +151,33 @@ impl From<SystemIndexerRule> for IndexerRule {
}
}
/// Builds a database-shaped `IndexerRule` from a built-in system rule without
/// consuming it (the system rules live in `static`s).
impl From<&SystemIndexerRule> for IndexerRule {
    fn from(rule: &SystemIndexerRule) -> Self {
        // Single timestamp so date_created == date_modified; calling Utc::now()
        // twice could produce two slightly different instants.
        let now = Utc::now();
        Self {
            // System rules have no database id until seeded
            id: None,
            name: rule.name.to_string(),
            default: rule.default,
            rules: rule.rules.clone(),
            date_created: now,
            date_modified: now,
        }
    }
}
/// Seeds system indexer rules into a new or existing library,
pub async fn new_or_existing_library(db: &PrismaClient) -> Result<(), SeederError> {
use indexer_rule::{date_created, date_modified, default, name, rules_per_kind};
// DO NOT REORDER THIS ARRAY!
for (i, rule) in [no_os_protected(), no_hidden(), no_git(), only_images()]
.into_iter()
.enumerate()
for (i, rule) in [
&NO_SYSTEM_FILES,
&NO_HIDDEN,
&NO_GIT,
&GITIGNORE,
&ONLY_IMAGES,
]
.into_iter()
.enumerate()
{
let pub_id = sd_utils::uuid_to_bytes(Uuid::from_u128(i as u128));
let rules = rmp_serde::to_vec_named(&rule.rules).map_err(IndexerRuleError::from)?;
@ -66,147 +203,140 @@ pub async fn new_or_existing_library(db: &PrismaClient) -> Result<(), SeederErro
Ok(())
}
#[must_use]
#[allow(clippy::missing_panics_doc)]
pub fn no_os_protected() -> SystemIndexerRule {
pub static NO_SYSTEM_FILES: Lazy<SystemIndexerRule> = Lazy::new(|| {
SystemIndexerRule {
// TODO: On windows, beside the listed files, any file with the FILE_ATTRIBUTE_SYSTEM should be considered a system file
// https://learn.microsoft.com/en-us/windows/win32/fileio/file-attribute-constants#FILE_ATTRIBUTE_SYSTEM
name: "No OS protected",
default: true,
rules: vec![
RulePerKind::new_reject_files_by_globs_str(
[
vec![
"**/.spacedrive",
],
// Globset, even on Windows, requires the use of / as a separator
// https://github.com/github/gitignore/blob/main/Global/Windows.gitignore
// https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file
#[cfg(target_os = "windows")]
vec![
// Windows thumbnail cache files
"**/{Thumbs.db,Thumbs.db:encryptable,ehthumbs.db,ehthumbs_vista.db}",
// Dump file
"**/*.stackdump",
// Folder config file
"**/[Dd]esktop.ini",
// Recycle Bin used on file shares
"**/$RECYCLE.BIN",
// Chkdsk recovery directory
"**/FOUND.[0-9][0-9][0-9]",
// Reserved names
"**/{CON,PRN,AUX,NUL,COM0,COM1,COM2,COM3,COM4,COM5,COM6,COM7,COM8,COM9,LPT0,LPT1,LPT2,LPT3,LPT4,LPT5,LPT6,LPT7,LPT8,LPT9}",
"**/{CON,PRN,AUX,NUL,COM0,COM1,COM2,COM3,COM4,COM5,COM6,COM7,COM8,COM9,LPT0,LPT1,LPT2,LPT3,LPT4,LPT5,LPT6,LPT7,LPT8,LPT9}.*",
// User special files
"C:/Users/*/NTUSER.DAT*",
"C:/Users/*/ntuser.dat*",
"C:/Users/*/{ntuser.ini,ntuser.dat,NTUSER.DAT}",
// User special folders (most of these the user don't even have permission to access)
"C:/Users/*/{Cookies,AppData,NetHood,Recent,PrintHood,SendTo,Templates,Start Menu,Application Data,Local Settings,My Documents}",
// System special folders
"C:/{$Recycle.Bin,$WinREAgent,Documents and Settings,Program Files,Program Files (x86),ProgramData,Recovery,PerfLogs,Windows,Windows.old}",
// NTFS internal dir, can exists on any drive
"[A-Z]:/System Volume Information",
// System special files
"C:/{config,pagefile,hiberfil}.sys",
// Windows can create a swapfile on any drive
"[A-Z]:/swapfile.sys",
"C:/DumpStack.log.tmp",
],
// https://github.com/github/gitignore/blob/main/Global/macOS.gitignore
// https://developer.apple.com/library/archive/documentation/FileManagement/Conceptual/FileSystemProgrammingGuide/FileSystemOverview/FileSystemOverview.html#//apple_ref/doc/uid/TP40010672-CH2-SW14
#[cfg(any(target_os = "ios", target_os = "macos"))]
vec![
"**/.{DS_Store,AppleDouble,LSOverride}",
// Icon must end with two \r
"**/Icon\r\r",
// Thumbnails
"**/._*",
],
#[cfg(target_os = "macos")]
vec![
"/{System,Network,Library,Applications,.PreviousSystemInformation,.com.apple.templatemigration.boot-install}",
"/System/Volumes/Data/{System,Network,Library,Applications,.PreviousSystemInformation,.com.apple.templatemigration.boot-install}",
"/Users/*/{Library,Applications}",
"/System/Volumes/Data/Users/*/{Library,Applications}",
"**/*.photoslibrary/{database,external,private,resources,scope}",
// Files that might appear in the root of a volume
"**/.{DocumentRevisions-V100,fseventsd,Spotlight-V100,TemporaryItems,Trashes,VolumeIcon.icns,com.apple.timemachine.donotpresent}",
// Directories potentially created on remote AFP share
"**/.{AppleDB,AppleDesktop,apdisk}",
"**/{Network Trash Folder,Temporary Items}",
],
// https://github.com/github/gitignore/blob/main/Global/Linux.gitignore
#[cfg(target_os = "linux")]
vec![
"**/*~",
// temporary files which can be created if a process still has a handle open of a deleted file
"**/.fuse_hidden*",
// KDE directory preferences
"**/.directory",
// Linux trash folder which might appear on any partition or disk
"**/.Trash-*",
// .nfs files are created when an open file is removed but is still being accessed
"**/.nfs*",
],
#[cfg(target_os = "android")]
vec![
"**/.nomedia",
"**/.thumbnails",
],
// https://en.wikipedia.org/wiki/Unix_filesystem#Conventional_directory_layout
// https://en.wikipedia.org/wiki/Filesystem_Hierarchy_Standard
#[cfg(target_family = "unix")]
vec![
// Directories containing unix memory/device mapped files/dirs
"/{dev,sys,proc}",
// Directories containing special files for current running programs
"/{run,var,boot}",
// ext2-4 recovery directory
"**/lost+found",
],
]
.into_iter()
.flatten()
).expect("this is hardcoded and should always work"),
],
}
// TODO: On windows, beside the listed files, any file with the FILE_ATTRIBUTE_SYSTEM should be considered a system file
// https://learn.microsoft.com/en-us/windows/win32/fileio/file-attribute-constants#FILE_ATTRIBUTE_SYSTEM
name: "No System files",
default: true,
rules: vec![
RulePerKind::new_reject_files_by_globs_str(
[
vec![
"**/.spacedrive",
],
// Globset, even on Windows, requires the use of / as a separator
// https://github.com/github/gitignore/blob/main/Global/Windows.gitignore
// https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file
#[cfg(target_os = "windows")]
vec![
// Windows thumbnail cache files
"**/{Thumbs.db,Thumbs.db:encryptable,ehthumbs.db,ehthumbs_vista.db}",
// Dump file
"**/*.stackdump",
// Folder config file
"**/[Dd]esktop.ini",
// Recycle Bin used on file shares
"**/$RECYCLE.BIN",
// Chkdsk recovery directory
"**/FOUND.[0-9][0-9][0-9]",
// Reserved names
"**/{CON,PRN,AUX,NUL,COM0,COM1,COM2,COM3,COM4,COM5,COM6,COM7,COM8,COM9,LPT0,LPT1,LPT2,LPT3,LPT4,LPT5,LPT6,LPT7,LPT8,LPT9}",
"**/{CON,PRN,AUX,NUL,COM0,COM1,COM2,COM3,COM4,COM5,COM6,COM7,COM8,COM9,LPT0,LPT1,LPT2,LPT3,LPT4,LPT5,LPT6,LPT7,LPT8,LPT9}.*",
// User special files
"C:/Users/*/NTUSER.DAT*",
"C:/Users/*/ntuser.dat*",
"C:/Users/*/{ntuser.ini,ntuser.dat,NTUSER.DAT}",
// User special folders (most of these the user don't even have permission to access)
"C:/Users/*/{Cookies,AppData,NetHood,Recent,PrintHood,SendTo,Templates,Start Menu,Application Data,Local Settings,My Documents}",
// System special folders
"C:/{$Recycle.Bin,$WinREAgent,Documents and Settings,Program Files,Program Files (x86),ProgramData,Recovery,PerfLogs,Windows,Windows.old}",
// NTFS internal dir, can exists on any drive
"[A-Z]:/System Volume Information",
// System special files
"C:/{config,pagefile,hiberfil}.sys",
// Windows can create a swapfile on any drive
"[A-Z]:/swapfile.sys",
"C:/DumpStack.log.tmp",
],
// https://github.com/github/gitignore/blob/main/Global/macOS.gitignore
// https://developer.apple.com/library/archive/documentation/FileManagement/Conceptual/FileSystemProgrammingGuide/FileSystemOverview/FileSystemOverview.html#//apple_ref/doc/uid/TP40010672-CH2-SW14
#[cfg(any(target_os = "ios", target_os = "macos"))]
vec![
"**/.{DS_Store,AppleDouble,LSOverride}",
// Icon must end with two \r
"**/Icon\r\r",
// Thumbnails
"**/._*",
],
#[cfg(target_os = "macos")]
vec![
"/{System,Network,Library,Applications,.PreviousSystemInformation,.com.apple.templatemigration.boot-install}",
"/System/Volumes/Data/{System,Network,Library,Applications,.PreviousSystemInformation,.com.apple.templatemigration.boot-install}",
"/Users/*/{Library,Applications}",
"/System/Volumes/Data/Users/*/{Library,Applications}",
"**/*.photoslibrary/{database,external,private,resources,scope}",
// Files that might appear in the root of a volume
"**/.{DocumentRevisions-V100,fseventsd,Spotlight-V100,TemporaryItems,Trashes,VolumeIcon.icns,com.apple.timemachine.donotpresent}",
// Directories potentially created on remote AFP share
"**/.{AppleDB,AppleDesktop,apdisk}",
"**/{Network Trash Folder,Temporary Items}",
],
// https://github.com/github/gitignore/blob/main/Global/Linux.gitignore
#[cfg(target_os = "linux")]
vec![
"**/*~",
// temporary files which can be created if a process still has a handle open of a deleted file
"**/.fuse_hidden*",
// KDE directory preferences
"**/.directory",
// Linux trash folder which might appear on any partition or disk
"**/.Trash-*",
// .nfs files are created when an open file is removed but is still being accessed
"**/.nfs*",
],
#[cfg(target_os = "android")]
vec![
"**/.nomedia",
"**/.thumbnails",
],
// https://en.wikipedia.org/wiki/Unix_filesystem#Conventional_directory_layout
// https://en.wikipedia.org/wiki/Filesystem_Hierarchy_Standard
#[cfg(target_family = "unix")]
vec![
// Directories containing unix memory/device mapped files/dirs
"/{dev,sys,proc}",
// Directories containing special files for current running programs
"/{run,var,boot}",
// ext2-4 recovery directory
"**/lost+found",
],
]
.into_iter()
.flatten()
).expect("this is hardcoded and should always work"),
],
}
});
#[must_use]
#[allow(clippy::missing_panics_doc)]
pub fn no_hidden() -> SystemIndexerRule {
SystemIndexerRule {
name: "No Hidden",
default: false,
rules: vec![RulePerKind::new_reject_files_by_globs_str(["**/.*"])
.expect("this is hardcoded and should always work")],
}
}
#[must_use]
#[allow(clippy::missing_panics_doc)]
fn no_git() -> SystemIndexerRule {
SystemIndexerRule {
name: "No Git",
default: false,
rules: vec![RulePerKind::new_reject_files_by_globs_str([
"**/{.git,.gitignore,.gitattributes,.gitkeep,.gitconfig,.gitmodules}",
])
pub static NO_HIDDEN: Lazy<SystemIndexerRule> = Lazy::new(|| SystemIndexerRule {
name: "No Hidden files",
default: false,
rules: vec![RulePerKind::new_reject_files_by_globs_str(["**/.*"])
.expect("this is hardcoded and should always work")],
}
}
});
#[must_use]
#[allow(clippy::missing_panics_doc)]
fn only_images() -> SystemIndexerRule {
	// Accept-only rule: entries are indexed solely when their extension is one
	// of the common raster/vector image formats below.
	let accept_image_exts = RulePerKind::new_accept_files_by_globs_str([
		"*.{avif,bmp,gif,ico,jpeg,jpg,png,svg,tif,tiff,webp}",
	])
	.expect("this is hardcoded and should always work");

	SystemIndexerRule {
		name: "Only Images",
		default: false,
		rules: vec![accept_image_exts],
	}
}
/// System rule rejecting Git metadata: the `.git` directory itself plus the
/// common dot-files Git reads (`.gitignore`, `.gitattributes`, `.gitkeep`,
/// `.gitconfig`, `.gitmodules`). Enabled by default.
pub static NO_GIT: Lazy<SystemIndexerRule> = Lazy::new(|| {
	let reject_git_files = RulePerKind::new_reject_files_by_globs_str([
		"**/{.git,.gitignore,.gitattributes,.gitkeep,.gitconfig,.gitmodules}",
	])
	.expect("this is hardcoded and should always work");

	SystemIndexerRule {
		name: "No Git files",
		default: true,
		rules: vec![reject_git_files],
	}
});
/// Marker rule with no glob patterns of its own: its presence in a location's
/// rule set is what lets the front-end toggle dynamic `.gitignore`-derived
/// rules on or off. Enabled by default.
pub static GITIGNORE: Lazy<SystemIndexerRule> = Lazy::new(|| SystemIndexerRule {
	name: "Gitignore",
	default: true,
	// Empty rules because this rule is only used to allow frontend to toggle GitIgnoreRules
	rules: vec![],
});
/// System rule accepting only files whose extension is a common image format.
/// Disabled by default.
pub static ONLY_IMAGES: Lazy<SystemIndexerRule> = Lazy::new(|| {
	let accept_images = RulePerKind::new_accept_files_by_globs_str([
		"*.{avif,bmp,gif,ico,jpeg,jpg,png,svg,tif,tiff,webp}",
	])
	.expect("this is hardcoded and should always work");

	SystemIndexerRule {
		name: "Only Images",
		default: false,
		rules: vec![accept_images],
	}
});

View file

@ -31,6 +31,9 @@ impl Serialize for RulePerKind {
"RejectIfChildrenDirectoriesArePresent",
children,
),
Self::IgnoredByGit(_, _) => {
unreachable!("git ignore rules are dynamic and not serialized")
}
}
}
}

View file

@ -0,0 +1,12 @@
-- Update duplicate names to make them unique by appending the row id,
-- e.g. two rules named "No Git" become "No Git_1" and "No Git_2".
-- This must run BEFORE the unique index below, otherwise creating the
-- index would fail on the pre-existing duplicate names.
UPDATE "indexer_rule"
SET "name" = "name" || '_' || "id"
WHERE "name" IN (
	SELECT "name"
	FROM "indexer_rule"
	GROUP BY "name"
	HAVING COUNT(*) > 1
);

-- CreateIndex
CREATE UNIQUE INDEX "indexer_rule_name_key" ON "indexer_rule"("name");

View file

@ -624,7 +624,7 @@ model IndexerRule {
id Int @id @default(autoincrement())
pub_id Bytes @unique
name String?
name String? @unique
default Boolean?
rules_per_kind Bytes?
date_created DateTime?

View file

@ -64,6 +64,7 @@ impl Hash for OldIndexerJobInit {
}
}
}
/// `IndexerJobData` contains the state of the indexer job, which includes a `location_path` that
/// is cached and casted on `PathBuf` from `local_path` column in the `location` table. It also
/// contains some metadata for logging purposes.
@ -207,6 +208,7 @@ impl StatefulJob for OldIndexerJobInit {
errors,
paths_and_sizes,
} = walk(
&location_path,
&to_walk_path,
&indexer_rules,
update_notifier_fn(ctx),
@ -395,6 +397,7 @@ impl StatefulJob for OldIndexerJobInit {
errors,
paths_and_sizes,
} = keep_walking(
location_path,
to_walk_entry,
&data.indexer_rules,
update_notifier_fn(ctx),

View file

@ -79,6 +79,7 @@ pub async fn old_shallow(
let (walked, to_update, to_remove, errors, _s) = {
walk_single_dir(
location_path,
&to_walk_path,
&indexer_rules,
file_paths_db_fetcher_fn!(&db),

View file

@ -1,5 +1,8 @@
use sd_core_file_path_helper::{FilePathMetadata, IsolatedFilePathData};
use sd_core_indexer_rules::{IndexerRule, RuleKind};
use sd_core_indexer_rules::{
seed::{GitIgnoreRules, GITIGNORE},
IndexerRule, RuleKind,
};
use sd_core_prisma_helpers::{file_path_pub_and_cas_ids, file_path_walker};
use sd_prisma::prisma::file_path;
@ -9,6 +12,7 @@ use std::{
collections::{HashMap, HashSet, VecDeque},
future::Future,
hash::{Hash, Hasher},
ops::Deref,
path::{Path, PathBuf},
};
@ -115,7 +119,8 @@ where
/// a list of accepted entries. There are some useful comments in the implementation of this function
/// in case of doubts.
pub(super) async fn walk<FilePathDBFetcherFut, ToRemoveDbFetcherFut>(
root: impl AsRef<Path>,
library_root: impl AsRef<Path>,
current_dir: impl AsRef<Path>,
indexer_rules: &[IndexerRule],
mut update_notifier: impl FnMut(&Path, usize),
file_paths_db_fetcher: impl Fn(Vec<file_path::WhereParam>) -> FilePathDBFetcherFut,
@ -138,11 +143,11 @@ where
ToRemoveDbFetcherFut:
Future<Output = Result<Vec<file_path_pub_and_cas_ids::Data>, IndexerError>>,
{
let root = root.as_ref();
let current_dir = current_dir.as_ref();
let mut to_walk = VecDeque::with_capacity(TO_WALK_QUEUE_INITIAL_CAPACITY);
to_walk.push_back(ToWalkEntry {
path: root.to_path_buf(),
path: current_dir.to_path_buf(),
parent_dir_accepted_by_its_children: None,
maybe_parent: None,
});
@ -156,7 +161,8 @@ where
let last_indexed_count = indexed_paths.len();
let (entry_size, current_to_remove) = inner_walk_single_dir(
root,
library_root.as_ref(),
current_dir,
&entry,
indexer_rules,
&to_remove_db_fetcher,
@ -199,6 +205,7 @@ where
}
pub(super) async fn keep_walking<FilePathDBFetcherFut, ToRemoveDbFetcherFut>(
location_path: impl AsRef<Path>,
to_walk_entry: &ToWalkEntry,
indexer_rules: &[IndexerRule],
mut update_notifier: impl FnMut(&Path, usize),
@ -227,6 +234,7 @@ where
let mut errors = vec![];
let (to_walk_entry_size, to_remove) = inner_walk_single_dir(
location_path,
to_walk_entry.path.clone(),
to_walk_entry,
indexer_rules,
@ -265,7 +273,8 @@ where
}
pub(super) async fn walk_single_dir<FilePathDBFetcherFut, ToRemoveDbFetcherFut>(
root: impl AsRef<Path>,
location_path: impl AsRef<Path>,
current_dir: impl AsRef<Path>,
indexer_rules: &[IndexerRule],
file_paths_db_fetcher: impl Fn(Vec<file_path::WhereParam>) -> FilePathDBFetcherFut,
to_remove_db_fetcher: impl Fn(
@ -289,18 +298,18 @@ where
ToRemoveDbFetcherFut:
Future<Output = Result<Vec<file_path_pub_and_cas_ids::Data>, IndexerError>>,
{
let root = root.as_ref();
let current_directory = current_dir.as_ref();
let mut indexed_paths = HashSet::with_capacity(WALK_SINGLE_DIR_PATHS_BUFFER_INITIAL_CAPACITY);
if add_root {
let metadata = fs::metadata(root)
let metadata = fs::metadata(current_directory)
.await
.map_err(|e| FileIOError::from((root, e)))?;
.map_err(|e| FileIOError::from((current_directory, e)))?;
indexed_paths.insert(WalkingEntry {
iso_file_path: iso_file_path_factory(root, true)?,
maybe_metadata: Some(FilePathMetadata::from_path(root, &metadata)?),
iso_file_path: iso_file_path_factory(current_directory, true)?,
maybe_metadata: Some(FilePathMetadata::from_path(current_directory, &metadata)?),
});
}
@ -308,9 +317,10 @@ where
let mut errors = vec![];
let (root_size, to_remove) = inner_walk_single_dir(
root,
location_path,
current_directory,
&ToWalkEntry {
path: root.to_path_buf(),
path: current_directory.to_path_buf(),
parent_dir_accepted_by_its_children: None,
maybe_parent: None,
},
@ -430,7 +440,8 @@ struct WorkingTable<'a> {
}
async fn inner_walk_single_dir<ToRemoveDbFetcherFut>(
root: impl AsRef<Path>,
library_root: impl AsRef<Path>,
current_dir: impl AsRef<Path>,
ToWalkEntry {
path,
parent_dir_accepted_by_its_children,
@ -465,7 +476,17 @@ where
return (0, vec![]);
};
let root = root.as_ref();
let mut rules = indexer_rules.to_owned();
if rules.iter().any(|rule| GITIGNORE.deref() == rule) {
if let Some(pat) =
GitIgnoreRules::get_rules_if_in_git_repo(library_root.as_ref(), path).await
{
rules.extend(pat.into_iter().map(Into::into));
}
}
let current_dir = current_dir.as_ref();
// Just to make sure...
paths_buffer.clear();
@ -496,7 +517,7 @@ where
accept_by_children_dir
);
let Ok(rules_per_kind) = IndexerRule::apply_all(indexer_rules, &current_path)
let Ok(rules_per_kind) = IndexerRule::apply_all(&rules, &current_path)
.await
.map_err(|e| errors.push(e.into()))
else {
@ -515,6 +536,12 @@ where
continue 'entries;
}
if let Some(f) = rules_per_kind.get(&RuleKind::IgnoredByGit) {
if f.iter().any(|s| !s) {
continue 'entries;
}
}
let Ok(metadata) = entry
.metadata()
.await
@ -562,6 +589,14 @@ where
}
}
// Then we check if there's a git ignore rule for it
if let Some(accept) = rules_per_kind.get(&RuleKind::IgnoredByGit) {
if !accept.iter().any(|&r| r) {
trace!(dir=?current_path, "ignoring files because of git ignore");
continue 'entries;
}
}
// Then we mark this directory the be walked in too
if let Some(ref mut to_walk) = maybe_to_walk {
to_walk.push_back(ToWalkEntry {
@ -606,7 +641,7 @@ where
for ancestor in current_path
.ancestors()
.skip(1) // Skip the current directory as it was already indexed
.take_while(|&ancestor| ancestor != root)
.take_while(|&ancestor| ancestor != current_dir)
{
let Ok(iso_file_path) =
iso_file_path_factory(ancestor, true).map_err(|e| errors.push(e))
@ -821,7 +856,8 @@ mod tests {
let walk_result = walk(
root_path.to_path_buf(),
&[],
root_path.to_path_buf(),
&mut [],
|_, _| {},
|_| async { Ok(vec![]) },
|_, _| async { Ok(vec![]) },
@ -872,7 +908,7 @@ mod tests {
.into_iter()
.collect::<HashSet<_>>();
let only_photos_rule = &[new_indexer_rule(
let mut only_photos_rule = vec![new_indexer_rule(
"only photos".to_string(),
false,
vec![RulePerKind::AcceptFilesByGlob(
@ -886,7 +922,8 @@ mod tests {
let walk_result = walk(
root_path.to_path_buf(),
only_photos_rule,
root_path.to_path_buf(),
&mut only_photos_rule,
|_, _| {},
|_| async { Ok(vec![]) },
|_, _| async { Ok(vec![]) },
@ -934,7 +971,7 @@ mod tests {
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/Cargo.toml"), false), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src/main.rs"), false), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target/"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target/debug"), true), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target/debug/main"), false), metadata },
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner"), true), metadata },
@ -950,7 +987,7 @@ mod tests {
.into_iter()
.collect::<HashSet<_>>();
let git_repos = &[new_indexer_rule(
let mut git_repos = vec![new_indexer_rule(
"git repos".to_string(),
false,
vec![RulePerKind::AcceptIfChildrenDirectoriesArePresent(
@ -960,7 +997,8 @@ mod tests {
let walk_result = walk(
root_path.to_path_buf(),
git_repos,
root_path.to_path_buf(),
&mut git_repos,
|_, _| {},
|_| async { Ok(vec![]) },
|_, _| async { Ok(vec![]) },
@ -979,7 +1017,9 @@ mod tests {
let actual = walk_result.walked.collect::<HashSet<_>>();
if actual != expected {
panic!("difference: {:#?}", expected.difference(&actual));
let not_found = expected.difference(&actual);
let not_expected = actual.difference(&expected);
panic!("difference:\nexpected, but not found: {not_found:#?}\nfound, but not expected:{not_expected:#?}");
}
}
@ -1018,7 +1058,7 @@ mod tests {
.into_iter()
.collect::<HashSet<_>>();
let git_repos_no_deps_no_build_dirs = &[
let mut git_repos_no_deps_no_build_dirs = vec![
new_indexer_rule(
"git repos".to_string(),
false,
@ -1052,7 +1092,8 @@ mod tests {
let walk_result = walk(
root_path.to_path_buf(),
git_repos_no_deps_no_build_dirs,
root_path.to_path_buf(),
&mut git_repos_no_deps_no_build_dirs,
|_, _| {},
|_| async { Ok(vec![]) },
|_, _| async { Ok(vec![]) },
@ -1071,7 +1112,9 @@ mod tests {
let actual = walk_result.walked.collect::<HashSet<_>>();
if actual != expected {
panic!("difference: {:#?}", expected.difference(&actual));
let not_found = expected.difference(&actual);
let not_expected = actual.difference(&expected);
panic!("difference:\nexpected, but not found: {not_found:#?}\nfound, but not expected:{not_expected:#?}");
}
}
}

View file

@ -10,7 +10,7 @@ use crate::{
use sd_core_file_path_helper::{path_is_hidden, MetadataExt};
use sd_core_indexer_rules::{
seed::{no_hidden, no_os_protected},
seed::{NO_HIDDEN, NO_SYSTEM_FILES},
IndexerRule, RuleKind,
};
@ -21,6 +21,7 @@ use sd_utils::{chain_optional_iter, error::FileIOError};
use std::{
collections::HashMap,
io::ErrorKind,
ops::Deref,
path::{Path, PathBuf},
sync::Arc,
};
@ -123,8 +124,8 @@ pub async fn walk(
let task = tokio::spawn(async move {
let path = &path;
let rules = chain_optional_iter(
[IndexerRule::from(no_os_protected())],
[(!with_hidden_files).then(|| IndexerRule::from(no_hidden()))],
[IndexerRule::from(NO_SYSTEM_FILES.deref())],
[(!with_hidden_files).then(|| IndexerRule::from(NO_HIDDEN.deref()))],
);
let mut thumbnails_to_generate = vec![];

View file

@ -204,9 +204,9 @@ impl QuicTransport {
addr.set_port(
TcpListener::bind(addr)
.await
.map_err(|e| QuicTransportError::ListenerSetup(e))?
.map_err(QuicTransportError::ListenerSetup)?
.local_addr()
.map_err(|e| QuicTransportError::ListenerSetup(e))?
.map_err(QuicTransportError::ListenerSetup)?
.port(),
);
}
@ -230,8 +230,8 @@ impl QuicTransport {
.map_err(|e| QuicTransportError::SendChannelClosed(e.to_string()))?;
rx.await
.map_err(|e| QuicTransportError::ReceiveChannelClosed(e))
.and_then(|r| r.map_err(|e| QuicTransportError::InternalEvent(e)))
.map_err(QuicTransportError::ReceiveChannelClosed)
.and_then(|r| r.map_err(QuicTransportError::InternalEvent))
}
pub async fn shutdown(self) {

View file

@ -21,7 +21,8 @@ const ruleKinds: UnionToTuple<RuleKind> = [
'AcceptFilesByGlob',
'RejectFilesByGlob',
'AcceptIfChildrenDirectoriesArePresent',
'RejectIfChildrenDirectoriesArePresent'
'RejectIfChildrenDirectoriesArePresent',
'IgnoredByGit'
];
const ruleKindEnum = z.enum(ruleKinds);

View file

@ -571,7 +571,7 @@ export type Resolution = { width: number; height: number }
export type Response = { Start: { user_code: string; verification_url: string; verification_url_complete: string } } | "Complete" | { Error: string }
export type RuleKind = "AcceptFilesByGlob" | "RejectFilesByGlob" | "AcceptIfChildrenDirectoriesArePresent" | "RejectIfChildrenDirectoriesArePresent"
export type RuleKind = "AcceptFilesByGlob" | "RejectFilesByGlob" | "AcceptIfChildrenDirectoriesArePresent" | "RejectIfChildrenDirectoriesArePresent" | "IgnoredByGit"
export type SavedSearch = { id: number; pub_id: number[]; target: string | null; search: string | null; filters: string | null; name: string | null; icon: string | null; description: string | null; date_created: string | null; date_modified: string | null }