From 80fe8f98f1051412c87b376f61043fc9227a252a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADtor=20Vasconcellos?= Date: Tue, 18 Apr 2023 06:25:57 +0000 Subject: [PATCH] [ENG-286] Set default indexer rules for location (#690) * Implement multiple Glob matches in the same indexer rule - Replace matching logic to use GlobSet instead of simple Glob - Add `No OS protected` indexer rule to avoid indexing OS protected files - Enable `No OS protected` indexer rule by default * Rust fmt * Reduce `No OS protected` rule Glob list by using OR matches * Add globs for android and ios files in `No OS protected` rule * Add some more unix special path to be ignored * Add a new property to IndexerRule to enable it by default when adding a new location: - Modify the Prisma schema to add the default property to the IndexerRule model. - Adjust the IndexerRule struct's create and save logic to consider the new property. - Adjust AddLocationDialog to properly load the default indexer rules from the backend. - Minorly improve IndexerRuleEditor to avoid adding duplicate entries to its assigned form field. - Add editorconfig entries for SQL and Prisma types. * rust fmt * Add Windows users special folders and files to `No OS protected` rule * Construct `No OS protected` globs from string vec - Don't repeat windows globs that require both a root and C: version * Apply review feedback and add error handling for `IndexerRuleEditor` - Revert manual changes made to init migration - Create external migration that adds a `default` prop in `IndexerRule` model - Remove redundant `useMemo` - Replace destructuring of `useQuery` - Provide feedback to user when a error occurs while querying indexer rules * useMemo only for `indexerRulesIds` and not the whole `defaultValues` object - Remove logic to accpet unix root path for windows indexer `No OS protected` rule --- .editorconfig | 12 ++ .../migration.sql | 16 +++ core/prisma/schema.prisma | 1 + core/src/location/indexer/rules.rs | 56 ++++++--- core/src/location/indexer/walk.rs | 23 ++-- core/src/util/seeder.rs | 116 ++++++++++++++++-- .../library/locations/AddLocationDialog.tsx | 30 +++-- .../library/locations/IndexerRuleEditor.tsx | 61 ++++----- packages/client/src/core.ts | 2 +- 9 files changed, 244 insertions(+), 73 deletions(-) create mode 100644 core/prisma/migrations/20230417192552_add_default_param_to_indexerrule/migration.sql diff --git a/.editorconfig b/.editorconfig index eaa6573cf..d0089e19f 100644 --- a/.editorconfig +++ b/.editorconfig @@ -54,3 +54,15 @@ indent_size = 4 indent_size = 4 insert_final_newline = false trim_trailing_whitespace = true + +# SQL +# https://www.sqlstyle.guide/ +[*.sql] +indent_size = 4 +indent_style = space + +# Prisma +# https://www.prisma.io/docs/reference/tools-and-interfaces/prisma-schema/data-model#formatting +[*.prisma] +indent_size = 4 +indent_style = space diff --git a/core/prisma/migrations/20230417192552_add_default_param_to_indexerrule/migration.sql b/core/prisma/migrations/20230417192552_add_default_param_to_indexerrule/migration.sql new file mode 100644 index 000000000..65f8e2783 --- /dev/null +++ b/core/prisma/migrations/20230417192552_add_default_param_to_indexerrule/migration.sql @@ -0,0 +1,16 @@ +-- RedefineTables +PRAGMA foreign_keys=OFF; +CREATE TABLE "new_indexer_rule" ( + "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + "kind" INTEGER NOT NULL, + "name" TEXT NOT NULL, + "default" BOOLEAN NOT NULL DEFAULT false, + "parameters" BLOB NOT NULL, + "date_created" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, + "date_modified" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP +); +INSERT INTO "new_indexer_rule" ("date_created", "date_modified", "id", "kind", "name", "parameters") SELECT "date_created", "date_modified", "id", "kind", "name", "parameters" FROM "indexer_rule"; +DROP TABLE "indexer_rule"; +ALTER TABLE "new_indexer_rule" RENAME TO "indexer_rule"; +PRAGMA foreign_key_check; +PRAGMA foreign_keys=ON; diff --git a/core/prisma/schema.prisma b/core/prisma/schema.prisma index 58d692df0..e8d9889ab 100644 --- a/core/prisma/schema.prisma +++ b/core/prisma/schema.prisma @@ -422,6 +422,7 @@ model IndexerRule { id Int @id @default(autoincrement()) kind Int name String + default Boolean @default(false) parameters Bytes date_created DateTime @default(now()) date_modified DateTime @default(now()) diff --git a/core/src/location/indexer/rules.rs b/core/src/location/indexer/rules.rs index c8c0b5e38..3789ed936 100644 --- a/core/src/location/indexer/rules.rs +++ b/core/src/location/indexer/rules.rs @@ -5,7 +5,7 @@ use crate::{ }; use chrono::{DateTime, Utc}; -use globset::Glob; +use globset::{Glob, GlobSet, GlobSetBuilder}; use int_enum::IntEnum; use rmp_serde; use rspc::Type; @@ -32,7 +32,10 @@ impl IndexerRuleCreateArgs { pub async fn create(self, library: &Library) -> Result { let parameters = match self.kind { RuleKind::AcceptFilesByGlob | RuleKind::RejectFilesByGlob => rmp_serde::to_vec( - &Glob::new(&serde_json::from_slice::(&self.parameters)?)?, + &serde_json::from_slice::>(&self.parameters)? + .into_iter() + .map(|s| Glob::new(&s)) + .collect::, _>>()?, )?, RuleKind::AcceptIfChildrenDirectoriesArePresent @@ -70,8 +73,11 @@ pub enum RuleKind { /// first we change the data structure to a vector, then we serialize it. #[derive(Debug)] pub enum ParametersPerKind { - AcceptFilesByGlob(Glob), - RejectFilesByGlob(Glob), + // TODO: Add an indexer rule that filter files based on their extended attributes + // https://learn.microsoft.com/en-us/windows/win32/fileio/file-attribute-constants + // https://en.wikipedia.org/wiki/Extended_file_attributes + AcceptFilesByGlob(Vec), + RejectFilesByGlob(Vec), AcceptIfChildrenDirectoriesArePresent(HashSet), RejectIfChildrenDirectoriesArePresent(HashSet), } @@ -109,17 +115,19 @@ pub struct IndexerRule { pub id: Option, pub kind: RuleKind, pub name: String, + pub default: bool, pub parameters: ParametersPerKind, pub date_created: DateTime, pub date_modified: DateTime, } impl IndexerRule { - pub fn new(kind: RuleKind, name: String, parameters: ParametersPerKind) -> Self { + pub fn new(kind: RuleKind, name: String, default: bool, parameters: ParametersPerKind) -> Self { Self { id: None, kind, name, + default, parameters, date_created: Utc::now(), date_modified: Utc::now(), @@ -140,7 +148,7 @@ impl IndexerRule { self.kind as i32, self.name, self.parameters.serialize()?, - vec![], + vec![indexer_rule::default::set(self.default)], ), vec![indexer_rule::date_modified::set(Utc::now().into())], ) @@ -153,7 +161,7 @@ impl IndexerRule { self.kind as i32, self.name, self.parameters.serialize()?, - vec![], + vec![indexer_rule::default::set(self.default)], ) .exec() .await?; @@ -173,6 +181,7 @@ impl TryFrom<&indexer_rule::Data> for IndexerRule { id: Some(data.id), kind, name: data.name.clone(), + default: data.default, parameters: match kind { RuleKind::AcceptFilesByGlob | RuleKind::RejectFilesByGlob => { let glob_str = rmp_serde::from_slice(&data.parameters)?; @@ -208,12 +217,24 @@ impl TryFrom for IndexerRule { } } -fn accept_by_glob(source: impl AsRef, glob: &Glob) -> Result { - Ok(glob.compile_matcher().is_match(source.as_ref())) +// TODO: memoize this +fn globset_from_globs(globs: &[Glob]) -> Result { + globs + .iter() + .fold(&mut GlobSetBuilder::new(), |builder, glob| { + builder.add(glob.to_owned()) + }) + .build() } -fn reject_by_glob(source: impl AsRef, reject_glob: &Glob) -> Result { - Ok(!reject_glob.compile_matcher().is_match(source.as_ref())) +fn accept_by_glob(source: impl AsRef, globs: &[Glob]) -> Result { + globset_from_globs(globs) + .map(|glob_set| glob_set.is_match(source.as_ref())) + .map_err(IndexerError::GlobBuilderError) +} + +fn reject_by_glob(source: impl AsRef, reject_globs: &[Glob]) -> Result { + accept_by_glob(source.as_ref(), reject_globs).map(|accept| !accept) } async fn accept_dir_for_its_children( @@ -267,7 +288,8 @@ mod tests { let rule = IndexerRule::new( RuleKind::RejectFilesByGlob, "ignore hidden files".to_string(), - ParametersPerKind::RejectFilesByGlob(Glob::new("**/.*").unwrap()), + false, + ParametersPerKind::RejectFilesByGlob(vec![Glob::new("**/.*").unwrap()]), ); assert!(!rule.apply(hidden).await.unwrap()); assert!(rule.apply(normal).await.unwrap()); @@ -286,7 +308,10 @@ mod tests { let rule = IndexerRule::new( RuleKind::RejectFilesByGlob, "ignore build directory".to_string(), - ParametersPerKind::RejectFilesByGlob(Glob::new("{**/target/*,**/target}").unwrap()), + false, + ParametersPerKind::RejectFilesByGlob(vec![ + Glob::new("{**/target/*,**/target}").unwrap() + ]), ); assert!(rule.apply(project_file).await.unwrap()); @@ -309,7 +334,8 @@ mod tests { let rule = IndexerRule::new( RuleKind::AcceptFilesByGlob, "only photos".to_string(), - ParametersPerKind::AcceptFilesByGlob(Glob::new("*.{jpg,png,jpeg}").unwrap()), + false, + ParametersPerKind::AcceptFilesByGlob(vec![Glob::new("*.{jpg,png,jpeg}").unwrap()]), ); assert!(!rule.apply(text).await.unwrap()); assert!(rule.apply(png).await.unwrap()); @@ -344,6 +370,7 @@ mod tests { let rule = IndexerRule::new( RuleKind::AcceptIfChildrenDirectoriesArePresent, "git projects".to_string(), + false, ParametersPerKind::AcceptIfChildrenDirectoriesArePresent(childrens), ); @@ -373,6 +400,7 @@ mod tests { let rule = IndexerRule::new( RuleKind::RejectIfChildrenDirectoriesArePresent, "git projects".to_string(), + false, ParametersPerKind::RejectIfChildrenDirectoriesArePresent(childrens), ); diff --git a/core/src/location/indexer/walk.rs b/core/src/location/indexer/walk.rs index 831dce7b6..eeb45c05d 100644 --- a/core/src/location/indexer/walk.rs +++ b/core/src/location/indexer/walk.rs @@ -557,7 +557,10 @@ mod tests { vec![IndexerRule::new( RuleKind::AcceptFilesByGlob, "only photos".to_string(), - ParametersPerKind::AcceptFilesByGlob(Glob::new("{*.png,*.jpg,*.jpeg}").unwrap()), + false, + ParametersPerKind::AcceptFilesByGlob(vec![ + Glob::new("{*.png,*.jpg,*.jpeg}").unwrap() + ]), )], )] .into_iter() @@ -615,6 +618,7 @@ mod tests { vec![IndexerRule::new( RuleKind::AcceptIfChildrenDirectoriesArePresent, "git repos".to_string(), + false, ParametersPerKind::AcceptIfChildrenDirectoriesArePresent( [".git".to_string()].into_iter().collect(), ), @@ -670,6 +674,7 @@ mod tests { vec![IndexerRule::new( RuleKind::AcceptIfChildrenDirectoriesArePresent, "git repos".to_string(), + false, ParametersPerKind::AcceptIfChildrenDirectoriesArePresent( [".git".to_string()].into_iter().collect(), ), @@ -681,16 +686,20 @@ mod tests { IndexerRule::new( RuleKind::RejectFilesByGlob, "reject node_modules".to_string(), - ParametersPerKind::RejectFilesByGlob( - Glob::new("{**/node_modules/*,**/node_modules}").unwrap(), - ), + false, + ParametersPerKind::RejectFilesByGlob(vec![Glob::new( + "{**/node_modules/*,**/node_modules}", + ) + .unwrap()]), ), IndexerRule::new( RuleKind::RejectFilesByGlob, "reject rust build dir".to_string(), - ParametersPerKind::RejectFilesByGlob( - Glob::new("{**/target/*,**/target}").unwrap(), - ), + false, + ParametersPerKind::RejectFilesByGlob(vec![Glob::new( + "{**/target/*,**/target}", + ) + .unwrap()]), ), ], ), diff --git a/core/src/util/seeder.rs b/core/src/util/seeder.rs index 81bd6c92c..7542cc415 100644 --- a/core/src/util/seeder.rs +++ b/core/src/util/seeder.rs @@ -21,14 +21,111 @@ pub async fn indexer_rules_seeder(client: &PrismaClient) -> Result<(), SeederErr for rule in [ IndexerRule::new( RuleKind::RejectFilesByGlob, - "Reject Hidden Files".to_string(), - ParametersPerKind::RejectFilesByGlob( - Glob::new("**/.*").map_err(IndexerError::GlobBuilderError)?, - ), + // TODO: On windows, beside the listed files, any file with the FILE_ATTRIBUTE_SYSTEM should be considered a system file + // https://learn.microsoft.com/en-us/windows/win32/fileio/file-attribute-constants#FILE_ATTRIBUTE_SYSTEM + "No OS protected".to_string(), + true, + ParametersPerKind::RejectFilesByGlob([ + vec![ + "**/.spacedrive", + ], + // Globset, even on Windows, requires the use of / as a separator + // https://github.com/github/gitignore/blob/main/Global/Windows.gitignore + #[cfg(target_os = "windows")] + vec![ + // Windows thumbnail cache files + "**/{Thumbs.db,Thumbs.db:encryptable,ehthumbs.db,ehthumbs_vista.db}", + // Dump file + "**/*.stackdump", + // Folder config file + "**/[Dd]esktop.ini", + // Recycle Bin used on file shares + "**/$RECYCLE.BIN", + // Chkdsk recovery directory + "**/FOUND.[0-9][0-9][0-9]", + // User special files + "C:/Users/*/NTUSER.DAT*", + "C:/Users/*/ntuser.dat*", + "C:/Users/*/{ntuser.ini,ntuser.dat,NTUSER.DAT}", + // User special folders (most of these the user dont even have permission to access) + "C:/Users/*/{Cookies,AppData,NetHood,Recent,PrintHood,SendTo,Templates,Start Menu,Application Data,Local Settings}", + // System special folders + "C:/{$Recycle.Bin,$WinREAgent,Documents and Settings,Program Files,Program Files (x86),ProgramData,Recovery,PerfLogs,Windows,Windows.old}", + // NTFS internal dir, can exists on any drive + "[A-Z]:/System Volume Information", + // System special files + "C:/{config,pagefile,hiberfil}.sys", + // Windows can create a swapfile on any drive + "[A-Z]:/swapfile.sys", + "C:/DumpStack.log.tmp", + ], + // https://github.com/github/gitignore/blob/main/Global/macOS.gitignore + // https://developer.apple.com/library/archive/documentation/FileManagement/Conceptual/FileSystemProgrammingGuide/FileSystemOverview/FileSystemOverview.html#//apple_ref/doc/uid/TP40010672-CH2-SW14 + #[cfg(any(target_os = "ios", target_os = "macos"))] + vec![ + "**/.{DS_Store,AppleDouble,LSOverride}", + // Icon must end with two \r + "**/Icon\r\r", + // Thumbnails + "**/._*", + ], + #[cfg(target_os = "macos")] + vec![ + "/{System,Network,Library,Applications}", + "/Users/*/{Library,Applications}", + // Files that might appear in the root of a volume + "**/.{DocumentRevisions-V100,fseventsd,Spotlight-V100,TemporaryItems,Trashes,VolumeIcon.icns,com.apple.timemachine.donotpresent}", + // Directories potentially created on remote AFP share + "**/.{AppleDB,AppleDesktop,apdisk}", + "**/{Network Trash Folder,Temporary Items}", + ], + // https://github.com/github/gitignore/blob/main/Global/Linux.gitignore + #[cfg(target_os = "linux")] + vec![ + "**/*~", + // temporary files which can be created if a process still has a handle open of a deleted file + "**/.fuse_hidden*", + // KDE directory preferences + "**/.directory", + // Linux trash folder which might appear on any partition or disk + "**/.Trash-*", + // .nfs files are created when an open file is removed but is still being accessed + "**/.nfs*", + ], + #[cfg(target_os = "android")] + vec![ + "**/.nomedia", + "**/.thumbnails", + ], + // https://en.wikipedia.org/wiki/Unix_filesystem#Conventional_directory_layout + // https://en.wikipedia.org/wiki/Filesystem_Hierarchy_Standard + #[cfg(target_family = "unix")] + vec![ + // Directories containing unix memory/device mapped files/dirs + "/{dev,sys,proc}", + // Directories containing special files for current running programs + "/{run,var,boot}", + // ext2-4 recovery directory + "**/lost+found", + ], + ] + .into_iter() + .flatten() + .map(Glob::new) + .collect::, _>>().map_err(IndexerError::GlobBuilderError)?), + ), + IndexerRule::new( + RuleKind::RejectFilesByGlob, + "No Hidden".to_string(), + true, + ParametersPerKind::RejectFilesByGlob(vec![ + Glob::new("**/.*").map_err(IndexerError::GlobBuilderError)? + ]), ), IndexerRule::new( RuleKind::AcceptIfChildrenDirectoriesArePresent, - "Git Repositories".into(), + "Only Git Repositories".into(), + false, ParametersPerKind::AcceptIfChildrenDirectoriesArePresent( [".git".to_string()].into_iter().collect(), ), @@ -36,10 +133,11 @@ pub async fn indexer_rules_seeder(client: &PrismaClient) -> Result<(), SeederErr IndexerRule::new( RuleKind::AcceptFilesByGlob, "Only Images".to_string(), - ParametersPerKind::AcceptFilesByGlob( - Glob::new("*.{jpg,png,jpeg,gif,webp}") - .map_err(IndexerError::GlobBuilderError)?, - ), + false, + ParametersPerKind::AcceptFilesByGlob(vec![Glob::new( + "*.{avif,bmp,gif,ico,jpeg,jpg,png,svg,tif,tiff,webp}", + ) + .map_err(IndexerError::GlobBuilderError)?]), ), ] { rule.save(client).await?; diff --git a/interface/app/$libraryId/settings/library/locations/AddLocationDialog.tsx b/interface/app/$libraryId/settings/library/locations/AddLocationDialog.tsx index fdde0c0fc..57011f5c7 100644 --- a/interface/app/$libraryId/settings/library/locations/AddLocationDialog.tsx +++ b/interface/app/$libraryId/settings/library/locations/AddLocationDialog.tsx @@ -1,9 +1,9 @@ import { ErrorMessage } from '@hookform/error-message'; import { RSPCError } from '@rspc/client'; import { useQueryClient } from '@tanstack/react-query'; -import { useEffect, useState } from 'react'; +import { useEffect, useMemo, useState } from 'react'; import { Controller } from 'react-hook-form'; -import { useLibraryMutation } from '@sd/client'; +import { useLibraryMutation, useLibraryQuery } from '@sd/client'; import { Dialog, UseDialogProps, useDialog } from '@sd/ui'; import { Input, useZodForm, z } from '@sd/ui/src/forms'; import { showAlertDialog } from '~/components/AlertDialog'; @@ -41,25 +41,31 @@ export const openDirectoryPickerDialog = async (platform: Platform): Promise { - const dialog = useDialog(props); +export const AddLocationDialog = ({ path, ...dialogProps }: Props) => { + const dialog = useDialog(dialogProps); const platform = usePlatform(); const queryClient = useQueryClient(); const createLocation = useLibraryMutation('locations.create'); const relinkLocation = useLibraryMutation('locations.relink'); + const listIndexerRules = useLibraryQuery(['locations.indexer_rules.list']); const addLocationToLibrary = useLibraryMutation('locations.addLibrary'); const [remoteError, setRemoteError] = useState(null); - const form = useZodForm({ - schema, - defaultValues: { - path: props.path, - indexerRulesIds: [] - } - }); + // This is required because indexRules is undefined on first render + const indexerRulesIds = useMemo( + () => listIndexerRules.data?.filter((rule) => rule.default).map((rule) => rule.id) ?? [], + [listIndexerRules.data] + ); + + const form = useZodForm({ schema, defaultValues: { path, indexerRulesIds } }); useEffect(() => { - // Clear custom remote error when user performs any change on the form + // Update form values when default value changes and the user hasn't made any changes + if (!form.formState.isDirty) form.reset({ path, indexerRulesIds }); + }, [form, path, indexerRulesIds]); + + useEffect(() => { + // TODO: Instead of clearing the error on every change, we should just validate with backend again const subscription = form.watch(() => { form.clearErrors(REMOTE_ERROR_FORM_FIELD); setRemoteError(null); diff --git a/interface/app/$libraryId/settings/library/locations/IndexerRuleEditor.tsx b/interface/app/$libraryId/settings/library/locations/IndexerRuleEditor.tsx index 7ff2cf6b4..3ee193f1e 100644 --- a/interface/app/$libraryId/settings/library/locations/IndexerRuleEditor.tsx +++ b/interface/app/$libraryId/settings/library/locations/IndexerRuleEditor.tsx @@ -22,38 +22,39 @@ export function IndexerRuleEditor({ field, editable }: IndexerRuleEditorProps) { - const listIndexerRules = useLibraryQuery(['locations.indexer_rules.list'], {}); - + const listIndexerRules = useLibraryQuery(['locations.indexer_rules.list']); + const indexRules = listIndexerRules.data; return ( - {listIndexerRules.data - ? listIndexerRules.data.map((rule) => { - const { id, name } = rule; - const enabled = field.value.includes(id); - return ( - - ); - }) - : editable ||

No indexer rules available

} + {indexRules ? ( + indexRules.map((rule) => { + const { id, name } = rule; + const enabled = field.value.includes(id); + return ( + + ); + }) + ) : ( +

+ {listIndexerRules.isError + ? 'Error while retriving indexer rules' + : 'No indexer rules available'} +

+ )} {/* {editable && (