[ENG-286] Set default indexer rules for location (#690)

* Implement multiple Glob matches in the same indexer rule
 - Replace matching logic to use GlobSet instead of simple Glob
 - Add `No OS protected` indexer rule to avoid indexing OS protected files
 - Enable `No OS protected` indexer rule by default

* Rust fmt

* Reduce `No OS protected` rule Glob list by using OR matches

* Add globs for android and ios files in `No OS protected` rule

* Add some more unix special path to be ignored

* Add a new property to IndexerRule to enable it by default when adding a new location:
- Modify the Prisma schema to add the default property to the IndexerRule model.
- Adjust the IndexerRule struct's create and save logic to consider the new property.
- Adjust AddLocationDialog to properly load the default indexer rules from the backend.
- Minorly improve IndexerRuleEditor to avoid adding duplicate entries to its assigned form field.
- Add editorconfig entries for SQL and Prisma types.

* rust fmt

* Add Windows users special folders and files to `No OS protected` rule

* Construct `No OS protected` globs from string vec
 - Don't repeat windows globs that require both a root and C: version

* Apply review feedback and add error handling for `IndexerRuleEditor`
- Revert manual changes made to init migration
- Create external migration that adds a `default` prop in `IndexerRule` model
- Remove redundant `useMemo`
- Replace destructuring of `useQuery`
- Provide feedback to user when a error occurs while querying indexer rules

* useMemo only for `indexerRulesIds` and not the whole `defaultValues` object
 - Remove logic to accpet unix root path for windows indexer `No OS protected` rule
This commit is contained in:
Vítor Vasconcellos 2023-04-18 06:25:57 +00:00 committed by GitHub
parent 8ea6a1c329
commit 80fe8f98f1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 244 additions and 73 deletions

View file

@ -54,3 +54,15 @@ indent_size = 4
indent_size = 4
insert_final_newline = false
trim_trailing_whitespace = true
# SQL
# https://www.sqlstyle.guide/
[*.sql]
indent_size = 4
indent_style = space
# Prisma
# https://www.prisma.io/docs/reference/tools-and-interfaces/prisma-schema/data-model#formatting
[*.prisma]
indent_size = 4
indent_style = space

View file

@ -0,0 +1,16 @@
-- RedefineTables
PRAGMA foreign_keys=OFF;
CREATE TABLE "new_indexer_rule" (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"kind" INTEGER NOT NULL,
"name" TEXT NOT NULL,
"default" BOOLEAN NOT NULL DEFAULT false,
"parameters" BLOB NOT NULL,
"date_created" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
"date_modified" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP
);
INSERT INTO "new_indexer_rule" ("date_created", "date_modified", "id", "kind", "name", "parameters") SELECT "date_created", "date_modified", "id", "kind", "name", "parameters" FROM "indexer_rule";
DROP TABLE "indexer_rule";
ALTER TABLE "new_indexer_rule" RENAME TO "indexer_rule";
PRAGMA foreign_key_check;
PRAGMA foreign_keys=ON;

View file

@ -422,6 +422,7 @@ model IndexerRule {
id Int @id @default(autoincrement())
kind Int
name String
default Boolean @default(false)
parameters Bytes
date_created DateTime @default(now())
date_modified DateTime @default(now())

View file

@ -5,7 +5,7 @@ use crate::{
};
use chrono::{DateTime, Utc};
use globset::Glob;
use globset::{Glob, GlobSet, GlobSetBuilder};
use int_enum::IntEnum;
use rmp_serde;
use rspc::Type;
@ -32,7 +32,10 @@ impl IndexerRuleCreateArgs {
pub async fn create(self, library: &Library) -> Result<indexer_rule::Data, IndexerError> {
let parameters = match self.kind {
RuleKind::AcceptFilesByGlob | RuleKind::RejectFilesByGlob => rmp_serde::to_vec(
&Glob::new(&serde_json::from_slice::<String>(&self.parameters)?)?,
&serde_json::from_slice::<Vec<String>>(&self.parameters)?
.into_iter()
.map(|s| Glob::new(&s))
.collect::<Result<Vec<Glob>, _>>()?,
)?,
RuleKind::AcceptIfChildrenDirectoriesArePresent
@ -70,8 +73,11 @@ pub enum RuleKind {
/// first we change the data structure to a vector, then we serialize it.
#[derive(Debug)]
pub enum ParametersPerKind {
AcceptFilesByGlob(Glob),
RejectFilesByGlob(Glob),
// TODO: Add an indexer rule that filter files based on their extended attributes
// https://learn.microsoft.com/en-us/windows/win32/fileio/file-attribute-constants
// https://en.wikipedia.org/wiki/Extended_file_attributes
AcceptFilesByGlob(Vec<Glob>),
RejectFilesByGlob(Vec<Glob>),
AcceptIfChildrenDirectoriesArePresent(HashSet<String>),
RejectIfChildrenDirectoriesArePresent(HashSet<String>),
}
@ -109,17 +115,19 @@ pub struct IndexerRule {
pub id: Option<i32>,
pub kind: RuleKind,
pub name: String,
pub default: bool,
pub parameters: ParametersPerKind,
pub date_created: DateTime<Utc>,
pub date_modified: DateTime<Utc>,
}
impl IndexerRule {
pub fn new(kind: RuleKind, name: String, parameters: ParametersPerKind) -> Self {
pub fn new(kind: RuleKind, name: String, default: bool, parameters: ParametersPerKind) -> Self {
Self {
id: None,
kind,
name,
default,
parameters,
date_created: Utc::now(),
date_modified: Utc::now(),
@ -140,7 +148,7 @@ impl IndexerRule {
self.kind as i32,
self.name,
self.parameters.serialize()?,
vec![],
vec![indexer_rule::default::set(self.default)],
),
vec![indexer_rule::date_modified::set(Utc::now().into())],
)
@ -153,7 +161,7 @@ impl IndexerRule {
self.kind as i32,
self.name,
self.parameters.serialize()?,
vec![],
vec![indexer_rule::default::set(self.default)],
)
.exec()
.await?;
@ -173,6 +181,7 @@ impl TryFrom<&indexer_rule::Data> for IndexerRule {
id: Some(data.id),
kind,
name: data.name.clone(),
default: data.default,
parameters: match kind {
RuleKind::AcceptFilesByGlob | RuleKind::RejectFilesByGlob => {
let glob_str = rmp_serde::from_slice(&data.parameters)?;
@ -208,12 +217,24 @@ impl TryFrom<indexer_rule::Data> for IndexerRule {
}
}
fn accept_by_glob(source: impl AsRef<Path>, glob: &Glob) -> Result<bool, IndexerError> {
Ok(glob.compile_matcher().is_match(source.as_ref()))
// TODO: memoize this
fn globset_from_globs(globs: &[Glob]) -> Result<GlobSet, globset::Error> {
globs
.iter()
.fold(&mut GlobSetBuilder::new(), |builder, glob| {
builder.add(glob.to_owned())
})
.build()
}
fn reject_by_glob(source: impl AsRef<Path>, reject_glob: &Glob) -> Result<bool, IndexerError> {
Ok(!reject_glob.compile_matcher().is_match(source.as_ref()))
fn accept_by_glob(source: impl AsRef<Path>, globs: &[Glob]) -> Result<bool, IndexerError> {
globset_from_globs(globs)
.map(|glob_set| glob_set.is_match(source.as_ref()))
.map_err(IndexerError::GlobBuilderError)
}
fn reject_by_glob(source: impl AsRef<Path>, reject_globs: &[Glob]) -> Result<bool, IndexerError> {
accept_by_glob(source.as_ref(), reject_globs).map(|accept| !accept)
}
async fn accept_dir_for_its_children(
@ -267,7 +288,8 @@ mod tests {
let rule = IndexerRule::new(
RuleKind::RejectFilesByGlob,
"ignore hidden files".to_string(),
ParametersPerKind::RejectFilesByGlob(Glob::new("**/.*").unwrap()),
false,
ParametersPerKind::RejectFilesByGlob(vec![Glob::new("**/.*").unwrap()]),
);
assert!(!rule.apply(hidden).await.unwrap());
assert!(rule.apply(normal).await.unwrap());
@ -286,7 +308,10 @@ mod tests {
let rule = IndexerRule::new(
RuleKind::RejectFilesByGlob,
"ignore build directory".to_string(),
ParametersPerKind::RejectFilesByGlob(Glob::new("{**/target/*,**/target}").unwrap()),
false,
ParametersPerKind::RejectFilesByGlob(vec![
Glob::new("{**/target/*,**/target}").unwrap()
]),
);
assert!(rule.apply(project_file).await.unwrap());
@ -309,7 +334,8 @@ mod tests {
let rule = IndexerRule::new(
RuleKind::AcceptFilesByGlob,
"only photos".to_string(),
ParametersPerKind::AcceptFilesByGlob(Glob::new("*.{jpg,png,jpeg}").unwrap()),
false,
ParametersPerKind::AcceptFilesByGlob(vec![Glob::new("*.{jpg,png,jpeg}").unwrap()]),
);
assert!(!rule.apply(text).await.unwrap());
assert!(rule.apply(png).await.unwrap());
@ -344,6 +370,7 @@ mod tests {
let rule = IndexerRule::new(
RuleKind::AcceptIfChildrenDirectoriesArePresent,
"git projects".to_string(),
false,
ParametersPerKind::AcceptIfChildrenDirectoriesArePresent(childrens),
);
@ -373,6 +400,7 @@ mod tests {
let rule = IndexerRule::new(
RuleKind::RejectIfChildrenDirectoriesArePresent,
"git projects".to_string(),
false,
ParametersPerKind::RejectIfChildrenDirectoriesArePresent(childrens),
);

View file

@ -557,7 +557,10 @@ mod tests {
vec![IndexerRule::new(
RuleKind::AcceptFilesByGlob,
"only photos".to_string(),
ParametersPerKind::AcceptFilesByGlob(Glob::new("{*.png,*.jpg,*.jpeg}").unwrap()),
false,
ParametersPerKind::AcceptFilesByGlob(vec![
Glob::new("{*.png,*.jpg,*.jpeg}").unwrap()
]),
)],
)]
.into_iter()
@ -615,6 +618,7 @@ mod tests {
vec![IndexerRule::new(
RuleKind::AcceptIfChildrenDirectoriesArePresent,
"git repos".to_string(),
false,
ParametersPerKind::AcceptIfChildrenDirectoriesArePresent(
[".git".to_string()].into_iter().collect(),
),
@ -670,6 +674,7 @@ mod tests {
vec![IndexerRule::new(
RuleKind::AcceptIfChildrenDirectoriesArePresent,
"git repos".to_string(),
false,
ParametersPerKind::AcceptIfChildrenDirectoriesArePresent(
[".git".to_string()].into_iter().collect(),
),
@ -681,16 +686,20 @@ mod tests {
IndexerRule::new(
RuleKind::RejectFilesByGlob,
"reject node_modules".to_string(),
ParametersPerKind::RejectFilesByGlob(
Glob::new("{**/node_modules/*,**/node_modules}").unwrap(),
),
false,
ParametersPerKind::RejectFilesByGlob(vec![Glob::new(
"{**/node_modules/*,**/node_modules}",
)
.unwrap()]),
),
IndexerRule::new(
RuleKind::RejectFilesByGlob,
"reject rust build dir".to_string(),
ParametersPerKind::RejectFilesByGlob(
Glob::new("{**/target/*,**/target}").unwrap(),
),
false,
ParametersPerKind::RejectFilesByGlob(vec![Glob::new(
"{**/target/*,**/target}",
)
.unwrap()]),
),
],
),

View file

@ -21,14 +21,111 @@ pub async fn indexer_rules_seeder(client: &PrismaClient) -> Result<(), SeederErr
for rule in [
IndexerRule::new(
RuleKind::RejectFilesByGlob,
"Reject Hidden Files".to_string(),
ParametersPerKind::RejectFilesByGlob(
Glob::new("**/.*").map_err(IndexerError::GlobBuilderError)?,
),
// TODO: On windows, beside the listed files, any file with the FILE_ATTRIBUTE_SYSTEM should be considered a system file
// https://learn.microsoft.com/en-us/windows/win32/fileio/file-attribute-constants#FILE_ATTRIBUTE_SYSTEM
"No OS protected".to_string(),
true,
ParametersPerKind::RejectFilesByGlob([
vec![
"**/.spacedrive",
],
// Globset, even on Windows, requires the use of / as a separator
// https://github.com/github/gitignore/blob/main/Global/Windows.gitignore
#[cfg(target_os = "windows")]
vec![
// Windows thumbnail cache files
"**/{Thumbs.db,Thumbs.db:encryptable,ehthumbs.db,ehthumbs_vista.db}",
// Dump file
"**/*.stackdump",
// Folder config file
"**/[Dd]esktop.ini",
// Recycle Bin used on file shares
"**/$RECYCLE.BIN",
// Chkdsk recovery directory
"**/FOUND.[0-9][0-9][0-9]",
// User special files
"C:/Users/*/NTUSER.DAT*",
"C:/Users/*/ntuser.dat*",
"C:/Users/*/{ntuser.ini,ntuser.dat,NTUSER.DAT}",
// User special folders (most of these the user dont even have permission to access)
"C:/Users/*/{Cookies,AppData,NetHood,Recent,PrintHood,SendTo,Templates,Start Menu,Application Data,Local Settings}",
// System special folders
"C:/{$Recycle.Bin,$WinREAgent,Documents and Settings,Program Files,Program Files (x86),ProgramData,Recovery,PerfLogs,Windows,Windows.old}",
// NTFS internal dir, can exists on any drive
"[A-Z]:/System Volume Information",
// System special files
"C:/{config,pagefile,hiberfil}.sys",
// Windows can create a swapfile on any drive
"[A-Z]:/swapfile.sys",
"C:/DumpStack.log.tmp",
],
// https://github.com/github/gitignore/blob/main/Global/macOS.gitignore
// https://developer.apple.com/library/archive/documentation/FileManagement/Conceptual/FileSystemProgrammingGuide/FileSystemOverview/FileSystemOverview.html#//apple_ref/doc/uid/TP40010672-CH2-SW14
#[cfg(any(target_os = "ios", target_os = "macos"))]
vec![
"**/.{DS_Store,AppleDouble,LSOverride}",
// Icon must end with two \r
"**/Icon\r\r",
// Thumbnails
"**/._*",
],
#[cfg(target_os = "macos")]
vec![
"/{System,Network,Library,Applications}",
"/Users/*/{Library,Applications}",
// Files that might appear in the root of a volume
"**/.{DocumentRevisions-V100,fseventsd,Spotlight-V100,TemporaryItems,Trashes,VolumeIcon.icns,com.apple.timemachine.donotpresent}",
// Directories potentially created on remote AFP share
"**/.{AppleDB,AppleDesktop,apdisk}",
"**/{Network Trash Folder,Temporary Items}",
],
// https://github.com/github/gitignore/blob/main/Global/Linux.gitignore
#[cfg(target_os = "linux")]
vec![
"**/*~",
// temporary files which can be created if a process still has a handle open of a deleted file
"**/.fuse_hidden*",
// KDE directory preferences
"**/.directory",
// Linux trash folder which might appear on any partition or disk
"**/.Trash-*",
// .nfs files are created when an open file is removed but is still being accessed
"**/.nfs*",
],
#[cfg(target_os = "android")]
vec![
"**/.nomedia",
"**/.thumbnails",
],
// https://en.wikipedia.org/wiki/Unix_filesystem#Conventional_directory_layout
// https://en.wikipedia.org/wiki/Filesystem_Hierarchy_Standard
#[cfg(target_family = "unix")]
vec![
// Directories containing unix memory/device mapped files/dirs
"/{dev,sys,proc}",
// Directories containing special files for current running programs
"/{run,var,boot}",
// ext2-4 recovery directory
"**/lost+found",
],
]
.into_iter()
.flatten()
.map(Glob::new)
.collect::<Result<Vec<Glob>, _>>().map_err(IndexerError::GlobBuilderError)?),
),
IndexerRule::new(
RuleKind::RejectFilesByGlob,
"No Hidden".to_string(),
true,
ParametersPerKind::RejectFilesByGlob(vec![
Glob::new("**/.*").map_err(IndexerError::GlobBuilderError)?
]),
),
IndexerRule::new(
RuleKind::AcceptIfChildrenDirectoriesArePresent,
"Git Repositories".into(),
"Only Git Repositories".into(),
false,
ParametersPerKind::AcceptIfChildrenDirectoriesArePresent(
[".git".to_string()].into_iter().collect(),
),
@ -36,10 +133,11 @@ pub async fn indexer_rules_seeder(client: &PrismaClient) -> Result<(), SeederErr
IndexerRule::new(
RuleKind::AcceptFilesByGlob,
"Only Images".to_string(),
ParametersPerKind::AcceptFilesByGlob(
Glob::new("*.{jpg,png,jpeg,gif,webp}")
.map_err(IndexerError::GlobBuilderError)?,
),
false,
ParametersPerKind::AcceptFilesByGlob(vec![Glob::new(
"*.{avif,bmp,gif,ico,jpeg,jpg,png,svg,tif,tiff,webp}",
)
.map_err(IndexerError::GlobBuilderError)?]),
),
] {
rule.save(client).await?;

View file

@ -1,9 +1,9 @@
import { ErrorMessage } from '@hookform/error-message';
import { RSPCError } from '@rspc/client';
import { useQueryClient } from '@tanstack/react-query';
import { useEffect, useState } from 'react';
import { useEffect, useMemo, useState } from 'react';
import { Controller } from 'react-hook-form';
import { useLibraryMutation } from '@sd/client';
import { useLibraryMutation, useLibraryQuery } from '@sd/client';
import { Dialog, UseDialogProps, useDialog } from '@sd/ui';
import { Input, useZodForm, z } from '@sd/ui/src/forms';
import { showAlertDialog } from '~/components/AlertDialog';
@ -41,25 +41,31 @@ export const openDirectoryPickerDialog = async (platform: Platform): Promise<nul
return path;
};
export const AddLocationDialog = (props: Props) => {
const dialog = useDialog(props);
export const AddLocationDialog = ({ path, ...dialogProps }: Props) => {
const dialog = useDialog(dialogProps);
const platform = usePlatform();
const queryClient = useQueryClient();
const createLocation = useLibraryMutation('locations.create');
const relinkLocation = useLibraryMutation('locations.relink');
const listIndexerRules = useLibraryQuery(['locations.indexer_rules.list']);
const addLocationToLibrary = useLibraryMutation('locations.addLibrary');
const [remoteError, setRemoteError] = useState<null | RemoteErrorFormMessage>(null);
const form = useZodForm({
schema,
defaultValues: {
path: props.path,
indexerRulesIds: []
}
});
// This is required because indexRules is undefined on first render
const indexerRulesIds = useMemo(
() => listIndexerRules.data?.filter((rule) => rule.default).map((rule) => rule.id) ?? [],
[listIndexerRules.data]
);
const form = useZodForm({ schema, defaultValues: { path, indexerRulesIds } });
useEffect(() => {
// Clear custom remote error when user performs any change on the form
// Update form values when default value changes and the user hasn't made any changes
if (!form.formState.isDirty) form.reset({ path, indexerRulesIds });
}, [form, path, indexerRulesIds]);
useEffect(() => {
// TODO: Instead of clearing the error on every change, we should just validate with backend again
const subscription = form.watch(() => {
form.clearErrors(REMOTE_ERROR_FORM_FIELD);
setRemoteError(null);

View file

@ -22,38 +22,39 @@ export function IndexerRuleEditor<T extends FieldType>({
field,
editable
}: IndexerRuleEditorProps<T>) {
const listIndexerRules = useLibraryQuery(['locations.indexer_rules.list'], {});
const listIndexerRules = useLibraryQuery(['locations.indexer_rules.list']);
const indexRules = listIndexerRules.data;
return (
<Card className="mb-2 flex flex-wrap justify-evenly">
{listIndexerRules.data
? listIndexerRules.data.map((rule) => {
const { id, name } = rule;
const enabled = field.value.includes(id);
return (
<Button
key={id}
size="sm"
onClick={() =>
field.onChange(
enabled
? field.value.filter(
(fieldValue) => fieldValue !== rule.id
)
: [...field.value, rule.id]
)
}
variant={enabled ? 'colored' : 'outline'}
className={clsx(
'm-1 flex-auto',
enabled && 'border-accent bg-accent'
)}
>
{name}
</Button>
);
})
: editable || <p>No indexer rules available</p>}
{indexRules ? (
indexRules.map((rule) => {
const { id, name } = rule;
const enabled = field.value.includes(id);
return (
<Button
key={id}
size="sm"
onClick={() =>
field.onChange(
enabled
? field.value.filter((fieldValue) => fieldValue !== rule.id)
: Array.from(new Set([...field.value, rule.id]))
)
}
variant={enabled ? 'colored' : 'outline'}
className={clsx('m-1 flex-auto', enabled && 'border-accent bg-accent')}
>
{name}
</Button>
);
})
) : (
<p className={clsx(listIndexerRules.isError && 'text-red-500')}>
{listIndexerRules.isError
? 'Error while retriving indexer rules'
: 'No indexer rules available'}
</p>
)}
{/* {editable && (
<Button
size="icon"

View file

@ -147,7 +147,7 @@ export type HashingAlgorithm = { name: "Argon2id", params: Params } | { name: "B
export type IdentifyUniqueFilesArgs = { id: number, path: string }
export type IndexerRule = { id: number, kind: number, name: string, parameters: number[], date_created: string, date_modified: string }
export type IndexerRule = { id: number, kind: number, name: string, default: boolean, parameters: number[], date_created: string, date_modified: string }
/**
* `IndexerRuleCreateArgs` is the argument received from the client using rspc to create a new indexer rule.