diff --git a/.gitignore b/.gitignore index aaa9db995..2b5dd4587 100644 --- a/.gitignore +++ b/.gitignore @@ -85,3 +85,4 @@ spacedrive vite.config.ts.* /test-data +/config.json diff --git a/Cargo.lock b/Cargo.lock index aa7f99917..babbb9b8d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -961,7 +961,7 @@ dependencies = [ "serde_path_to_error", "serde_urlencoded", "sha1", - "sync_wrapper", + "sync_wrapper 0.1.2", "tokio", "tokio-tungstenite", "tower", @@ -986,6 +986,18 @@ dependencies = [ "tower-service", ] +[[package]] +name = "backon" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c491fa80d69c03084223a4e73c378dd9f9a1e612eb54051213f88b2d5249b458" +dependencies = [ + "fastrand 2.0.1", + "futures-core", + "pin-project", + "tokio", +] + [[package]] name = "backtrace" version = "0.3.69" @@ -1272,7 +1284,7 @@ version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2cb03d1bed155d89dce0f845b7899b18a9a163e148fd004e1c28421a783e2d8e" dependencies = [ - "block-padding", + "block-padding 0.2.1", "cipher 0.3.0", ] @@ -1282,6 +1294,15 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8d696c370c750c948ada61c69a0ee2cbbb9c50b1019ddb86d9317157a99c2cae" +[[package]] +name = "block-padding" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8894febbff9f758034a5b8e12d87918f56dfc64a8e1fe757d65e29041538d93" +dependencies = [ + "generic-array 0.14.7", +] + [[package]] name = "block-sys" version = "0.2.1" @@ -1510,6 +1531,15 @@ dependencies = [ "rustversion", ] +[[package]] +name = "cbc" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6" +dependencies = [ + "cipher 0.4.4", +] + [[package]] name = "cc" version = "1.0.83" @@ -1843,6 +1873,26 @@ version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom 0.2.12", + "once_cell", + "tiny-keccak", +] + [[package]] name = "constant_time_eq" version = "0.3.0" @@ -2320,6 +2370,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fffa369a668c8af7dbf8b5e56c9f744fbd399949ed171606040001947de40b1c" dependencies = [ "const-oid", + "pem-rfc7468", "zeroize", ] @@ -2397,6 +2448,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer 0.10.4", + "const-oid", "crypto-common 0.1.6", "subtle", ] @@ -2491,6 +2543,15 @@ dependencies = [ "syn 2.0.48", ] +[[package]] +name = "dlv-list" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "442039f5147480ba31067cb00ada1adae6892028e40e45fc5de7b7df6dcc1b5f" +dependencies = [ + "const-random", +] + [[package]] name = "dmmf" version = "0.1.0" @@ -2874,6 +2935,12 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "86d4de0081402f5e88cdac65c8dcdcc73118c1a7a465e2a05f0da05843a8ea33" +[[package]] +name = "flagset" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdeb3aa5e95cf9aabc17f060cfa0ced7b83f042390760ca53bf09df9968acaa1" + [[package]] name = "flate2" version = "1.0.28" @@ -4118,6 +4185,7 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5" dependencies = [ + "block-padding 0.3.3", "generic-array 0.14.7", ] @@ -4354,6 +4422,21 @@ dependencies = [ "serde_json", ] +[[package]] +name = "jsonwebtoken" +version = "9.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9ae10193d25051e74945f1ea2d0b42e03cc3b890f7e4cc5faa44997d808193f" +dependencies = [ + "base64 0.21.7", + "js-sys", + "pem", + "ring 0.17.7", + "serde", + "serde_json", + "simple_asn1", +] + [[package]] name = "kamadak-exif" version = "0.5.5" @@ -4429,6 +4512,9 @@ name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +dependencies = [ + "spin 0.5.2", +] [[package]] name = "lazycell" @@ -5308,7 +5394,7 @@ dependencies = [ "metrics 0.19.0", "metrics-util 0.13.0", "parking_lot 0.11.2", - "quanta", + "quanta 0.9.3", "thiserror", "tokio", "tracing", @@ -5341,7 +5427,7 @@ dependencies = [ "num_cpus", "ordered-float", "parking_lot 0.11.2", - "quanta", + "quanta 0.9.3", "radix_trie", "sketches-ddsketch", ] @@ -5359,7 +5445,7 @@ dependencies = [ "metrics 0.19.0", "num_cpus", "parking_lot 0.11.2", - "quanta", + "quanta 0.9.3", "sketches-ddsketch", ] @@ -5447,6 +5533,30 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "moka" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1911e88d5831f748a4097a43862d129e3c6fca831eecac9b8db6d01d93c9de2" +dependencies = [ + "async-lock 2.8.0", + "async-trait", + "crossbeam-channel", + "crossbeam-epoch", + "crossbeam-utils", + "futures-util", + "once_cell", + "parking_lot 0.12.1", + "quanta 0.12.3", + "rustc_version", + "skeptic", + "smallvec 1.13.1", + "tagptr", + "thiserror", + "triomphe", + "uuid", +] + [[package]] name = "multiaddr" version = "0.18.1" @@ -5807,6 +5917,23 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-bigint-dig" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc84195820f291c7697304f3cbdadd1cb7199c0efc917ff5eafd71225c136151" +dependencies = [ + "byteorder", + "lazy_static", + "libm", + "num-integer", + "num-iter", + "num-traits", + "rand 0.8.5", + "smallvec 1.13.1", + "zeroize", +] + [[package]] name = "num-complex" version = "0.4.4" @@ -5997,6 +6124,37 @@ dependencies = [ "windows-sys 0.42.0", ] +[[package]] +name = "opendal" +version = "0.45.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52c17c077f23fa2d2c25d9d22af98baa43b8bbe2ef0de80cf66339aa70401467" +dependencies = [ + "anyhow", + "async-trait", + "backon", + "base64 0.21.7", + "bytes", + "chrono", + "flagset", + "futures", + "getrandom 0.2.12", + "http", + "log", + "md-5", + "moka", + "once_cell", + "percent-encoding", + "quick-xml", + "reqsign", + "reqwest", + "serde", + "serde_json", + "sha2 0.10.8", + "tokio", + "uuid", +] + [[package]] name = "opener" version = "0.6.1" @@ -6101,6 +6259,16 @@ dependencies = [ "num-traits", ] +[[package]] +name = "ordered-multimap" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49203cdcae0030493bad186b28da2fa25645fa276a51b6fec8010d281e02ef79" +dependencies = [ + "dlv-list", + "hashbrown 0.14.3", +] + [[package]] name = "ordered-stream" version = "0.2.0" @@ -6333,6 +6501,16 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8835116a5c179084a830efb3adc117ab007512b535bc1a21c991d3b32a6b44dd" +[[package]] +name = "pbkdf2" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ed6a7761f76e3b9f92dfb0a60a6a6477c61024b775147ff0973a02653abaf2" +dependencies = [ + "digest 0.10.7", + "hmac", +] + [[package]] name = "pdfium-render" version = "0.8.16" @@ -6376,6 +6554,15 @@ dependencies = [ "serde", ] +[[package]] +name = "pem-rfc7468" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", +] + [[package]] name = "percent-encoding" version = "2.3.1" @@ -6642,6 +6829,32 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ad78bf43dcf80e8f950c92b84f938a0fc7590b7f6866fbcbeca781609c115590" +[[package]] +name = "pkcs1" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" +dependencies = [ + "der 0.7.8", + "pkcs8 0.10.2", + "spki 0.7.3", +] + +[[package]] +name = "pkcs5" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e847e2c91a18bfa887dd028ec33f2fe6f25db77db3619024764914affe8b69a6" +dependencies = [ + "aes 0.8.3", + "cbc", + "der 0.7.8", + "pbkdf2", + "scrypt", + "sha2 0.10.8", + "spki 0.7.3", +] + [[package]] name = "pkcs8" version = "0.9.0" @@ -6659,6 +6872,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" dependencies = [ "der 0.7.8", + "pkcs5", + "rand_core 0.6.4", "spki 0.7.3", ] @@ -7115,12 +7330,27 @@ dependencies = [ "libc", "mach", "once_cell", - "raw-cpuid", + "raw-cpuid 10.7.0", "wasi 0.10.2+wasi-snapshot-preview1", "web-sys", "winapi", ] +[[package]] +name = "quanta" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5167a477619228a0b284fac2674e3c388cba90631d7b7de620e6f1fcd08da5" +dependencies = [ + "crossbeam-utils", + "libc", + "once_cell", + "raw-cpuid 11.0.1", + "wasi 0.11.0+wasi-snapshot-preview1", + "web-sys", + "winapi", +] + [[package]] name = "query-connector" version = "0.1.0" @@ -7228,6 +7458,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33" dependencies = [ "memchr", + "serde", ] [[package]] @@ -7453,6 +7684,15 @@ dependencies = [ "bitflags 1.3.2", ] +[[package]] +name = "raw-cpuid" +version = "11.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d86a7c4638d42c44551f4791a20e687dbb4c3de1f33c43dd71e355cd429def1" +dependencies = [ + "bitflags 2.4.1", +] + [[package]] name = "raw-window-handle" version = "0.5.2" @@ -7597,6 +7837,37 @@ dependencies = [ "winapi", ] +[[package]] +name = "reqsign" +version = "0.14.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43e319d9de9ff4d941abf4ac718897118b0fe04577ea3f8e0f5788971784eef5" +dependencies = [ + "anyhow", + "async-trait", + "base64 0.21.7", + "chrono", + "form_urlencoded", + "getrandom 0.2.12", + "hex", + "hmac", + "home", + "http", + "jsonwebtoken", + "log", + "once_cell", + "percent-encoding", + "quick-xml", + "rand 0.8.5", + "reqwest", + "rsa", + "rust-ini", + "serde", + "serde_json", + "sha1", + "sha2 0.10.8", +] + [[package]] name = "request-handlers" version = "0.1.0" @@ -7636,6 +7907,7 @@ dependencies = [ "http", "http-body", "hyper", + "hyper-rustls", "hyper-tls", "ipnet", "js-sys", @@ -7645,12 +7917,16 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", + "rustls", + "rustls-native-certs", + "rustls-pemfile", "serde", "serde_json", "serde_urlencoded", "system-configuration", "tokio", "tokio-native-tls", + "tokio-rustls", "tokio-util", "tower-service", "url", @@ -7810,6 +8086,27 @@ version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3cd14fd5e3b777a7422cca79358c57a8f6e3a703d9ac187448d0daf220c2407f" +[[package]] +name = "rsa" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d0e5124fcb30e76a7e79bfee683a2746db83784b86289f6251b54b7950a0dfc" +dependencies = [ + "const-oid", + "digest 0.10.7", + "num-bigint-dig", + "num-integer", + "num-traits", + "pkcs1", + "pkcs8 0.10.2", + "rand_core 0.6.4", + "sha2 0.10.8", + "signature 2.2.0", + "spki 0.7.3", + "subtle", + "zeroize", +] + [[package]] name = "rspc" version = "0.1.4" @@ -7861,6 +8158,16 @@ dependencies = [ "smallvec 1.13.1", ] +[[package]] +name = "rust-ini" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e0698206bcb8882bf2a9ecb4c1e7785db57ff052297085a6efd4fe42302068a" +dependencies = [ + "cfg-if", + "ordered-multimap", +] + [[package]] name = "rustc-demangle" version = "0.1.23" @@ -8005,6 +8312,15 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef703b7cb59335eae2eb93ceb664c0eb7ea6bf567079d843e09420219668e072" +[[package]] +name = "salsa20" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97a22f5af31f73a954c10289c93e8a50cc23d971e80ee446f1f6f7137a088213" +dependencies = [ + "cipher 0.4.4", +] + [[package]] name = "same-file" version = "1.0.6" @@ -8096,6 +8412,17 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "scrypt" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0516a385866c09368f0b5bcd1caff3366aace790fcd46e2bb032697bb172fd1f" +dependencies = [ + "pbkdf2", + "salsa20", + "sha2 0.10.8", +] + [[package]] name = "sct" version = "0.7.1" @@ -8222,6 +8549,7 @@ dependencies = [ "normpath", "notify", "once_cell", + "opendal", "openssl", "openssl-sys", "pin-project-lite", @@ -8246,6 +8574,7 @@ dependencies = [ "sd-ffmpeg", "sd-file-ext", "sd-images", + "sd-indexer", "sd-media-metadata", "sd-p2p", "sd-p2p-block", @@ -8264,6 +8593,7 @@ dependencies = [ "static_assertions", "strum", "strum_macros", + "sync_wrapper 1.0.1", "sysinfo", "tar", "tempfile", @@ -8537,6 +8867,30 @@ dependencies = [ "tracing", ] +[[package]] +name = "sd-indexer" +version = "0.0.1" +dependencies = [ + "chrono", + "futures-util", + "globset", + "normpath", + "opendal", + "rmp-serde", + "rspc", + "sd-core-file-path-helper", + "sd-core-indexer-rules", + "sd-file-ext", + "sd-prisma", + "sd-utils", + "serde", + "specta", + "tempfile", + "thiserror", + "tokio", + "tracing", +] + [[package]] name = "sd-media-metadata" version = "0.0.0" @@ -8605,7 +8959,7 @@ dependencies = [ "specta", "stable-vec", "streamunordered", - "sync_wrapper", + "sync_wrapper 0.1.2", "thiserror", "tokio", "tokio-stream", @@ -9135,6 +9489,7 @@ version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" dependencies = [ + "digest 0.10.7", "rand_core 0.6.4", ] @@ -9144,6 +9499,18 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" +[[package]] +name = "simple_asn1" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adc4e5204eb1910f40f9cfa375f6f05b68c3abac4b6fd879c8ff5e7ae8a0a085" +dependencies = [ + "num-bigint", + "num-traits", + "thiserror", + "time", +] + [[package]] name = "simplecss" version = "0.2.1" @@ -9622,6 +9989,15 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" +[[package]] +name = "sync_wrapper" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" +dependencies = [ + "futures-core", +] + [[package]] name = "synstructure" version = "0.12.6" @@ -10153,6 +10529,15 @@ dependencies = [ "time-core", ] +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + [[package]] name = "tiny-skia" version = "0.11.4" diff --git a/core/Cargo.toml b/core/Cargo.toml index fce732095..d8f1ec1d6 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -32,15 +32,15 @@ sd-ai = { path = "../crates/ai", optional = true } sd-cache = { path = "../crates/cache" } sd-cloud-api = { version = "0.1.0", path = "../crates/cloud-api" } sd-crypto = { path = "../crates/crypto", features = [ - "sys", - "tokio", + "sys", + "tokio", ], optional = true } sd-ffmpeg = { path = "../crates/ffmpeg", optional = true } sd-file-ext = { path = "../crates/file-ext" } sd-images = { path = "../crates/images", features = [ - "rspc", - "serde", - "specta", + "rspc", + "serde", + "specta", ] } sd-media-metadata = { path = "../crates/media-metadata" } sd-p2p = { path = "../crates/p2p", features = ["specta"] } @@ -50,6 +50,7 @@ sd-p2p-tunnel = { path = "../crates/p2p-tunnel" } sd-prisma = { path = "../crates/prisma" } sd-sync = { path = "../crates/sync" } sd-utils = { path = "../crates/utils" } +sd-indexer = { path = "../crates/sd-indexer" } # Workspace dependencies async-channel = { workspace = true } @@ -71,12 +72,12 @@ reqwest = { workspace = true, features = ["json", "native-tls-vendored"] } rmp-serde = { workspace = true } rmpv = { workspace = true } rspc = { workspace = true, features = [ - "axum", - "uuid", - "chrono", - "tracing", - "alpha", - "unstable", + "axum", + "uuid", + "chrono", + "tracing", + "alpha", + "unstable", ] } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } @@ -86,12 +87,12 @@ strum_macros = { workspace = true } tempfile = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true, features = [ - "sync", - "rt-multi-thread", - "io-util", - "macros", - "time", - "process", + "sync", + "rt-multi-thread", + "io-util", + "macros", + "time", + "process", ] } tokio-stream = { workspace = true, features = ["fs"] } tokio-util = { workspace = true, features = ["io"] } @@ -101,7 +102,6 @@ tracing-subscriber = { workspace = true, features = ["env-filter"] } uuid = { workspace = true, features = ["v4", "serde"] } webp = { workspace = true } - # Specific Core dependencies async-recursion = "1.0.5" async-stream = "0.3.5" @@ -121,7 +121,7 @@ int-enum = "0.5.0" libc = "0.2.153" mini-moka = "0.10.2" notify = { git = "https://github.com/notify-rs/notify.git", rev = "c3929ed114fbb0bc7457a9a498260461596b00ca", default-features = false, features = [ - "macos_fsevent", + "macos_fsevent", ] } rmp = "0.8.12" serde-hashkey = "0.4.5" @@ -132,6 +132,12 @@ static_assertions = "1.1.0" sysinfo = "0.29.10" tar = "0.4.40" tower-service = "0.3.2" +opendal = { version = "0.45.1", features = [ + "services-gdrive", + "services-s3", + "services-fs", +] } +sync_wrapper = { version = "1.0.1", features = ["futures"] } trash = "4.1.0" # Override features of transitive dependencies @@ -148,10 +154,10 @@ plist = "1" [target.'cfg(target_os = "ios")'.dependencies] icrate = { version = "0.1.0", features = [ - "Foundation", - "Foundation_NSFileManager", - "Foundation_NSString", - "Foundation_NSNumber", + "Foundation", + "Foundation_NSFileManager", + "Foundation_NSString", + "Foundation_NSNumber", ] } [dev-dependencies] diff --git a/core/src/api/locations.rs b/core/src/api/locations.rs index c88f0da0d..cdd30762d 100644 --- a/core/src/api/locations.rs +++ b/core/src/api/locations.rs @@ -2,8 +2,8 @@ use crate::{ invalidate_query, location::{ delete_location, find_location, indexer::OldIndexerJobInit, light_scan_location, - non_indexed::NonIndexedPathItem, relink_location, scan_location, scan_location_sub_path, - LocationCreateArgs, LocationError, LocationUpdateArgs, ScanState, + relink_location, scan_location, scan_location_sub_path, LocationCreateArgs, LocationError, + LocationUpdateArgs, ScanState, }, object::old_file_identifier::old_file_identifier_job::OldFileIdentifierJobInit, old_job::StatefulJob, @@ -17,6 +17,7 @@ use sd_core_prisma_helpers::{ }; use sd_cache::{CacheNode, Model, Normalise, NormalisedResult, NormalisedResults, Reference}; +use sd_indexer::NonIndexedPathItem; use sd_prisma::prisma::{file_path, indexer_rule, indexer_rules_in_location, location, SortOrder}; use std::path::{Path, PathBuf}; diff --git a/core/src/api/search/mod.rs b/core/src/api/search/mod.rs index b34576308..56f59fa59 100644 --- a/core/src/api/search/mod.rs +++ b/core/src/api/search/mod.rs @@ -1,24 +1,34 @@ +use std::{collections::HashMap, path::PathBuf}; + use crate::{ api::{locations::ExplorerItem, utils::library}, library::Library, - location::{non_indexed, LocationError}, - object::media::old_thumbnail::get_indexed_thumb_key, + location::LocationError, + object::{ + cas::generate_cas_id, + media::old_thumbnail::{ + get_ephemeral_thumb_key, get_indexed_thumb_key, BatchToProcess, GenerateThumbnailArgs, + }, + }, util::{unsafe_streamed_query, BatchedStream}, }; -use sd_core_prisma_helpers::{file_path_with_object, object_with_file_paths}; +use opendal::{services::Fs, Operator}; use sd_cache::{CacheNode, Model, Normalise, Reference}; -use sd_prisma::prisma::{self, PrismaClient}; - -use std::path::PathBuf; +use sd_core_indexer_rules::seed::{no_hidden, no_os_protected}; +use sd_core_indexer_rules::IndexerRule; +use sd_core_prisma_helpers::{file_path_with_object, object_with_file_paths}; +use sd_file_ext::kind::ObjectKind; +use sd_prisma::prisma::{self, location, PrismaClient}; +use sd_utils::chain_optional_iter; use async_stream::stream; use futures::StreamExt; -use itertools::Either; use rspc::{alpha::AlphaRouter, ErrorCode}; use serde::{Deserialize, Serialize}; use specta::Type; +use tracing::{error, warn}; pub mod file_path; pub mod media_data; @@ -85,87 +95,160 @@ impl SearchFilterArgs { pub fn mount() -> AlphaRouter { R.router() .procedure("ephemeralPaths", { - #[derive(Serialize, Deserialize, Type, Debug, Clone)] - #[serde(rename_all = "camelCase", tag = "field", content = "value")] - enum EphemeralPathOrder { - Name(SortOrder), - SizeInBytes(SortOrder), - DateCreated(SortOrder), - DateModified(SortOrder), + #[derive(Deserialize, Type, Debug, PartialEq, Eq)] + #[serde(rename_all = "camelCase")] + enum PathFrom { + Path, + // TODO: FTP + S3 + GDrive } #[derive(Deserialize, Type, Debug)] #[serde(rename_all = "camelCase")] struct EphemeralPathSearchArgs { - path: PathBuf, + from: PathFrom, + path: String, with_hidden_files: bool, - #[specta(optional)] - order: Option, } + #[derive(Serialize, Type, Debug)] struct EphemeralPathsResultItem { pub entries: Vec>, - pub errors: Vec, + pub errors: Vec, pub nodes: Vec, } R.with2(library()).subscription( |(node, library), EphemeralPathSearchArgs { - path, + from, + mut path, with_hidden_files, - order, }| async move { - let paths = - non_indexed::walk(path, with_hidden_files, node, library, |entries| { - macro_rules! order_match { - ($order:ident, [$(($variant:ident, |$i:ident| $func:expr)),+]) => {{ - match $order { - $(EphemeralPathOrder::$variant(order) => { - entries.sort_unstable_by(|path1, path2| { - let func = |$i: &non_indexed::Entry| $func; + let service = match from { + PathFrom::Path => { + let mut fs = Fs::default(); + fs.root("/"); + Operator::new(fs) + .map_err(|err| { + rspc::Error::new( + ErrorCode::InternalServerError, + err.to_string(), + ) + })? + .finish() + } + }; - let one = func(path1); - let two = func(path2); + let rules = chain_optional_iter( + [IndexerRule::from(no_os_protected())], + [(!with_hidden_files).then(|| IndexerRule::from(no_hidden()))], + ); - match order { - SortOrder::Desc => two.cmp(&one), - SortOrder::Asc => one.cmp(&two), - } - }); - })+ - } - }}; - } + // OpenDAL is specific about paths (and the rest of Spacedrive is not) + if !path.ends_with('/') { + path.push('/'); + } - if let Some(order) = order { - order_match!( - order, - [ - (Name, |p| p.name().to_lowercase()), - (SizeInBytes, |p| p.size_in_bytes()), - (DateCreated, |p| p.date_created()), - (DateModified, |p| p.date_modified()) - ] - ) - } - }) - .await?; + let stream = + sd_indexer::ephemeral(service, rules, &path) + .await + .map_err(|err| { + rspc::Error::new(ErrorCode::InternalServerError, err.to_string()) + })?; - let mut stream = BatchedStream::new(paths); + let mut stream = BatchedStream::new(stream); Ok(unsafe_streamed_query(stream! { + let mut to_generate = vec![]; + while let Some(result) = stream.next().await { // We optimize for the case of no errors because it should be way more common. let mut entries = Vec::with_capacity(result.len()); let mut errors = Vec::with_capacity(0); + // For this batch we check if any directories are actually locations, so the UI can link directly to them + let locations = library + .db + .location() + .find_many(vec![location::path::in_vec( + result.iter().filter_map(|e| match e { + Ok(e) if ObjectKind::from_i32(e.kind) == ObjectKind::Folder => Some(e.path.clone()), + _ => None + }).collect::>() + )]) + .exec() + .await + .and_then(|l| { + Ok(l.into_iter() + .filter_map(|item| item.path.clone().map(|l| (l, item))) + .collect::>()) + }) + .map_err(|err| error!("Looking up locations failed: {err:?}")) + .unwrap_or_default(); + for item in result { match item { - Ok(item) => entries.push(item), - Err(e) => match e { - Either::Left(e) => errors.push(e), - Either::Right(e) => errors.push(e.into()), + Ok(item) => { + let kind = ObjectKind::from_i32(item.kind); + let should_generate_thumbnail = { + #[cfg(feature = "ffmpeg")] + { + matches!( + kind, + ObjectKind::Image | ObjectKind::Video | ObjectKind::Document + ) + } + + #[cfg(not(feature = "ffmpeg"))] + { + matches!(kind, ObjectKind::Image | ObjectKind::Document) + } + }; + + // TODO: This requires all paths to be loaded before thumbnailing starts. + // TODO: This copies the existing functionality but will not fly with Cloud locations (as loading paths will be *way* slower) + // TODO: https://linear.app/spacedriveapp/issue/ENG-1719/cloud-thumbnailer + let thumbnail = if should_generate_thumbnail { + if from == PathFrom::Path { + let size = u64::from_be_bytes((&*item.size_in_bytes_bytes).try_into().expect("Invalid size")); + if let Ok(cas_id) = generate_cas_id(&item.path, size).await.map_err(|err| error!("Error generating cas id for '{:?}': {err:?}", item.path)) { + if ObjectKind::from_i32(item.kind) == ObjectKind::Document { + to_generate.push(GenerateThumbnailArgs::new( + item.extension.clone(), + cas_id.clone(), + PathBuf::from(&item.path), + )); + } else { + to_generate.push(GenerateThumbnailArgs::new( + item.extension.clone(), + cas_id.clone(), + PathBuf::from(&item.path), + )); + } + + Some(get_ephemeral_thumb_key(&cas_id)) + } else { + None + } + } else { + warn!("Thumbnailer not supported for cloud locations"); + None + } + } else { + None + }; + + entries.push(if let Some(item) = locations.get(&item.path) { + ExplorerItem::Location { + item: item.clone(), + } + } else { + ExplorerItem::NonIndexedPath { + thumbnail, + item, + } + }); }, + Err(e) => errors.push(e.to_string()), } } @@ -177,6 +260,16 @@ pub fn mount() -> AlphaRouter { nodes, }; } + + if to_generate.len() > 0 { + node.thumbnailer + .new_ephemeral_thumbnails_batch(BatchToProcess::new( + to_generate, + false, + false, + )) + .await; + } })) }, ) diff --git a/core/src/location/mod.rs b/core/src/location/mod.rs index e40337ed0..f99418ce9 100644 --- a/core/src/location/mod.rs +++ b/core/src/location/mod.rs @@ -14,6 +14,7 @@ use sd_core_file_path_helper::{ }; use sd_core_prisma_helpers::location_with_indexer_rules; +use sd_indexer::path::normalize_path; use sd_prisma::{ prisma::{file_path, indexer_rules_in_location, location, PrismaClient}, prisma_sync, @@ -27,13 +28,12 @@ use sd_utils::{ use std::{ collections::HashSet, - path::{Component, Path, PathBuf}, + path::{Path, PathBuf}, sync::Arc, }; use chrono::Utc; use futures::future::TryFutureExt; -use normpath::PathExt; use prisma_client_rust::{operator::and, or, QueryError}; use serde::{Deserialize, Serialize}; use serde_json::json; @@ -46,7 +46,6 @@ mod error; pub mod indexer; mod manager; pub mod metadata; -pub mod non_indexed; pub use error::LocationError; use indexer::OldIndexerJobInit; @@ -654,57 +653,6 @@ pub struct CreatedLocationResult { pub data: location_with_indexer_rules::Data, } -pub(crate) fn normalize_path(path: impl AsRef) -> io::Result<(String, String)> { - let mut path = path.as_ref().to_path_buf(); - let (location_path, normalized_path) = path - // Normalize path and also check if it exists - .normalize() - .and_then(|normalized_path| { - if cfg!(windows) { - // Use normalized path as main path on Windows - // This ensures we always receive a valid windows formatted path - // ex: /Users/JohnDoe/Downloads will become C:\Users\JohnDoe\Downloads - // Internally `normalize` calls `GetFullPathNameW` on Windows - // https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfullpathnamew - path = normalized_path.as_path().to_path_buf(); - } - - Ok(( - // TODO: Maybe save the path bytes instead of the string representation to avoid depending on UTF-8 - path.to_str().map(str::to_string).ok_or(io::Error::new( - io::ErrorKind::InvalidInput, - "Found non-UTF-8 path", - ))?, - normalized_path, - )) - })?; - - // Not needed on Windows because the normalization already handles it - if cfg!(not(windows)) { - // Replace location_path with normalize_path, when the first one ends in `.` or `..` - // This is required so localize_name doesn't panic - if let Some(component) = path.components().next_back() { - if matches!(component, Component::CurDir | Component::ParentDir) { - path = normalized_path.as_path().to_path_buf(); - } - } - } - - // Use `to_string_lossy` because a partially corrupted but identifiable name is better than nothing - let mut name = path.localize_name().to_string_lossy().to_string(); - - // Windows doesn't have a root directory - if cfg!(not(windows)) && name == "/" { - name = "Root".to_string() - } - - if name.replace(char::REPLACEMENT_CHARACTER, "") == "" { - name = "Unknown".to_string() - } - - Ok((location_path, name)) -} - async fn create_location( library @ Library { db, sync, .. }: &Library, location_pub_id: Uuid, diff --git a/core/src/location/non_indexed.rs b/core/src/location/non_indexed.rs deleted file mode 100644 index 79f0f8ce8..000000000 --- a/core/src/location/non_indexed.rs +++ /dev/null @@ -1,383 +0,0 @@ -use crate::{ - api::locations::ExplorerItem, - library::Library, - object::{ - cas::generate_cas_id, - media::old_thumbnail::{get_ephemeral_thumb_key, BatchToProcess, GenerateThumbnailArgs}, - }, - Node, -}; - -use sd_core_file_path_helper::{path_is_hidden, MetadataExt}; -use sd_core_indexer_rules::{ - seed::{no_hidden, no_os_protected}, - IndexerRule, RuleKind, -}; - -use sd_file_ext::{extensions::Extension, kind::ObjectKind}; -use sd_prisma::prisma::location; -use sd_utils::{chain_optional_iter, error::FileIOError}; - -use std::{ - collections::HashMap, - io::ErrorKind, - path::{Path, PathBuf}, - sync::Arc, -}; - -use chrono::{DateTime, Utc}; -use futures::Stream; -use itertools::Either; -use rspc::ErrorCode; -use serde::Serialize; -use specta::Type; -use thiserror::Error; -use tokio::{io, sync::mpsc, task::JoinError}; -use tokio_stream::wrappers::ReceiverStream; -use tracing::{error, span, warn, Level}; - -use super::normalize_path; - -#[derive(Debug, Error)] -pub enum NonIndexedLocationError { - #[error("path not found: {}", .0.display())] - NotFound(PathBuf), - - #[error(transparent)] - FileIO(#[from] FileIOError), - - #[error("database error: {0}")] - Database(#[from] prisma_client_rust::QueryError), - - #[error("error joining tokio task: {0}")] - TaskJoinError(#[from] JoinError), - - #[error("receiver shutdown error")] - SendError, -} - -impl From> for NonIndexedLocationError { - fn from(_: mpsc::error::SendError) -> Self { - Self::SendError - } -} - -impl From for rspc::Error { - fn from(err: NonIndexedLocationError) -> Self { - match err { - NonIndexedLocationError::NotFound(_) => { - rspc::Error::with_cause(ErrorCode::NotFound, err.to_string(), err) - } - _ => rspc::Error::with_cause(ErrorCode::InternalServerError, err.to_string(), err), - } - } -} - -impl> From<(P, io::Error)> for NonIndexedLocationError { - fn from((path, source): (P, io::Error)) -> Self { - if source.kind() == io::ErrorKind::NotFound { - Self::NotFound(path.as_ref().into()) - } else { - Self::FileIO(FileIOError::from((path, source))) - } - } -} - -#[derive(Serialize, Type, Debug)] -pub struct NonIndexedPathItem { - pub path: String, - pub name: String, - pub extension: String, - pub kind: i32, - pub is_dir: bool, - pub date_created: DateTime, - pub date_modified: DateTime, - pub size_in_bytes_bytes: Vec, - pub hidden: bool, -} - -// #[instrument(name = "non_indexed::walk", skip(sort_fn))] -pub async fn walk( - path: PathBuf, - with_hidden_files: bool, - node: Arc, - library: Arc, - sort_fn: impl FnOnce(&mut Vec) + Send, -) -> Result< - impl Stream>> + Send, - NonIndexedLocationError, -> { - let mut entries = get_all_entries(path.clone()).await?; - - { - let span = span!(Level::INFO, "sort_fn"); - let _enter = span.enter(); - - sort_fn(&mut entries); - } - - let (tx, rx) = mpsc::channel(128); - let tx2 = tx.clone(); - - // We wanna process and let the caller use the stream. - let task = tokio::spawn(async move { - let path = &path; - let rules = chain_optional_iter( - [IndexerRule::from(no_os_protected())], - [(!with_hidden_files).then(|| IndexerRule::from(no_hidden()))], - ); - - let mut thumbnails_to_generate = vec![]; - // Generating thumbnails for PDFs is kinda slow, so we're leaving them for last in the batch - let mut document_thumbnails_to_generate = vec![]; - let mut directories = vec![]; - - for entry in entries.into_iter() { - let (entry_path, name) = match normalize_path(entry.path) { - Ok(v) => v, - Err(e) => { - tx.send(Err(Either::Left( - NonIndexedLocationError::from((path, e)).into(), - ))) - .await?; - continue; - } - }; - - match IndexerRule::apply_all(&rules, &entry_path).await { - Ok(rule_results) => { - // No OS Protected and No Hidden rules, must always be from this kind, should panic otherwise - if rule_results[&RuleKind::RejectFilesByGlob] - .iter() - .any(|reject| !reject) - { - continue; - } - } - Err(e) => { - tx.send(Err(Either::Left(e.into()))).await?; - continue; - } - }; - - if entry.metadata.is_dir() { - directories.push((entry_path, name, entry.metadata)); - } else { - let path = Path::new(&entry_path); - - let Some(name) = path - .file_stem() - .and_then(|s| s.to_str().map(str::to_string)) - else { - warn!("Failed to extract name from path: {}", &entry_path); - continue; - }; - - let extension = path - .extension() - .and_then(|s| s.to_str().map(str::to_string)) - .unwrap_or_default(); - - let kind = Extension::resolve_conflicting(&path, false) - .await - .map(Into::into) - .unwrap_or(ObjectKind::Unknown); - - let should_generate_thumbnail = { - #[cfg(feature = "ffmpeg")] - { - matches!( - kind, - ObjectKind::Image | ObjectKind::Video | ObjectKind::Document - ) - } - - #[cfg(not(feature = "ffmpeg"))] - { - matches!(kind, ObjectKind::Image | ObjectKind::Document) - } - }; - - let thumbnail_key = if should_generate_thumbnail { - if let Ok(cas_id) = - generate_cas_id(&path, entry.metadata.len()) - .await - .map_err(|e| { - tx.send(Err(Either::Left( - NonIndexedLocationError::from((path, e)).into(), - ))) - }) { - if kind == ObjectKind::Document { - document_thumbnails_to_generate.push(GenerateThumbnailArgs::new( - extension.clone(), - cas_id.clone(), - path.to_path_buf(), - )); - } else { - thumbnails_to_generate.push(GenerateThumbnailArgs::new( - extension.clone(), - cas_id.clone(), - path.to_path_buf(), - )); - } - - Some(get_ephemeral_thumb_key(&cas_id)) - } else { - None - } - } else { - None - }; - - tx.send(Ok(ExplorerItem::NonIndexedPath { - thumbnail: thumbnail_key, - item: NonIndexedPathItem { - hidden: path_is_hidden(Path::new(&entry_path), &entry.metadata), - path: entry_path, - name, - extension, - kind: kind as i32, - is_dir: false, - date_created: entry.metadata.created_or_now().into(), - date_modified: entry.metadata.modified_or_now().into(), - size_in_bytes_bytes: entry.metadata.len().to_be_bytes().to_vec(), - }, - })) - .await?; - } - } - - thumbnails_to_generate.extend(document_thumbnails_to_generate); - - node.thumbnailer - .new_ephemeral_thumbnails_batch(BatchToProcess::new( - thumbnails_to_generate, - false, - false, - )) - .await; - - let mut locations = library - .db - .location() - .find_many(vec![location::path::in_vec( - directories - .iter() - .map(|(path, _, _)| path.clone()) - .collect(), - )]) - .exec() - .await? - .into_iter() - .flat_map(|location| { - location - .path - .clone() - .map(|location_path| (location_path, location)) - }) - .collect::>(); - - for (directory, name, metadata) in directories { - if let Some(location) = locations.remove(&directory) { - tx.send(Ok(ExplorerItem::Location { item: location })) - .await?; - } else { - tx.send(Ok(ExplorerItem::NonIndexedPath { - thumbnail: None, - item: NonIndexedPathItem { - hidden: path_is_hidden(Path::new(&directory), &metadata), - path: directory, - name, - extension: String::new(), - kind: ObjectKind::Folder as i32, - is_dir: true, - date_created: metadata.created_or_now().into(), - date_modified: metadata.modified_or_now().into(), - size_in_bytes_bytes: metadata.len().to_be_bytes().to_vec(), - }, - })) - .await?; - } - } - - Ok::<_, NonIndexedLocationError>(()) - }); - - tokio::spawn(async move { - match task.await { - Ok(Ok(())) => {} - Ok(Err(e)) => { - let _ = tx2.send(Err(Either::Left(e.into()))).await; - } - Err(e) => error!("error joining tokio task: {}", e), - } - }); - - Ok(ReceiverStream::new(rx)) -} - -#[derive(Debug)] -pub struct Entry { - path: PathBuf, - name: String, - // size_in_bytes: u64, - // date_created: - metadata: std::fs::Metadata, -} - -impl Entry { - pub fn name(&self) -> &str { - &self.name - } - - pub fn size_in_bytes(&self) -> u64 { - self.metadata.len() - } - - pub fn date_created(&self) -> DateTime { - self.metadata.created_or_now().into() - } - - pub fn date_modified(&self) -> DateTime { - self.metadata.modified_or_now().into() - } -} - -/// We get all of the FS entries first before we start processing on each of them. -/// -/// From my M1 Macbook Pro this: -/// - takes 11ms per 10 000 files -/// and -/// - consumes 0.16MB of RAM per 10 000 entries. -/// -/// The reason we collect these all up is so we can apply ordering, and then begin streaming the data as it's processed to the frontend. -// #[instrument(name = "get_all_entries")] -pub async fn get_all_entries(path: PathBuf) -> Result, NonIndexedLocationError> { - tokio::task::spawn_blocking(move || { - let path = &path; - let dir = std::fs::read_dir(path).map_err(|e| (path, e))?; - let mut entries = Vec::new(); - for entry in dir { - let entry = entry.map_err(|e| (path, e))?; - - // We must not keep `entry` around as we will quickly hit the OS limit on open file descriptors - entries.push(Entry { - path: entry.path(), - name: entry - .file_name() - .to_str() - .ok_or_else(|| { - ( - path, - io::Error::new(ErrorKind::Other, "error non UTF-8 path"), - ) - })? - .to_string(), - metadata: entry.metadata().map_err(|e| (path, e))?, - }); - } - - Ok(entries) - }) - .await? -} diff --git a/core/src/util/batched_stream.rs b/core/src/util/batched_stream.rs index 1cd350840..d39cc7152 100644 --- a/core/src/util/batched_stream.rs +++ b/core/src/util/batched_stream.rs @@ -31,7 +31,7 @@ impl BatchedStream { } } -impl Stream for BatchedStream { +impl Stream for BatchedStream { type Item = Vec; fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { diff --git a/core/src/util/unsafe_streamed_query.rs b/core/src/util/unsafe_streamed_query.rs index 8957097d8..8d7f2dbd5 100644 --- a/core/src/util/unsafe_streamed_query.rs +++ b/core/src/util/unsafe_streamed_query.rs @@ -1,9 +1,8 @@ -use std::pin::pin; - use async_stream::stream; use futures::{Stream, StreamExt}; use serde::Serialize; use specta::{reference::Reference, DataType, Type, TypeMap}; +use sync_wrapper::SyncStream; #[derive(Serialize)] #[serde(untagged)] @@ -27,13 +26,18 @@ impl Type for Output { } // Marked as unsafe as the types are a lie and this should always be used with `useUnsafeStreamedQuery` -pub fn unsafe_streamed_query(stream: S) -> impl Stream> { - stream! { - let mut stream = pin!(stream); +pub fn unsafe_streamed_query( + stream: S, +) -> impl Stream> + Send + Sync +where + S::Item: Send, +{ + SyncStream::new(stream! { + let mut stream = std::pin::pin!(stream); while let Some(v) = stream.next().await { yield Output::Data(v); } yield Output::Complete { __stream_complete: () }; - } + }) } diff --git a/crates/file-ext/src/kind.rs b/crates/file-ext/src/kind.rs index 058e206a4..2c21a3052 100644 --- a/crates/file-ext/src/kind.rs +++ b/crates/file-ext/src/kind.rs @@ -60,3 +60,38 @@ pub enum ObjectKind { /// Label Label = 26, } + +impl ObjectKind { + pub fn from_i32(value: i32) -> Self { + match value { + 0 => ObjectKind::Unknown, + 1 => ObjectKind::Document, + 2 => ObjectKind::Folder, + 3 => ObjectKind::Text, + 4 => ObjectKind::Package, + 5 => ObjectKind::Image, + 6 => ObjectKind::Audio, + 7 => ObjectKind::Video, + 8 => ObjectKind::Archive, + 9 => ObjectKind::Executable, + 10 => ObjectKind::Alias, + 11 => ObjectKind::Encrypted, + 12 => ObjectKind::Key, + 13 => ObjectKind::Link, + 14 => ObjectKind::WebPageArchive, + 15 => ObjectKind::Widget, + 16 => ObjectKind::Album, + 17 => ObjectKind::Collection, + 18 => ObjectKind::Font, + 19 => ObjectKind::Mesh, + 20 => ObjectKind::Code, + 21 => ObjectKind::Database, + 22 => ObjectKind::Book, + 23 => ObjectKind::Config, + 24 => ObjectKind::Dotfile, + 25 => ObjectKind::Screenshot, + 26 => ObjectKind::Label, + _ => ObjectKind::Unknown, + } + } +} diff --git a/crates/sd-indexer/Cargo.toml b/crates/sd-indexer/Cargo.toml new file mode 100644 index 000000000..65e18084e --- /dev/null +++ b/crates/sd-indexer/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "sd-indexer" +version = "0.0.1" +license.workspace = true +edition.workspace = true +repository.workspace = true +publish = false + +[dependencies] +sd-utils = { path = "../utils" } +sd-file-ext = { path = "../file-ext" } +sd-core-file-path-helper = { path = "../../core/crates/file-path-helper" } +sd-core-indexer-rules = { path = "../../core/crates/indexer-rules" } + +chrono.workspace = true +futures-util = "0.3.30" +globset = { version = "0.4.14", features = ["serde1"] } +opendal = "0.45.1" +serde = { workspace = true, features = ["derive"] } +specta.workspace = true +thiserror.workspace = true +tracing.workspace = true +rmp-serde = "1.1.2" + +# TODO: Remove these +rspc.workspace = true +tokio = { workspace = true, features = ["fs"] } +sd-prisma = { path = "../prisma" } +tempfile.workspace = true +normpath = { workspace = true, features = ["localization"] } diff --git a/crates/sd-indexer/src/ephemeral.rs b/crates/sd-indexer/src/ephemeral.rs new file mode 100644 index 000000000..3b38d3b9d --- /dev/null +++ b/crates/sd-indexer/src/ephemeral.rs @@ -0,0 +1,212 @@ +use std::{ + future::ready, + io::{self, ErrorKind}, + path::PathBuf, +}; + +use chrono::{DateTime, Utc}; +use futures_util::{Stream, StreamExt, TryFutureExt}; +use opendal::{Operator, Scheme}; +use sd_core_file_path_helper::path_is_hidden; +use sd_core_indexer_rules::{IndexerRule, RuleKind}; +use sd_file_ext::{extensions::Extension, kind::ObjectKind}; +use serde::Serialize; +use specta::Type; + +use crate::stream::TaskStream; + +#[derive(Serialize, Type, Debug)] +pub struct NonIndexedPathItem { + pub path: String, + pub name: String, + pub extension: String, + pub kind: i32, // TODO: Use `ObjectKind` instead + // TODO: Use `kind` instead and drop this + pub is_dir: bool, + pub date_created: DateTime, + pub date_modified: DateTime, + pub size_in_bytes_bytes: Vec, + pub hidden: bool, +} + +pub async fn ephemeral( + opendal: Operator, + rules: Vec, + path: &str, +) -> opendal::Result>> { + let is_fs = opendal.info().scheme() == Scheme::Fs; + let base_path = PathBuf::from(opendal.info().root()); + let mut lister = opendal.lister(path).await?; + + Ok(TaskStream::new(move |tx| async move { + let rules = &*rules; + while let Some(entry) = lister.next().await { + let base_path = base_path.clone(); + let result = ready(entry) + .map_err(|err| io::Error::new(ErrorKind::Other, format!("OpenDAL: {err:?}"))) + .and_then(|entry| async move { + let path = base_path.join(entry.path()); + + let extension = (!path.is_dir()) + .then(|| { + path.extension() + .and_then(|s| s.to_str().map(str::to_string)) + .unwrap_or_default() + }) + .unwrap_or_default(); + + // Only Windows supports normalised files without FS access. + // For now we only do normalisation for local files. + let (relative_path, name) = if is_fs { + crate::path::normalize_path(&path).map_err(|err| { + io::Error::new( + ErrorKind::Other, + format!("Error normalising path '{path:?}': {err:?}"), + ) + })? + } else { + unreachable!(); + // ( + // path.file_stem() + // .and_then(|s| s.to_str().map(str::to_string)) + // .ok_or_else(|| { + // io::Error::new( + // ErrorKind::Other, + // "error on file '{path:?}: non UTF-8", + // ) + // })? + // .to_string(), + // path.to_str() + // .expect("non UTF-8 path - is unreachable") + // .to_string(), + // ) + }; + + let kind = if entry.metadata().is_dir() { + ObjectKind::Folder + } else if is_fs { + Extension::resolve_conflicting(&path, false) + .await + .map(Into::into) + .unwrap_or(ObjectKind::Unknown) + } else { + // TODO: Determine kind of remote files - https://linear.app/spacedriveapp/issue/ENG-1718/fix-objectkind-of-remote-files + ObjectKind::Unknown + }; + + let name = (kind != ObjectKind::Folder) + .then(|| { + path.file_stem() + .and_then(|s| s.to_str().map(str::to_string)) + }) + .flatten() + .unwrap_or(name); + + let mut path = path + .to_str() + .expect("comes from string so this is impossible") + .to_string(); + + // OpenDAL will *always* end in a `/` for directories, we strip it here so we can give the path to Tokio. + if path.ends_with('/') && path.len() > 1 { + path.pop(); + } + + let result = IndexerRule::apply_all(rules, &path).await.map_err(|err| { + io::Error::new( + ErrorKind::Other, + format!("Error running indexer rules on file '{path:?}': {err:?}"), + ) + })?; + + // No OS Protected and No Hidden rules, must always be from this kind, should panic otherwise + if result[&RuleKind::RejectFilesByGlob] + .iter() + .any(|reject| !reject) + { + return Ok(None); // Skip this file + }; + + // TODO: OpenDAL last modified time - https://linear.app/spacedriveapp/issue/ENG-1717/fix-modified-time + // TODO: OpenDAL hidden files - https://linear.app/spacedriveapp/issue/ENG-1720/fix-hidden-files + let (hidden, date_created, date_modified, size) = if is_fs { + let metadata = tokio::fs::metadata(&path).await.map_err(|err| { + io::Error::new( + ErrorKind::Other, + format!("Error getting metadata for '{path:?}': {err:?}"), + ) + })?; + + ( + path_is_hidden(&path, &metadata), + metadata + .created() + .map_err(|err| { + io::Error::new( + ErrorKind::Other, + format!("Error determining created time for '{path:?}': {err:?}"), + ) + })? + .into(), + metadata + .modified() + .map_err(|err| { + io::Error::new( + ErrorKind::Other, + format!("Error determining modified time for '{path:?}': {err:?}"), + ) + })? + .into(), + metadata.len(), + ) + } else { + (false, Default::default(), Default::default(), 0) + }; + + // TODO: Fix this - https://linear.app/spacedriveapp/issue/ENG-1725/fix-last-modified + #[allow(clippy::redundant_locals)] + let date_modified = date_modified; + // entry.metadata().last_modified().ok_or_else(|| { + // io::Error::new( + // ErrorKind::Other, + // format!("Error getting modified time for '{path:?}'"), + // ) + // })?; + + #[allow(clippy::redundant_locals)] + // TODO: Fix this - https://linear.app/spacedriveapp/issue/ENG-1726/fix-file-size + let size = size; + + Ok(Some(NonIndexedPathItem { + path: relative_path, + name, + extension, + kind: kind as i32, + is_dir: kind == ObjectKind::Folder, + date_created, + date_modified, + // TODO + // entry + // .metadata() + // .content_length() + size_in_bytes_bytes: size.to_be_bytes().to_vec(), + hidden, + })) + }) + .await; + + if tx + .send(match result { + Ok(Some(item)) => Ok(item), + Ok(None) => continue, + Err(err) => Err(err), + }) + .await + .is_err() + { + // Stream has been dropped. + continue; + } + } + })) +} diff --git a/crates/sd-indexer/src/lib.rs b/crates/sd-indexer/src/lib.rs new file mode 100644 index 000000000..c34480996 --- /dev/null +++ b/crates/sd-indexer/src/lib.rs @@ -0,0 +1,5 @@ +mod ephemeral; +pub mod path; +mod stream; + +pub use ephemeral::*; diff --git a/crates/sd-indexer/src/path.rs b/crates/sd-indexer/src/path.rs new file mode 100644 index 000000000..9803c84a2 --- /dev/null +++ b/crates/sd-indexer/src/path.rs @@ -0,0 +1,57 @@ +use std::{ + io, + path::{Component, Path}, +}; + +use normpath::PathExt; + +pub fn normalize_path(path: impl AsRef) -> io::Result<(String, String)> { + let mut path = path.as_ref().to_path_buf(); + let (location_path, normalized_path) = path + // Normalize path and also check if it exists + .normalize() + .and_then(|normalized_path| { + if cfg!(windows) { + // Use normalized path as main path on Windows + // This ensures we always receive a valid windows formatted path + // ex: /Users/JohnDoe/Downloads will become C:\Users\JohnDoe\Downloads + // Internally `normalize` calls `GetFullPathNameW` on Windows + // https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfullpathnamew + path = normalized_path.as_path().to_path_buf(); + } + + Ok(( + // TODO: Maybe save the path bytes instead of the string representation to avoid depending on UTF-8 + path.to_str().map(str::to_string).ok_or(io::Error::new( + io::ErrorKind::InvalidInput, + "Found non-UTF-8 path", + ))?, + normalized_path, + )) + })?; + + // Not needed on Windows because the normalization already handles it + if cfg!(not(windows)) { + // Replace location_path with normalize_path, when the first one ends in `.` or `..` + // This is required so localize_name doesn't panic + if let Some(component) = path.components().next_back() { + if matches!(component, Component::CurDir | Component::ParentDir) { + path = normalized_path.as_path().to_path_buf(); + } + } + } + + // Use `to_string_lossy` because a partially corrupted but identifiable name is better than nothing + let mut name = path.localize_name().to_string_lossy().to_string(); + + // Windows doesn't have a root directory + if cfg!(not(windows)) && name == "/" { + name = "Root".to_string() + } + + if name.replace(char::REPLACEMENT_CHARACTER, "") == "" { + name = "Unknown".to_string() + } + + Ok((location_path, name)) +} diff --git a/crates/sd-indexer/src/stream.rs b/crates/sd-indexer/src/stream.rs new file mode 100644 index 000000000..0922c3399 --- /dev/null +++ b/crates/sd-indexer/src/stream.rs @@ -0,0 +1,40 @@ +use std::{ + pin::Pin, + task::{Context, Poll}, +}; + +use futures_util::Future; +use tokio::sync::mpsc; + +/// Construct a stream from a Tokio task. +/// Similar to `tokio_stream::stream!` but not a macro for better DX. +pub struct TaskStream { + task: tokio::task::JoinHandle<()>, + receiver: mpsc::Receiver, +} + +impl TaskStream { + pub fn new(task: impl FnOnce(mpsc::Sender) -> F + Send + 'static) -> Self { + let (tx, rx) = mpsc::channel(256); + Self { + task: tokio::spawn(async move { + task(tx).await; + }), + receiver: rx, + } + } +} + +impl futures_util::Stream for TaskStream { + type Item = T; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + self.receiver.poll_recv(cx) + } +} + +impl Drop for TaskStream { + fn drop(&mut self) { + self.task.abort(); + } +} diff --git a/interface/app/$libraryId/ephemeral.tsx b/interface/app/$libraryId/ephemeral.tsx index a71544aa9..f5eb7fa95 100644 --- a/interface/app/$libraryId/ephemeral.tsx +++ b/interface/app/$libraryId/ephemeral.tsx @@ -1,16 +1,17 @@ -import { type AlphaClient } from '@oscartbeaumont-sd/rspc-client/v2'; import { ArrowLeft, ArrowRight, Info } from '@phosphor-icons/react'; import * as Dialog from '@radix-ui/react-dialog'; import { iconNames } from '@sd/assets/util'; import clsx from 'clsx'; import { memo, Suspense, useDeferredValue, useMemo } from 'react'; +import { match } from 'ts-pattern'; import { ExplorerItem, getExplorerItemData, + ItemData, + SortOrder, useLibraryContext, useNormalisedCache, - useUnsafeStreamedQuery, - type EphemeralPathOrder + useUnsafeStreamedQuery } from '@sd/client'; import { Button, Tooltip } from '@sd/ui'; import { PathParamsSchema, type PathParams } from '~/app/route-schemas'; @@ -41,6 +42,12 @@ import { useTopBarContext } from './TopBar/Context'; import { TopBarPortal } from './TopBar/Portal'; import TopBarButton from './TopBar/TopBarButton'; +export type EphemeralPathOrder = + | { field: 'name'; value: SortOrder } + | { field: 'sizeInBytes'; value: SortOrder } + | { field: 'dateCreated'; value: SortOrder } + | { field: 'dateModified'; value: SortOrder }; + const NOTICE_ITEMS: { icon: keyof typeof iconNames; name: string }[] = [ { icon: 'Folder', @@ -188,9 +195,9 @@ const EphemeralExplorer = memo((props: { args: PathParams }) => { { library_id: libraryCtx.library.uuid, arg: { + from: 'path', path: path ?? (os === 'windows' ? 'C:\\' : '/'), - withHiddenFiles: settingsSnapshot.showHiddenFiles, - order: settingsSnapshot.order + withHiddenFiles: settingsSnapshot.showHiddenFiles } } ], @@ -225,8 +232,52 @@ const EphemeralExplorer = memo((props: { args: PathParams }) => { } } + // We sort on the frontend, as the backend streams in entries from cloud locations out of order + const order = settingsSnapshot.order; + if (order !== null) { + const getValue = match(order.field) + .with('name', () => (a: ItemData) => a.name) + .with('sizeInBytes', () => (a: ItemData) => a.size.original) + .with( + 'dateCreated', + () => (a: ItemData) => (a.dateCreated !== null ? new Date(a.dateCreated) : null) + ) + .with( + 'dateModified', + () => (a: ItemData) => + a.dateModified !== null ? new Date(a.dateModified) : null + ) + .exhaustive(); + + return ret.sort((a, b) => { + const aData = getExplorerItemData(a); + const bData = getExplorerItemData(b); + + let result = 0; + + // Put hidden files first (if the files have a hidden property) + if ( + 'hidden' in a.item && + 'hidden' in b.item && + a.item.hidden !== null && + b.item.hidden !== null + ) + result = +b.item.hidden - +a.item.hidden; + + // Group files before folders (within the hidden groups) + result = result || +(aData.kind === 'Folder') - +(bData.kind === 'Folder'); + + // Finally sort by the user defined property & flip the result for descending order if needed + const valueA = getValue(aData); + const valueB = getValue(bData); + result = result || compare(valueA, valueB) * (order.value === 'Asc' ? 1 : -1); + + return result; + }); + } + return ret; - }, [entries, settingsSnapshot.layoutMode]); + }, [entries, settingsSnapshot.layoutMode, settingsSnapshot.order]); const explorer = useExplorer({ items, @@ -276,3 +327,20 @@ export const Component = () => { ); }; + +// Compare two values and return a number based on their relative order +function compare(a: T, b: T) { + if (a !== null && b !== null) { + if (typeof a === 'string') { + return a.localeCompare(b as string); + } else { + // We must avoid equality as Date doesn't support them but if a > b & b > a then a === b + return a < b ? -1 : a > b ? 1 : 0; + } + } + + if (a === null && b !== null) return -1; + if (a !== null && b === null) return 1; + + return 0; +} diff --git a/packages/client/src/core.ts b/packages/client/src/core.ts index b96a2809c..5d160ec19 100644 --- a/packages/client/src/core.ts +++ b/packages/client/src/core.ts @@ -235,11 +235,9 @@ export type EphemeralFileCreateContextTypes = "empty" | "text" export type EphemeralFileSystemOps = { sources: string[]; target_dir: string } -export type EphemeralPathOrder = { field: "name"; value: SortOrder } | { field: "sizeInBytes"; value: SortOrder } | { field: "dateCreated"; value: SortOrder } | { field: "dateModified"; value: SortOrder } +export type EphemeralPathSearchArgs = { from: PathFrom; path: string; withHiddenFiles: boolean } -export type EphemeralPathSearchArgs = { path: string; withHiddenFiles: boolean; order?: EphemeralPathOrder | null } - -export type EphemeralPathsResultItem = { entries: Reference[]; errors: Error[]; nodes: CacheNode[] } +export type EphemeralPathsResultItem = { entries: Reference[]; errors: string[]; nodes: CacheNode[] } export type EphemeralRenameFileArgs = { kind: EphemeralRenameKind } @@ -249,13 +247,6 @@ export type EphemeralRenameMany = { from_pattern: FromPattern; to_pattern: strin export type EphemeralRenameOne = { from_path: string; to: string } -export type Error = { code: ErrorCode; message: string } - -/** - * TODO - */ -export type ErrorCode = "BadRequest" | "Unauthorized" | "Forbidden" | "NotFound" | "Timeout" | "Conflict" | "PreconditionFailed" | "PayloadTooLarge" | "MethodNotSupported" | "ClientClosedRequest" | "InternalServerError" - export type ExplorerItem = { type: "Path"; thumbnail: string[] | null; item: FilePathWithObject } | { type: "Object"; thumbnail: string[] | null; item: ObjectWithFilePaths } | { type: "Location"; item: Location } | { type: "NonIndexedPath"; thumbnail: string[] | null; item: NonIndexedPathItem } | { type: "SpacedropPeer"; item: PeerMetadata } | { type: "Label"; thumbnails: string[][]; item: LabelWithObjects } export type ExplorerLayout = "grid" | "list" | "media" @@ -538,6 +529,8 @@ export type P2PDiscoveryState = "Everyone" | "ContactsOnly" | "Disabled" export type P2PEvent = { type: "PeerChange"; identity: RemoteIdentity; connection: ConnectionMethod; discovery: DiscoveryMethod; metadata: PeerMetadata } | { type: "PeerDelete"; identity: RemoteIdentity } | { type: "SpacedropRequest"; id: string; identity: RemoteIdentity; peer_name: string; files: string[] } | { type: "SpacedropProgress"; id: string; percent: number } | { type: "SpacedropTimedOut"; id: string } | { type: "SpacedropRejected"; id: string } +export type PathFrom = "path" + export type PeerMetadata = { name: string; operating_system: OperatingSystem | null; device_model: HardwareModel | null; version: string | null } export type PlusCode = string diff --git a/packages/client/src/lib/explorerItem.ts b/packages/client/src/lib/explorerItem.ts index f6ed99734..a30212d71 100644 --- a/packages/client/src/lib/explorerItem.ts +++ b/packages/client/src/lib/explorerItem.ts @@ -33,14 +33,11 @@ export function getExplorerItemData(data?: ExplorerItem | null): ItemData { switch (data.type) { // the getItemObject and getItemFilePath type-guards mean we can handle the following types in one case case 'Object': - case 'NonIndexedPath': case 'Path': { // handle object const object = getItemObject(data); if (object?.kind) itemData.kind = ObjectKind[object?.kind] ?? 'Unknown'; - else if (data.type === 'NonIndexedPath') - itemData.kind = ObjectKind[data.item.kind] ?? 'Unknown'; // Objects only have dateCreated and dateAccessed itemData.dateCreated = object?.date_created ?? null; @@ -69,6 +66,37 @@ export function getExplorerItemData(data?: ExplorerItem | null): ItemData { } break; } + case 'NonIndexedPath': { + if (data.item?.kind) itemData.kind = ObjectKind[data.item?.kind] ?? 'Unknown'; + else if (data.type === 'NonIndexedPath') + itemData.kind = ObjectKind[data.item.kind] ?? 'Unknown'; + + // Objects only have dateCreated and dateAccessed + itemData.dateCreated = data.item?.date_created ?? null; + // handle thumbnail based on provided key + // This could be better, but for now we're mapping the backend property to two different local properties (thumbnailKey, thumbnailKeys) for backward compatibility + if (data.thumbnail) { + itemData.thumbnailKey = data.thumbnail; + itemData.thumbnailKeys = [data.thumbnail]; + } + + itemData.hasLocalThumbnail = !!data.thumbnail; + // handle file path + const filePath = getItemFilePath(data); + if (filePath) { + itemData.name = filePath.name; + itemData.fullName = getFullName(filePath.name, filePath.extension); + itemData.size = byteSize(filePath.size_in_bytes_bytes); + itemData.isDir = filePath.is_dir ?? false; + itemData.extension = filePath.extension?.toLocaleLowerCase() ?? null; + // + if ('cas_id' in filePath) itemData.casId = filePath.cas_id; + if ('location_id' in filePath) itemData.locationId = filePath.location_id; + if ('date_indexed' in filePath) itemData.dateIndexed = filePath.date_indexed; + if ('date_modified' in filePath) itemData.dateModified = filePath.date_modified; + } + break; + } // the following types do not have a file_path or an object associated, and must be handled from scratch case 'Location': { const location = getItemLocation(data);