[ENG-1775] Scan location using new jobs (#2476)

* First draft of task system usage; job system still missing

* Scan location roughly working; a ton of stuff still to fix

* Updating some deps due to crashes and bugs

* Exposing non-critical errors to the frontend

* Getting active job reports from job system

* Using a boxed opaque type to avoid a downcast issue with generics (see the sketch after this commit list)

* Discovered race condition issues in the task system

* Enable debug profile

* Fix job report in the job manager

* Fix race condition when stealing tasks

* Fixed race condition on task suspend

* Some fixes to job progress reporting and saving

* Fixed many race conditions and a hard deadlock
Also some progress report polishing

* Ignore .ts and .mts video files for now

* Some better logs

* bruh

* Fixed internal deadlocks and excessive communication in the task system
- Also better logs

* Bunch of fixes and optimizations

* WIP on fixing the file identifier

* Fixed file identifier job
- still need to work on its progress reporting in the frontend

* A bunch of polishing

* Fixed some bugs and did more polishing

* Cleanup

* Bridging old and new job systems

* A ton of fixes

* A bunch of bugs related to shutdown and resume

* Indexer and watcher bugs

* Log normalization

* Fixing CI

* Change error! to warn! on non-critical error logs

* Fix redirect to new location

* Type annotation

* Fix bogus merge resolution in Cargo.lock
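
For reference, a minimal sketch of the boxed-opaque-output pattern mentioned above, using hypothetical `IdentifierOutput` and `ObjectProcessorOutput` types; it mirrors the `is::<T>()` / `downcast()` checks visible in the shallow file-identifier diff below, not the exact task-system API.

```rust
use std::any::Any;

// Hypothetical task outputs standing in for the real identifier and
// object-processor outputs touched by this PR.
struct IdentifierOutput {
    total_identified_files: u64,
}
struct ObjectProcessorOutput {
    file_path_ids_with_new_object: Vec<i32>,
}

// Tasks hand back a boxed opaque value instead of a generic parameter, so the
// caller branches on the concrete type at runtime rather than fighting the
// generics when downcasting.
fn handle(any_task_output: Box<dyn Any>) {
    if any_task_output.is::<IdentifierOutput>() {
        let out = *any_task_output
            .downcast::<IdentifierOutput>()
            .expect("just checked");
        println!("identified {} files", out.total_identified_files);
    } else {
        let out = *any_task_output
            .downcast::<ObjectProcessorOutput>()
            .expect("only two output kinds in this sketch");
        println!(
            "{} file paths got new objects",
            out.file_path_ids_with_new_object.len()
        );
    }
}

fn main() {
    handle(Box::new(IdentifierOutput {
        total_identified_files: 3,
    }));
    handle(Box::new(ObjectProcessorOutput {
        file_path_ids_with_new_object: vec![1, 2],
    }));
}
```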
Ericson "Fogo" Soares, 2024-06-17 18:30:57 -03:00, committed by GitHub
parent f8ed254a22
commit bdc242a852
183 changed files with 14270 additions and 16347 deletions

.vscode/launch.json

@ -11,6 +11,7 @@
"cargo": {
"args": [
"build",
"--profile=dev-debug",
"--manifest-path=./apps/desktop/src-tauri/Cargo.toml",
"--no-default-features"
],

Cargo.lock (generated)

@ -282,7 +282,7 @@ dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
"synstructure",
"synstructure 0.12.6",
]
[[package]]
@ -383,9 +383,9 @@ dependencies = [
[[package]]
name = "async-signal"
version = "0.2.7"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "329972aa325176e89114919f2a80fdae4f4c040f66a370b1a1159c6c0f94e7aa"
checksum = "794f185324c2f00e771cd9f1ae8b5ac68be2ca7abb129a87afd6e86d228bc54d"
dependencies = [
"async-io",
"async-lock",
@ -826,9 +826,9 @@ dependencies = [
[[package]]
name = "aws-smithy-runtime"
version = "1.5.5"
version = "1.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0d3965f6417a92a6d1009c5958a67042f57e46342afb37ca58f9ad26744ec73"
checksum = "8508de54f34b8feca6638466c2bd2de9d1df5bf79c578de9a649b72d644006b3"
dependencies = [
"aws-smithy-async",
"aws-smithy-http",
@ -840,6 +840,7 @@ dependencies = [
"http 0.2.12",
"http-body 0.4.6",
"http-body 1.0.0",
"httparse",
"hyper 0.14.29",
"hyper-rustls 0.24.2",
"once_cell",
@ -852,9 +853,9 @@ dependencies = [
[[package]]
name = "aws-smithy-runtime-api"
version = "1.6.2"
version = "1.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4179bd8a1c943e1aceb46c5b9fc014a561bd6c35a2153e816ba29076ee49d245"
checksum = "aa6dbabc7629fab4e4467f95f119c2e1a9b00b44c893affa98e23b040a0e2567"
dependencies = [
"aws-smithy-async",
"aws-smithy-types",
@ -869,9 +870,9 @@ dependencies = [
[[package]]
name = "aws-smithy-types"
version = "1.1.10"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b6764ba7e1c5ede1c9f9e4046645534f06c2581402461c559b481a420330a83"
checksum = "cfe321a6b21f5d8eabd0ade9c55d3d0335f3c3157fc2b3e87f05f34b539e4df5"
dependencies = [
"base64-simd",
"bytes",
@ -1306,7 +1307,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706"
dependencies = [
"memchr",
"regex-automata 0.4.6",
"regex-automata 0.4.7",
"serde",
]
@ -1618,9 +1619,9 @@ dependencies = [
[[package]]
name = "clap"
version = "4.5.6"
version = "4.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9689a29b593160de5bc4aacab7b5d54fb52231de70122626c178e6a368994c7"
checksum = "5db83dced34638ad474f39f250d7fea9598bdd239eaced1bdf45d597da0f433f"
dependencies = [
"clap_builder",
"clap_derive",
@ -1628,9 +1629,9 @@ dependencies = [
[[package]]
name = "clap_builder"
version = "4.5.6"
version = "4.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e5387378c84f6faa26890ebf9f0a92989f8873d4d380467bcd0d8d8620424df"
checksum = "f7e204572485eb3fbf28f871612191521df159bc3e15a9f5064c66dba3a8c05f"
dependencies = [
"anstream",
"anstyle",
@ -2749,7 +2750,7 @@ dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
"synstructure",
"synstructure 0.12.6",
]
[[package]]
@ -3015,6 +3016,17 @@ dependencies = [
"futures-util",
]
[[package]]
name = "futures-buffered"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02dcae03ee5afa5ea17b1aebc793806b8ddfc6dc500e0b8e8e1eb30b9dad22c0"
dependencies = [
"futures-core",
"futures-util",
"pin-project-lite",
]
[[package]]
name = "futures-channel"
version = "0.3.30"
@ -3027,11 +3039,12 @@ dependencies = [
[[package]]
name = "futures-concurrency"
version = "7.6.0"
version = "7.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51ee14e256b9143bfafbf2fddeede6f396650bacf95d06fc1b3f2b503df129a0"
checksum = "4b14ac911e85d57c5ea6eef76d7b4d4a3177ecd15f4bea2e61927e9e3823e19f"
dependencies = [
"bitvec",
"futures-buffered",
"futures-core",
"futures-lite 1.13.0",
"pin-project",
@ -3542,8 +3555,8 @@ dependencies = [
"aho-corasick 1.1.3",
"bstr",
"log",
"regex-automata 0.4.6",
"regex-syntax 0.8.3",
"regex-automata 0.4.7",
"regex-syntax 0.8.4",
"serde",
]
@ -4009,12 +4022,12 @@ dependencies = [
[[package]]
name = "http-body-util"
version = "0.1.1"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0475f8b2ac86659c21b64320d5d653f9efe42acd2a4e560073ec61a155a34f1d"
checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f"
dependencies = [
"bytes",
"futures-core",
"futures-util",
"http 1.1.0",
"http-body 1.0.0",
"pin-project-lite",
@ -4223,6 +4236,124 @@ dependencies = [
"objc2",
]
[[package]]
name = "icu_collections"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526"
dependencies = [
"displaydoc",
"yoke",
"zerofrom",
"zerovec",
]
[[package]]
name = "icu_locid"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637"
dependencies = [
"displaydoc",
"litemap",
"tinystr",
"writeable",
"zerovec",
]
[[package]]
name = "icu_locid_transform"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e"
dependencies = [
"displaydoc",
"icu_locid",
"icu_locid_transform_data",
"icu_provider",
"tinystr",
"zerovec",
]
[[package]]
name = "icu_locid_transform_data"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e"
[[package]]
name = "icu_normalizer"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f"
dependencies = [
"displaydoc",
"icu_collections",
"icu_normalizer_data",
"icu_properties",
"icu_provider",
"smallvec",
"utf16_iter",
"utf8_iter",
"write16",
"zerovec",
]
[[package]]
name = "icu_normalizer_data"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516"
[[package]]
name = "icu_properties"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f8ac670d7422d7f76b32e17a5db556510825b29ec9154f235977c9caba61036"
dependencies = [
"displaydoc",
"icu_collections",
"icu_locid_transform",
"icu_properties_data",
"icu_provider",
"tinystr",
"zerovec",
]
[[package]]
name = "icu_properties_data"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569"
[[package]]
name = "icu_provider"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9"
dependencies = [
"displaydoc",
"icu_locid",
"icu_provider_macros",
"stable_deref_trait",
"tinystr",
"writeable",
"yoke",
"zerofrom",
"zerovec",
]
[[package]]
name = "icu_provider_macros"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.66",
]
[[package]]
name = "ident_case"
version = "1.0.1"
@ -4241,12 +4372,14 @@ dependencies = [
[[package]]
name = "idna"
version = "0.5.0"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6"
checksum = "4716a3a0933a1d01c2f72450e89596eb51dd34ef3c211ccd875acdf1f8fe47ed"
dependencies = [
"unicode-bidi",
"unicode-normalization",
"icu_normalizer",
"icu_properties",
"smallvec",
"utf8_iter",
]
[[package]]
@ -4512,9 +4645,9 @@ checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800"
[[package]]
name = "iter_tools"
version = "0.17.0"
version = "0.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55f9f40b3308a2367d5201430790786748b3e038982317dd880677c0f7b3f3f0"
checksum = "f85582248e8796b1d7146eabe9f70c5b9de4db16bf934ca893581d33c66403b6"
dependencies = [
"itertools 0.11.0",
]
@ -5417,6 +5550,12 @@ version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"
[[package]]
name = "litemap"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704"
[[package]]
name = "litrs"
version = "0.4.1"
@ -8087,14 +8226,14 @@ dependencies = [
[[package]]
name = "regex"
version = "1.10.4"
version = "1.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c"
checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f"
dependencies = [
"aho-corasick 1.1.3",
"memchr",
"regex-automata 0.4.6",
"regex-syntax 0.8.3",
"regex-automata 0.4.7",
"regex-syntax 0.8.4",
]
[[package]]
@ -8108,20 +8247,20 @@ dependencies = [
[[package]]
name = "regex-automata"
version = "0.4.6"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea"
checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
dependencies = [
"aho-corasick 1.1.3",
"memchr",
"regex-syntax 0.8.3",
"regex-syntax 0.8.4",
]
[[package]]
name = "regex-lite"
version = "0.1.5"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30b661b2f27137bdbc16f00eda72866a92bb28af1753ffbd56744fb6e2e9cd8e"
checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a"
[[package]]
name = "regex-syntax"
@ -8131,9 +8270,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
[[package]]
name = "regex-syntax"
version = "0.8.3"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56"
checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
[[package]]
name = "renderdoc-sys"
@ -8864,6 +9003,7 @@ dependencies = [
"sd-p2p-tunnel",
"sd-prisma",
"sd-sync",
"sd-task-system",
"sd-utils",
"serde",
"serde-hashkey",
@ -8984,7 +9124,10 @@ version = "0.1.0"
dependencies = [
"prisma-client-rust",
"sd-prisma",
"sd-utils",
"serde",
"specta",
"uuid",
]
[[package]]
@ -9329,6 +9472,7 @@ dependencies = [
"tokio",
"tokio-stream",
"tracing",
"tracing-subscriber",
"tracing-test",
"uuid",
]
@ -10227,6 +10371,17 @@ dependencies = [
"unicode-xid",
]
[[package]]
name = "synstructure"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.66",
]
[[package]]
name = "sys-locale"
version = "0.3.1"
@ -10851,6 +11006,16 @@ dependencies = [
"strict-num",
]
[[package]]
name = "tinystr"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f"
dependencies = [
"displaydoc",
"zerovec",
]
[[package]]
name = "tinyvec"
version = "1.6.0"
@ -11516,12 +11681,12 @@ dependencies = [
[[package]]
name = "url"
version = "2.5.0"
version = "2.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633"
checksum = "f7c25da092f0a868cdf09e8674cd3b7ef3a7d92a24253e663a2fb85e2496de56"
dependencies = [
"form_urlencoded",
"idna 0.5.0",
"idna 1.0.0",
"percent-encoding",
"serde",
]
@ -11603,6 +11768,12 @@ version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
[[package]]
name = "utf16_iter"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246"
[[package]]
name = "utf16string"
version = "0.2.0"
@ -11612,6 +11783,12 @@ dependencies = [
"byteorder",
]
[[package]]
name = "utf8_iter"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
[[package]]
name = "utf8parse"
version = "0.2.2"
@ -12521,6 +12698,18 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "write16"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936"
[[package]]
name = "writeable"
version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51"
[[package]]
name = "wry"
version = "0.39.5"
@ -12635,12 +12824,12 @@ dependencies = [
[[package]]
name = "xdg-home"
version = "1.1.0"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21e5a325c3cb8398ad6cf859c1135b25dd29e186679cf2da7581d9679f63b38e"
checksum = "ca91dcf8f93db085f3a0a29358cd0b9d670915468f4290e8b85d118a34211ab8"
dependencies = [
"libc",
"winapi",
"windows-sys 0.52.0",
]
[[package]]
@ -12710,6 +12899,30 @@ dependencies = [
"time",
]
[[package]]
name = "yoke"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5"
dependencies = [
"serde",
"stable_deref_trait",
"yoke-derive",
"zerofrom",
]
[[package]]
name = "yoke-derive"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.66",
"synstructure 0.13.1",
]
[[package]]
name = "zbus"
version = "4.0.1"
@ -12789,6 +13002,27 @@ dependencies = [
"syn 2.0.66",
]
[[package]]
name = "zerofrom"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55"
dependencies = [
"zerofrom-derive",
]
[[package]]
name = "zerofrom-derive"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.66",
"synstructure 0.13.1",
]
[[package]]
name = "zeroize"
version = "1.8.1"
@ -12809,6 +13043,28 @@ dependencies = [
"syn 2.0.66",
]
[[package]]
name = "zerovec"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb2cc8827d6c0994478a15c53f374f46fbd41bea663d809b14744bc42e6b109c"
dependencies = [
"yoke",
"zerofrom",
"zerovec-derive",
]
[[package]]
name = "zerovec-derive"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97cf56601ee5052b4417d90c8755c6683473c926039908196cf35d99f893ebe7"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.66",
]
[[package]]
name = "zip"
version = "0.6.6"


@ -114,6 +114,17 @@ lto = false
codegen-units = 256
incremental = true
[profile.dev-debug]
inherits = "dev"
# Enables debugger
split-debuginfo = "none"
opt-level = 0
debug = "full"
strip = "none"
lto = "off"
codegen-units = 256
incremental = true
# Set the settings for build scripts and proc-macros.
[profile.dev.build-override]
opt-level = 3
@ -123,6 +134,13 @@ opt-level = 3
opt-level = 3
incremental = false
# Set the default for dependencies, except workspace members.
[profile.dev-debug.package."*"]
inherits = "dev"
opt-level = 3
debug = "full"
incremental = false
# Optimize release builds
[profile.release]
panic = "abort" # Strip expensive panic clean-up logic


@ -32,7 +32,7 @@ async fn app_ready(app_handle: AppHandle) {
#[tauri::command(async)]
#[specta::specta]
// If this erorrs, we don't have FDA and we need to re-prompt for it
// If this errors, we don't have FDA and we need to re-prompt for it
async fn request_fda_macos() {
DiskAccess::request_fda().expect("Unable to request full disk access");
}


@ -45,9 +45,11 @@ function constructServerUrl(urlSuffix: string) {
export const platform = {
platform: 'tauri',
getThumbnailUrlByThumbKey: (keyParts) =>
getThumbnailUrlByThumbKey: (thumbKey) =>
constructServerUrl(
`/thumbnail/${keyParts.map((i) => encodeURIComponent(i)).join('/')}.webp`
`/thumbnail/${encodeURIComponent(
thumbKey.base_directory_str
)}/${encodeURIComponent(thumbKey.shard_hex)}/${encodeURIComponent(thumbKey.cas_id)}.webp`
),
getFileUrl: (libraryId, locationLocalId, filePathId) =>
constructServerUrl(`/file/${libraryId}/${locationLocalId}/${filePathId}`),


@ -76,8 +76,8 @@ pub fn handle_core_msg(
let new_node = match Node::new(data_dir, sd_core::Env::new(CLIENT_ID)).await {
Ok(node) => node,
Err(err) => {
error!("failed to initialise node: {}", err);
Err(e) => {
error!(?e, "Failed to initialize node;");
callback(Err(query));
return;
}
@ -94,8 +94,8 @@ pub fn handle_core_msg(
false => from_value::<Request>(v).map(|v| vec![v]),
}) {
Ok(v) => v,
Err(err) => {
error!("failed to decode JSON-RPC request: {}", err); // Don't use tracing here because it's before the `Node` is initialised which sets that config!
Err(e) => {
error!(?e, "Failed to decode JSON-RPC request;");
callback(Err(query));
return;
}
@ -133,8 +133,8 @@ pub fn spawn_core_event_listener(callback: impl Fn(String) + Send + 'static) {
while let Some(event) = rx.next().await {
let data = match to_string(&event) {
Ok(json) => json,
Err(err) => {
error!("Failed to serialize event: {err}");
Err(e) => {
error!(?e, "Failed to serialize event;");
continue;
}
};


@ -1,24 +1,25 @@
import { DocumentDirectoryPath } from '@dr.pogodin/react-native-fs';
import { getIcon } from '@sd/assets/util';
import { Image } from 'expo-image';
import { useEffect, useLayoutEffect, useMemo, useState, type PropsWithChildren } from 'react';
import { View } from 'react-native';
import {
getExplorerItemData,
getItemFilePath,
getItemLocation,
isDarkTheme,
ThumbKey,
type ExplorerItem
} from '@sd/client';
import { Image } from 'expo-image';
import { useEffect, useLayoutEffect, useMemo, useState, type PropsWithChildren } from 'react';
import { View } from 'react-native';
import { flattenThumbnailKey, useExplorerStore } from '~/stores/explorerStore';
import { tw } from '../../lib/tailwind';
// NOTE: `file://` is required for Android to load local files!
export const getThumbnailUrlByThumbKey = (thumbKey: string[]) => {
return `file://${DocumentDirectoryPath}/thumbnails/${thumbKey
.map((i) => encodeURIComponent(i))
.join('/')}.webp`;
export const getThumbnailUrlByThumbKey = (thumbKey: ThumbKey) => {
return `file://${DocumentDirectoryPath}/thumbnails/${encodeURIComponent(
thumbKey.base_directory_str
)}/${encodeURIComponent(thumbKey.shard_hex)}/${encodeURIComponent(thumbKey.cas_id)}.webp`;
};
const FileThumbWrapper = ({ children, size = 1 }: PropsWithChildren<{ size: number }>) => (

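The new three-part thumbnail key (base directory, shard, cas_id) replaces the old flat string array across the tauri, mobile, and web platforms. A rough Rust sketch of the path scheme follows, with the struct fields assumed from the frontend's `ThumbKey` usage rather than taken from a backend definition:

```rust
// Hypothetical mirror of the ThumbKey shape used by the frontends; the real
// type is exported through @sd/client.
struct ThumbKey {
    base_directory_str: String,
    shard_hex: String,
    cas_id: String,
}

// Relative thumbnail path as assembled by the platforms above; the frontends
// additionally percent-encode each segment before building the URL.
fn thumbnail_rel_path(key: &ThumbKey) -> String {
    format!(
        "thumbnails/{}/{}/{}.webp",
        key.base_directory_str, key.shard_hex, key.cas_id
    )
}

fn main() {
    let key = ThumbKey {
        base_directory_str: "abcd".into(),
        shard_hex: "ef".into(),
        cas_id: "0123456789abcdef".into(),
    };
    println!("{}", thumbnail_rel_path(&key));
}
```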

@ -1,4 +1,3 @@
import { JobProgressEvent, JobReport, useJobInfo } from '@sd/client';
import {
Copy,
Fingerprint,
@ -11,13 +10,14 @@ import {
} from 'phosphor-react-native';
import { memo } from 'react';
import { View, ViewStyle } from 'react-native';
import { JobProgressEvent, Report, useJobInfo } from '@sd/client';
import { tw, twStyle } from '~/lib/tailwind';
import { ProgressBar } from '../animation/ProgressBar';
import JobContainer from './JobContainer';
type JobProps = {
job: JobReport;
job: Report;
isChild?: boolean;
containerStyle?: ViewStyle;
progress: JobProgressEvent | null;


@ -1,19 +1,19 @@
import { Folder } from '@sd/assets/icons';
import {
getJobNiceActionName,
getTotalTasks,
JobGroup,
JobProgressEvent,
JobReport,
useLibraryMutation,
useRspcLibraryContext,
useTotalElapsedTimeText
} from '@sd/client';
import dayjs from 'dayjs';
import { DotsThreeVertical, Eye, Pause, Play, Stop, Trash } from 'phosphor-react-native';
import { SetStateAction, useMemo, useState } from 'react';
import { Animated, Pressable, View } from 'react-native';
import { Swipeable } from 'react-native-gesture-handler';
import {
getJobNiceActionName,
getTotalTasks,
JobGroup,
JobProgressEvent,
Report,
useLibraryMutation,
useRspcLibraryContext,
useTotalElapsedTimeText
} from '@sd/client';
import { tw, twStyle } from '~/lib/tailwind';
import { AnimatedHeight } from '../animation/layout';
@ -64,7 +64,12 @@ export default function ({ group, progress }: JobGroupProps) {
{ transform: [{ translateX: translate }] }
]}
>
<Options showChildJobs={showChildJobs} setShowChildJobs={setShowChildJobs} activeJob={runningJob} group={group} />
<Options
showChildJobs={showChildJobs}
setShowChildJobs={setShowChildJobs}
activeJob={runningJob}
group={group}
/>
</Animated.View>
);
};
@ -169,22 +174,20 @@ const toastErrorSuccess = (
};
interface OptionsProps {
activeJob?: JobReport;
activeJob?: Report;
group: JobGroup;
showChildJobs: boolean;
setShowChildJobs: React.Dispatch<SetStateAction<boolean>>
setShowChildJobs: React.Dispatch<SetStateAction<boolean>>;
}
function Options({ activeJob, group, setShowChildJobs, showChildJobs }: OptionsProps) {
const rspc = useRspcLibraryContext();
const clearJob = useLibraryMutation(
['jobs.clear'], {
onSuccess: () => {
rspc.queryClient.invalidateQueries(['jobs.reports']);
}
})
const clearJob = useLibraryMutation(['jobs.clear'], {
onSuccess: () => {
rspc.queryClient.invalidateQueries(['jobs.reports']);
}
});
const resumeJob = useLibraryMutation(
['jobs.resume'],
@ -208,8 +211,7 @@ function Options({ activeJob, group, setShowChildJobs, showChildJobs }: OptionsP
group.jobs.forEach((job) => {
clearJob.mutate(job.id);
//only one toast for all jobs
if (job.id === group.id)
toast.success('Job has been removed');
if (job.id === group.id) toast.success('Job has been removed');
});
};
@ -217,35 +219,68 @@ function Options({ activeJob, group, setShowChildJobs, showChildJobs }: OptionsP
<>
{/* Resume */}
{(group.status === 'Queued' || group.status === 'Paused' || isJobPaused) && (
<Button style={tw`h-7 w-7`} variant="outline" size="sm" onPress={() => resumeJob.mutate(group.id)}>
<Button
style={tw`h-7 w-7`}
variant="outline"
size="sm"
onPress={() =>
resumeJob.mutate(
group.running_job_id != null ? group.running_job_id : group.id
)
}
>
<Play size={16} color="white" />
</Button>
)}
{/* TODO: This should remove the job from panel */}
{!activeJob !== undefined ? (
<Menu
containerStyle={tw`max-w-25`}
trigger={
<View style={tw`flex h-7 w-7 flex-row items-center justify-center rounded-md border border-app-inputborder`}>
<DotsThreeVertical size={16} color="white" />
</View>
}
>
<MenuItem
style={twStyle(showChildJobs ? 'rounded bg-app-screen/50' : 'bg-transparent')}
onSelect={() => setShowChildJobs(!showChildJobs)}
text="Expand" icon={Eye}/>
<MenuItem onSelect={clearJobHandler} text='Remove' icon={Trash}/>
</Menu>
) : (
{activeJob !== undefined ? (
<View style={tw`flex flex-row gap-2`}>
<Button style={tw`h-7 w-7`} variant="outline" size="sm" onPress={() => pauseJob.mutate(group.id)}>
<Button
style={tw`h-7 w-7`}
variant="outline"
size="sm"
onPress={() =>
pauseJob.mutate(
group.running_job_id != null ? group.running_job_id : group.id
)
}
>
<Pause size={16} color="white" />
</Button>
<Button style={tw`h-7 w-7`} variant="outline" size="sm" onPress={() => cancelJob.mutate(group.id)}>
<Button
style={tw`h-7 w-7`}
variant="outline"
size="sm"
onPress={() =>
cancelJob.mutate(
group.running_job_id != null ? group.running_job_id : group.id
)
}
>
<Stop size={16} color="white" />
</Button>
</View>
) : (
<Menu
containerStyle={tw`max-w-25`}
trigger={
<View
style={tw`flex h-7 w-7 flex-row items-center justify-center rounded-md border border-app-inputborder`}
>
<DotsThreeVertical size={16} color="white" />
</View>
}
>
<MenuItem
style={twStyle(
showChildJobs ? 'rounded bg-app-screen/50' : 'bg-transparent'
)}
onSelect={() => setShowChildJobs(!showChildJobs)}
text="Expand"
icon={Eye}
/>
<MenuItem onSelect={clearJobHandler} text="Remove" icon={Trash} />
</Menu>
)}
</>
);


@ -1,4 +1,4 @@
import { resetStore } from '@sd/client';
import { ThumbKey, resetStore } from '@sd/client';
import { proxy, useSnapshot } from 'valtio';
import { proxySet } from 'valtio/utils';
@ -26,14 +26,14 @@ const state = {
orderDirection: 'Asc' as 'Asc' | 'Desc'
};
export function flattenThumbnailKey(thumbKey: string[]) {
return thumbKey.join('/');
export function flattenThumbnailKey(thumbKey: ThumbKey) {
return `${thumbKey.base_directory_str}/${thumbKey.shard_hex}/${thumbKey.cas_id}`;
}
const store = proxy({
...state,
reset: () => resetStore(store, state),
addNewThumbnail: (thumbKey: string[]) => {
addNewThumbnail: (thumbKey: ThumbKey) => {
store.newThumbnails.add(flattenThumbnailKey(thumbKey));
},
// this should be done when the explorer query is refreshed


@ -42,8 +42,10 @@ const spacedriveURL = (() => {
const platform: Platform = {
platform: 'web',
getThumbnailUrlByThumbKey: (keyParts) =>
`${spacedriveURL}/thumbnail/${keyParts.map((i) => encodeURIComponent(i)).join('/')}.webp`,
getThumbnailUrlByThumbKey: (thumbKey) =>
`${spacedriveURL}/thumbnail/${encodeURIComponent(
thumbKey.base_directory_str
)}/${encodeURIComponent(thumbKey.shard_hex)}/${encodeURIComponent(thumbKey.cas_id)}.webp`,
getFileUrl: (libraryId, locationLocalId, filePathId) =>
`${spacedriveURL}/file/${encodeURIComponent(libraryId)}/${encodeURIComponent(
locationLocalId


@ -47,6 +47,7 @@ sd-p2p-proto = { path = "../crates/p2p/crates/proto" }
sd-p2p-tunnel = { path = "../crates/p2p/crates/tunnel" }
sd-prisma = { path = "../crates/prisma" }
sd-sync = { path = "../crates/sync" }
sd-task-system = { path = "../crates/task-system" }
sd-utils = { path = "../crates/utils" }
# Workspace dependencies


@ -1,3 +1,5 @@
use sd_core_prisma_helpers::CasId;
use std::path::Path;
use blake3::Hasher;
@ -6,6 +8,7 @@ use tokio::{
fs::{self, File},
io::{self, AsyncReadExt, AsyncSeekExt, SeekFrom},
};
use tracing::{instrument, trace, Level};
const SAMPLE_COUNT: u64 = 4;
const SAMPLE_SIZE: u64 = 1024 * 10;
@ -20,20 +23,29 @@ const_assert!((HEADER_OR_FOOTER_SIZE * 2 + SAMPLE_COUNT * SAMPLE_SIZE) < MINIMUM
// Asserting that the sample size is larger than header/footer size, as the same buffer is used for both
const_assert!(SAMPLE_SIZE > HEADER_OR_FOOTER_SIZE);
#[instrument(
skip(path),
ret(level = Level::TRACE),
err,
fields(path = %path.as_ref().display()
))]
// SAFETY: Casts here are safe, they're hardcoded values we have some const assertions above to make sure they're correct
#[allow(clippy::cast_possible_truncation)]
#[allow(clippy::cast_possible_wrap)]
pub async fn generate_cas_id(
path: impl AsRef<Path> + Send,
size: u64,
) -> Result<String, io::Error> {
) -> Result<CasId<'static>, io::Error> {
let mut hasher = Hasher::new();
hasher.update(&size.to_le_bytes());
if size <= MINIMUM_FILE_SIZE {
trace!("File is small, hashing the whole file");
// For small files, we hash the whole file
hasher.update(&fs::read(path).await?);
} else {
trace!("File bigger than threshold, hashing samples");
let mut file = File::open(path).await?;
let mut buf = vec![0; SAMPLE_SIZE as usize].into_boxed_slice();
@ -64,5 +76,5 @@ pub async fn generate_cas_id(
hasher.update(&buf[..HEADER_OR_FOOTER_SIZE as usize]);
}
Ok(hasher.finalize().to_hex()[..16].to_string())
Ok(hasher.finalize().to_hex()[..16].to_string().into())
}
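
A hypothetical caller of the updated signature, which now returns the typed `CasId<'static>` from `sd-core-prisma-helpers` instead of a plain `String`. This sketch assumes `generate_cas_id` and `CasId` are in scope from this module and that the path points at a regular file, since directories are handled separately upstream:

```rust
use std::{io, path::Path};

use tokio::fs;

// Sketch only: stat the file to get its size, then hash it with the sampling
// scheme implemented above.
async fn cas_id_for_file(path: &Path) -> Result<CasId<'static>, io::Error> {
    let size = fs::metadata(path).await?.len();
    generate_cas_id(path, size).await
}
```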

File diff suppressed because it is too large


@ -1,12 +1,20 @@
use crate::utils::sub_path;
use crate::{utils::sub_path, OuterContext};
use sd_core_file_path_helper::{FilePathError, IsolatedFilePathData};
use sd_core_prisma_helpers::CasId;
use sd_file_ext::{extensions::Extension, kind::ObjectKind};
use sd_prisma::prisma::{file_path, location};
use sd_task_system::{TaskDispatcher, TaskHandle};
use sd_utils::{db::MissingFieldError, error::FileIOError};
use std::{fs::Metadata, path::Path};
use std::{
collections::{hash_map::Entry, HashMap},
fs::Metadata,
mem,
path::Path,
sync::Arc,
};
use prisma_client_rust::{or, QueryError};
use rspc::ErrorCode;
@ -20,11 +28,13 @@ pub mod job;
mod shallow;
mod tasks;
use cas_id::generate_cas_id;
pub use cas_id::generate_cas_id;
pub use job::FileIdentifier;
pub use shallow::shallow;
use tasks::FilePathToCreateOrLinkObject;
// we break these tasks into chunks of 100 to improve performance
const CHUNK_SIZE: usize = 100;
@ -44,17 +54,18 @@ pub enum Error {
}
impl From<Error> for rspc::Error {
fn from(err: Error) -> Self {
match err {
fn from(e: Error) -> Self {
match e {
Error::SubPath(sub_path_err) => sub_path_err.into(),
_ => Self::with_cause(ErrorCode::InternalServerError, err.to_string(), err),
_ => Self::with_cause(ErrorCode::InternalServerError, e.to_string(), e),
}
}
}
#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type)]
pub enum NonCriticalError {
#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type, Clone)]
#[serde(rename_all = "snake_case")]
pub enum NonCriticalFileIdentifierError {
#[error("failed to extract file metadata: {0}")]
FailedToExtractFileMetadata(String),
#[cfg(target_os = "windows")]
@ -66,7 +77,7 @@ pub enum NonCriticalError {
#[derive(Debug, Clone)]
pub struct FileMetadata {
pub cas_id: Option<String>,
pub cas_id: Option<CasId<'static>>,
pub kind: ObjectKind,
pub fs_metadata: Metadata,
}
@ -87,10 +98,14 @@ impl FileMetadata {
.await
.map_err(|e| FileIOError::from((&path, e)))?;
assert!(
!fs_metadata.is_dir(),
"We can't generate cas_id for directories"
);
if fs_metadata.is_dir() {
trace!(path = %path.display(), "Skipping directory;");
return Ok(Self {
cas_id: None,
kind: ObjectKind::Folder,
fs_metadata,
});
}
// derive Object kind
let kind = Extension::resolve_conflicting(&path, false)
@ -108,8 +123,10 @@ impl FileMetadata {
};
trace!(
"Analyzed file: <path='{}', cas_id={cas_id:?}, object_kind={kind}>",
path.display()
path = %path.display(),
?cas_id,
%kind,
"Analyzed file;",
);
Ok(Self {
@ -140,7 +157,7 @@ fn orphan_path_filters_shallow(
)),
file_path::size_in_bytes_bytes::not(Some(0u64.to_be_bytes().to_vec())),
],
[file_path_id.map(file_path::id::gte)],
[file_path_id.map(file_path::id::gt)],
)
}
@ -161,7 +178,7 @@ fn orphan_path_filters_deep(
],
[
// this is a workaround for the cursor not working properly
file_path_id.map(file_path::id::gte),
file_path_id.map(file_path::id::gt),
maybe_sub_iso_file_path.as_ref().map(|sub_iso_file_path| {
file_path::materialized_path::starts_with(
sub_iso_file_path
@ -172,3 +189,91 @@ fn orphan_path_filters_deep(
],
)
}
async fn dispatch_object_processor_tasks<Iter, Dispatcher>(
file_paths_by_cas_id: Iter,
ctx: &impl OuterContext,
dispatcher: &Dispatcher,
with_priority: bool,
) -> Result<Vec<TaskHandle<crate::Error>>, Dispatcher::DispatchError>
where
Iter: IntoIterator<Item = (CasId<'static>, Vec<FilePathToCreateOrLinkObject>)> + Send,
Iter::IntoIter: Send,
Dispatcher: TaskDispatcher<crate::Error>,
{
let mut current_batch = HashMap::<_, Vec<_>>::new();
let mut tasks = vec![];
let mut current_batch_size = 0;
for (cas_id, objects_to_create_or_link) in file_paths_by_cas_id {
if objects_to_create_or_link.len() >= CHUNK_SIZE {
tasks.push(
dispatcher
.dispatch(tasks::ObjectProcessor::new(
HashMap::from([(cas_id, objects_to_create_or_link)]),
Arc::clone(ctx.db()),
Arc::clone(ctx.sync()),
with_priority,
))
.await?,
);
} else {
current_batch_size += objects_to_create_or_link.len();
match current_batch.entry(cas_id) {
Entry::Occupied(entry) => {
entry.into_mut().extend(objects_to_create_or_link);
}
Entry::Vacant(entry) => {
entry.insert(objects_to_create_or_link);
}
}
if current_batch_size >= CHUNK_SIZE {
tasks.push(
dispatcher
.dispatch(tasks::ObjectProcessor::new(
mem::take(&mut current_batch),
Arc::clone(ctx.db()),
Arc::clone(ctx.sync()),
with_priority,
))
.await?,
);
current_batch_size = 0;
}
}
}
if !current_batch.is_empty() {
tasks.push(
dispatcher
.dispatch(tasks::ObjectProcessor::new(
current_batch,
Arc::clone(ctx.db()),
Arc::clone(ctx.sync()),
with_priority,
))
.await?,
);
}
Ok(tasks)
}
fn accumulate_file_paths_by_cas_id(
input: HashMap<CasId<'static>, Vec<FilePathToCreateOrLinkObject>>,
accumulator: &mut HashMap<CasId<'static>, Vec<FilePathToCreateOrLinkObject>>,
) {
for (cas_id, file_paths) in input {
match accumulator.entry(cas_id) {
Entry::<_, Vec<_>>::Occupied(entry) => {
entry.into_mut().extend(file_paths);
}
Entry::Vacant(entry) => {
entry.insert(file_paths);
}
}
}
}


@ -1,6 +1,6 @@
use crate::{
file_identifier, utils::sub_path::maybe_get_iso_file_path_from_sub_path, Error,
NonCriticalError, OuterContext,
NonCriticalError, OuterContext, UpdateEvent,
};
use sd_core_file_path_helper::IsolatedFilePathData;
@ -8,34 +8,40 @@ use sd_core_prisma_helpers::file_path_for_file_identifier;
use sd_prisma::prisma::{file_path, location, SortOrder};
use sd_task_system::{
BaseTaskDispatcher, CancelTaskOnDrop, TaskDispatcher, TaskOutput, TaskStatus,
BaseTaskDispatcher, CancelTaskOnDrop, TaskDispatcher, TaskHandle, TaskOutput, TaskStatus,
};
use sd_utils::db::maybe_missing;
use std::{
collections::HashMap,
path::{Path, PathBuf},
sync::Arc,
};
use futures_concurrency::future::FutureGroup;
use lending_stream::{LendingStream, StreamExt};
use tracing::{debug, warn};
use futures::{stream::FuturesUnordered, StreamExt};
use tracing::{debug, instrument, trace, warn};
use super::{
orphan_path_filters_shallow,
tasks::{
extract_file_metadata, object_processor, ExtractFileMetadataTask, ObjectProcessorTask,
},
accumulate_file_paths_by_cas_id, dispatch_object_processor_tasks, orphan_path_filters_shallow,
tasks::{self, identifier, object_processor},
CHUNK_SIZE,
};
#[instrument(
skip_all,
fields(
location_id = location.id,
location_path = ?location.path,
sub_path = %sub_path.as_ref().display()
)
err,
)]
pub async fn shallow(
location: location::Data,
sub_path: impl AsRef<Path> + Send,
dispatcher: BaseTaskDispatcher<Error>,
ctx: impl OuterContext,
dispatcher: &BaseTaskDispatcher<Error>,
ctx: &impl OuterContext,
) -> Result<Vec<NonCriticalError>, Error> {
let sub_path = sub_path.as_ref();
let db = ctx.db();
let location_path = maybe_missing(&location.path, "location.path")
@ -45,22 +51,25 @@ pub async fn shallow(
let location = Arc::new(location);
let sub_iso_file_path =
maybe_get_iso_file_path_from_sub_path(location.id, &Some(sub_path), &*location_path, db)
.await
.map_err(file_identifier::Error::from)?
.map_or_else(
|| {
IsolatedFilePathData::new(location.id, &*location_path, &*location_path, true)
.map_err(file_identifier::Error::from)
},
Ok,
)?;
let sub_iso_file_path = maybe_get_iso_file_path_from_sub_path::<file_identifier::Error>(
location.id,
Some(sub_path.as_ref()),
&*location_path,
db,
)
.await?
.map_or_else(
|| {
IsolatedFilePathData::new(location.id, &*location_path, &*location_path, true)
.map_err(file_identifier::Error::from)
},
Ok,
)?;
let mut orphans_count = 0;
let mut last_orphan_file_path_id = None;
let mut pending_running_tasks = FutureGroup::new();
let mut identifier_tasks = vec![];
loop {
#[allow(clippy::cast_possible_wrap)]
@ -87,70 +96,89 @@ pub async fn shallow(
orphans_count += orphan_paths.len() as u64;
last_orphan_file_path_id = Some(last_orphan.id);
pending_running_tasks.insert(CancelTaskOnDrop(
dispatcher
.dispatch(ExtractFileMetadataTask::new(
Arc::clone(&location),
Arc::clone(&location_path),
orphan_paths,
true,
))
.await,
));
let Ok(tasks) = dispatcher
.dispatch(tasks::Identifier::new(
Arc::clone(&location),
Arc::clone(&location_path),
orphan_paths,
true,
Arc::clone(ctx.db()),
Arc::clone(ctx.sync()),
))
.await
else {
debug!("Task system is shutting down while a shallow file identifier was in progress");
return Ok(vec![]);
};
identifier_tasks.push(tasks);
}
if orphans_count == 0 {
debug!(
"No orphans found on <location_id={}, sub_path='{}'>",
location.id,
sub_path.display()
);
trace!("No orphans found");
return Ok(vec![]);
}
let errors = process_tasks(pending_running_tasks, dispatcher, ctx).await?;
Ok(errors)
process_tasks(identifier_tasks, dispatcher, ctx).await
}
async fn process_tasks(
pending_running_tasks: FutureGroup<CancelTaskOnDrop<Error>>,
dispatcher: BaseTaskDispatcher<Error>,
ctx: impl OuterContext,
identifier_tasks: Vec<TaskHandle<Error>>,
dispatcher: &BaseTaskDispatcher<Error>,
ctx: &impl OuterContext,
) -> Result<Vec<NonCriticalError>, Error> {
let mut pending_running_tasks = pending_running_tasks.lend_mut();
let total_identifier_tasks = identifier_tasks.len();
let db = ctx.db();
let sync = ctx.sync();
let mut pending_running_tasks = identifier_tasks
.into_iter()
.map(CancelTaskOnDrop::new)
.collect::<FuturesUnordered<_>>();
let mut errors = vec![];
let mut completed_identifier_tasks = 0;
let mut file_paths_accumulator = HashMap::new();
while let Some((pending_running_tasks, task_result)) = pending_running_tasks.next().await {
while let Some(task_result) = pending_running_tasks.next().await {
match task_result {
Ok(TaskStatus::Done((_, TaskOutput::Out(any_task_output)))) => {
// We only care about ExtractFileMetadataTaskOutput because we need to dispatch further tasks
// and the ObjectProcessorTask only gives back some metrics not much important for
// shallow file identifier
if any_task_output.is::<extract_file_metadata::Output>() {
let extract_file_metadata::Output {
identified_files,
if any_task_output.is::<identifier::Output>() {
let identifier::Output {
file_path_ids_with_new_object,
file_paths_by_cas_id,
errors: more_errors,
..
} = *any_task_output.downcast().expect("just checked");
completed_identifier_tasks += 1;
ctx.report_update(UpdateEvent::NewIdentifiedObjects {
file_path_ids: file_path_ids_with_new_object,
});
accumulate_file_paths_by_cas_id(
file_paths_by_cas_id,
&mut file_paths_accumulator,
);
errors.extend(more_errors);
if !identified_files.is_empty() {
pending_running_tasks.insert(CancelTaskOnDrop(
dispatcher
.dispatch(ObjectProcessorTask::new(
identified_files,
Arc::clone(db),
Arc::clone(sync),
true,
))
.await,
));
if total_identifier_tasks == completed_identifier_tasks {
let Ok(tasks) = dispatch_object_processor_tasks(
file_paths_accumulator.drain(),
ctx,
dispatcher,
true,
)
.await
else {
debug!("Task system is shutting down while a shallow file identifier was in progress");
continue;
};
pending_running_tasks.extend(tasks.into_iter().map(CancelTaskOnDrop::new));
}
} else {
let object_processor::Output {
@ -158,21 +186,21 @@ async fn process_tasks(
..
} = *any_task_output.downcast().expect("just checked");
ctx.report_update(crate::UpdateEvent::NewIdentifiedObjects {
ctx.report_update(UpdateEvent::NewIdentifiedObjects {
file_path_ids: file_path_ids_with_new_object,
});
}
}
Ok(TaskStatus::Done((task_id, TaskOutput::Empty))) => {
warn!("Task <id='{task_id}'> returned an empty output");
warn!(%task_id, "Task returned an empty output");
}
Ok(TaskStatus::Shutdown(_)) => {
debug!(
"Spacedrive is shutting down while a shallow file identifier was in progress"
);
return Ok(vec![]);
continue;
}
Ok(TaskStatus::Error(e)) => {
@ -181,7 +209,7 @@ async fn process_tasks(
Ok(TaskStatus::Canceled | TaskStatus::ForcedAbortion) => {
warn!("Task was cancelled or aborted on shallow file identifier");
return Ok(vec![]);
return Ok(errors);
}
Err(e) => {


@ -1,267 +0,0 @@
use crate::{
file_identifier::{self, FileMetadata},
Error, NonCriticalError,
};
use sd_core_file_path_helper::IsolatedFilePathData;
use sd_core_prisma_helpers::file_path_for_file_identifier;
use sd_prisma::prisma::location;
use sd_task_system::{
ExecStatus, Interrupter, InterruptionKind, IntoAnyTaskOutput, SerializableTask, Task, TaskId,
};
use sd_utils::error::FileIOError;
use std::{
collections::HashMap, future::IntoFuture, mem, path::PathBuf, pin::pin, sync::Arc,
time::Duration,
};
use futures::stream::{self, FuturesUnordered, StreamExt};
use futures_concurrency::stream::Merge;
use serde::{Deserialize, Serialize};
use tokio::time::Instant;
use tracing::error;
use uuid::Uuid;
use super::IdentifiedFile;
#[derive(Debug, Serialize, Deserialize)]
pub struct ExtractFileMetadataTask {
id: TaskId,
location: Arc<location::Data>,
location_path: Arc<PathBuf>,
file_paths_by_id: HashMap<Uuid, file_path_for_file_identifier::Data>,
identified_files: HashMap<Uuid, IdentifiedFile>,
extract_metadata_time: Duration,
errors: Vec<NonCriticalError>,
with_priority: bool,
}
#[derive(Debug)]
pub struct Output {
pub identified_files: HashMap<Uuid, IdentifiedFile>,
pub extract_metadata_time: Duration,
pub errors: Vec<NonCriticalError>,
}
impl ExtractFileMetadataTask {
#[must_use]
pub fn new(
location: Arc<location::Data>,
location_path: Arc<PathBuf>,
file_paths: Vec<file_path_for_file_identifier::Data>,
with_priority: bool,
) -> Self {
Self {
id: TaskId::new_v4(),
location,
location_path,
identified_files: HashMap::with_capacity(file_paths.len()),
file_paths_by_id: file_paths
.into_iter()
.map(|file_path| {
// SAFETY: This should never happen
(
Uuid::from_slice(&file_path.pub_id).expect("file_path.pub_id is invalid!"),
file_path,
)
})
.collect(),
extract_metadata_time: Duration::ZERO,
errors: Vec::new(),
with_priority,
}
}
}
#[async_trait::async_trait]
impl Task<Error> for ExtractFileMetadataTask {
fn id(&self) -> TaskId {
self.id
}
fn with_priority(&self) -> bool {
self.with_priority
}
async fn run(&mut self, interrupter: &Interrupter) -> Result<ExecStatus, Error> {
// `Processed` is larger than `Interrupt`, but it's much more common
// so we ignore the size difference to optimize for usage
#[allow(clippy::large_enum_variant)]
enum StreamMessage {
Processed(Uuid, Result<FileMetadata, FileIOError>),
Interrupt(InterruptionKind),
}
let Self {
location,
location_path,
file_paths_by_id,
identified_files,
extract_metadata_time,
errors,
..
} = self;
let start_time = Instant::now();
if !file_paths_by_id.is_empty() {
let extraction_futures = file_paths_by_id
.iter()
.filter_map(|(file_path_id, file_path)| {
try_iso_file_path_extraction(
location.id,
*file_path_id,
file_path,
Arc::clone(location_path),
errors,
)
})
.map(|(file_path_id, iso_file_path, location_path)| async move {
StreamMessage::Processed(
file_path_id,
FileMetadata::new(&*location_path, &iso_file_path).await,
)
})
.collect::<FuturesUnordered<_>>();
let mut msg_stream = pin!((
extraction_futures,
stream::once(interrupter.into_future()).map(StreamMessage::Interrupt)
)
.merge());
while let Some(msg) = msg_stream.next().await {
match msg {
StreamMessage::Processed(file_path_pub_id, res) => {
let file_path = file_paths_by_id
.remove(&file_path_pub_id)
.expect("file_path must be here");
match res {
Ok(FileMetadata { cas_id, kind, .. }) => {
identified_files.insert(
file_path_pub_id,
IdentifiedFile {
file_path,
cas_id,
kind,
},
);
}
Err(e) => {
handle_non_critical_errors(
location.id,
file_path_pub_id,
&e,
errors,
);
}
}
if file_paths_by_id.is_empty() {
// All files have been processed so we can end this merged stream and don't keep waiting an
// interrupt signal
break;
}
}
StreamMessage::Interrupt(kind) => {
*extract_metadata_time += start_time.elapsed();
return Ok(match kind {
InterruptionKind::Pause => ExecStatus::Paused,
InterruptionKind::Cancel => ExecStatus::Canceled,
});
}
}
}
}
Ok(ExecStatus::Done(
Output {
identified_files: mem::take(identified_files),
extract_metadata_time: *extract_metadata_time + start_time.elapsed(),
errors: mem::take(errors),
}
.into_output(),
))
}
}
fn handle_non_critical_errors(
location_id: location::id::Type,
file_path_pub_id: Uuid,
e: &FileIOError,
errors: &mut Vec<NonCriticalError>,
) {
error!("Failed to extract file metadata <location_id={location_id}, file_path_pub_id='{file_path_pub_id}'>: {e:#?}");
let formatted_error = format!("<file_path_pub_id='{file_path_pub_id}', error={e}>");
#[cfg(target_os = "windows")]
{
// Handle case where file is on-demand (NTFS only)
if e.source.raw_os_error().map_or(false, |code| code == 362) {
errors.push(
file_identifier::NonCriticalError::FailedToExtractMetadataFromOnDemandFile(
formatted_error,
)
.into(),
);
} else {
errors.push(
file_identifier::NonCriticalError::FailedToExtractFileMetadata(formatted_error)
.into(),
);
}
}
#[cfg(not(target_os = "windows"))]
{
errors.push(
file_identifier::NonCriticalError::FailedToExtractFileMetadata(formatted_error).into(),
);
}
}
fn try_iso_file_path_extraction(
location_id: location::id::Type,
file_path_pub_id: Uuid,
file_path: &file_path_for_file_identifier::Data,
location_path: Arc<PathBuf>,
errors: &mut Vec<NonCriticalError>,
) -> Option<(Uuid, IsolatedFilePathData<'static>, Arc<PathBuf>)> {
IsolatedFilePathData::try_from((location_id, file_path))
.map(IsolatedFilePathData::to_owned)
.map(|iso_file_path| (file_path_pub_id, iso_file_path, location_path))
.map_err(|e| {
error!("Failed to extract isolated file path data: {e:#?}");
errors.push(
file_identifier::NonCriticalError::FailedToExtractIsolatedFilePathData(format!(
"<file_path_pub_id='{file_path_pub_id}', error={e}>"
))
.into(),
);
})
.ok()
}
impl SerializableTask<Error> for ExtractFileMetadataTask {
type SerializeError = rmp_serde::encode::Error;
type DeserializeError = rmp_serde::decode::Error;
type DeserializeCtx = ();
async fn serialize(self) -> Result<Vec<u8>, Self::SerializeError> {
rmp_serde::to_vec_named(&self)
}
async fn deserialize(
data: &[u8],
(): Self::DeserializeCtx,
) -> Result<Self, Self::DeserializeError> {
rmp_serde::from_slice(data)
}
}


@ -0,0 +1,508 @@
use crate::{
file_identifier::{self, FileMetadata},
Error, NonCriticalError,
};
use sd_core_file_path_helper::IsolatedFilePathData;
use sd_core_prisma_helpers::{file_path_for_file_identifier, CasId, FilePathPubId};
use sd_core_sync::Manager as SyncManager;
use sd_file_ext::kind::ObjectKind;
use sd_prisma::{
prisma::{file_path, location, PrismaClient},
prisma_sync,
};
use sd_sync::OperationFactory;
use sd_task_system::{
ExecStatus, Interrupter, InterruptionKind, IntoAnyTaskOutput, SerializableTask, Task, TaskId,
};
use sd_utils::{error::FileIOError, msgpack};
use std::{
collections::HashMap, future::IntoFuture, mem, path::PathBuf, pin::pin, sync::Arc,
time::Duration,
};
use futures::stream::{self, FuturesUnordered, StreamExt};
use futures_concurrency::{future::TryJoin, stream::Merge};
use serde::{Deserialize, Serialize};
use tokio::time::Instant;
use tracing::{error, instrument, trace, Level};
use super::{create_objects_and_update_file_paths, FilePathToCreateOrLinkObject};
#[derive(Debug, Serialize, Deserialize)]
struct IdentifiedFile {
file_path: file_path_for_file_identifier::Data,
cas_id: CasId<'static>,
kind: ObjectKind,
}
impl IdentifiedFile {
pub fn new(
file_path: file_path_for_file_identifier::Data,
cas_id: impl Into<CasId<'static>>,
kind: ObjectKind,
) -> Self {
Self {
file_path,
cas_id: cas_id.into(),
kind,
}
}
}
#[derive(Debug)]
pub struct Identifier {
// Task control
id: TaskId,
with_priority: bool,
// Received input args
location: Arc<location::Data>,
location_path: Arc<PathBuf>,
file_paths_by_id: HashMap<FilePathPubId, file_path_for_file_identifier::Data>,
// Inner state
identified_files: HashMap<FilePathPubId, IdentifiedFile>,
file_paths_without_cas_id: Vec<FilePathToCreateOrLinkObject>,
// Out collector
output: Output,
// Dependencies
db: Arc<PrismaClient>,
sync: Arc<SyncManager>,
}
/// Output from the `[Identifier]` task
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct Output {
/// To send to frontend for priority reporting of new objects
pub file_path_ids_with_new_object: Vec<file_path::id::Type>,
/// Files that need to be aggregated across many identifier tasks to be processed by the
/// object processor tasks
pub file_paths_by_cas_id: HashMap<CasId<'static>, Vec<FilePathToCreateOrLinkObject>>,
/// Collected metric about time elapsed extracting metadata from file system
pub extract_metadata_time: Duration,
/// Collected metric about time spent saving objects on disk
pub save_db_time: Duration,
/// Total number of objects already created as they didn't have `cas_id`, like directories or empty files
pub created_objects_count: u64,
/// Total number of files that we were able to identify
pub total_identified_files: u64,
/// Non critical errors that happened during the task execution
pub errors: Vec<NonCriticalError>,
}
#[async_trait::async_trait]
impl Task<Error> for Identifier {
fn id(&self) -> TaskId {
self.id
}
fn with_priority(&self) -> bool {
self.with_priority
}
#[instrument(
skip(self, interrupter),
fields(
task_id = %self.id,
location_id = %self.location.id,
location_path = %self.location_path.display(),
files_count = %self.file_paths_by_id.len(),
),
ret(level = Level::TRACE),
err,
)]
#[allow(clippy::blocks_in_conditions)] // Due to `err` on `instrument` macro above
async fn run(&mut self, interrupter: &Interrupter) -> Result<ExecStatus, Error> {
// `Processed` is larger than `Interrupt`, but it's much more common
// so we ignore the size difference to optimize for usage
#[allow(clippy::large_enum_variant)]
enum StreamMessage {
Processed(FilePathPubId, Result<FileMetadata, FileIOError>),
Interrupt(InterruptionKind),
}
let Self {
location,
location_path,
file_paths_by_id,
file_paths_without_cas_id,
identified_files,
output,
..
} = self;
if !file_paths_by_id.is_empty() {
let start_time = Instant::now();
let extraction_futures = file_paths_by_id
.iter()
.filter_map(|(file_path_id, file_path)| {
try_iso_file_path_extraction(
location.id,
file_path_id.clone(),
file_path,
Arc::clone(location_path),
&mut output.errors,
)
})
.map(|(file_path_id, iso_file_path, location_path)| async move {
StreamMessage::Processed(
file_path_id,
FileMetadata::new(&*location_path, &iso_file_path).await,
)
})
.collect::<FuturesUnordered<_>>();
let mut msg_stream = pin!((
extraction_futures,
stream::once(interrupter.into_future()).map(StreamMessage::Interrupt)
)
.merge());
while let Some(msg) = msg_stream.next().await {
match msg {
StreamMessage::Processed(file_path_pub_id, res) => {
let file_path = file_paths_by_id
.remove(&file_path_pub_id)
.expect("file_path must be here");
trace!(
files_remaining = file_paths_by_id.len(),
%file_path_pub_id,
"Processed file;",
);
match res {
Ok(FileMetadata {
cas_id: Some(cas_id),
kind,
..
}) => {
identified_files.insert(
file_path_pub_id,
IdentifiedFile::new(file_path, cas_id, kind),
);
}
Ok(FileMetadata {
cas_id: None, kind, ..
}) => {
let file_path_for_file_identifier::Data {
id,
pub_id,
date_created,
..
} = file_path;
file_paths_without_cas_id.push(FilePathToCreateOrLinkObject {
id,
file_path_pub_id: pub_id.into(),
kind,
created_at: date_created,
});
}
Err(e) => {
handle_non_critical_errors(
file_path_pub_id,
&e,
&mut output.errors,
);
}
}
if file_paths_by_id.is_empty() {
trace!("All files have been processed");
// All files have been processed so we can end this merged stream
// and don't keep waiting an interrupt signal
break;
}
}
StreamMessage::Interrupt(kind) => {
trace!(?kind, "Interrupted;");
output.extract_metadata_time += start_time.elapsed();
return Ok(match kind {
InterruptionKind::Pause => ExecStatus::Paused,
InterruptionKind::Cancel => ExecStatus::Canceled,
});
}
}
}
output.extract_metadata_time = start_time.elapsed();
output.total_identified_files =
identified_files.len() as u64 + file_paths_without_cas_id.len() as u64;
trace!(
identified_files_count = identified_files.len(),
"All files have been processed, saving cas_ids to db...;"
);
let start_time = Instant::now();
// Assign cas_id to each file path
let ((), file_path_ids_with_new_object) = (
assign_cas_id_to_file_paths(identified_files, &self.db, &self.sync),
create_objects_and_update_file_paths(
file_paths_without_cas_id.drain(..),
&self.db,
&self.sync,
),
)
.try_join()
.await?;
output.save_db_time = start_time.elapsed();
output.created_objects_count = file_path_ids_with_new_object.len() as u64;
output.file_path_ids_with_new_object =
file_path_ids_with_new_object.into_keys().collect();
output.file_paths_by_cas_id = identified_files.drain().fold(
HashMap::new(),
|mut map,
(
file_path_pub_id,
IdentifiedFile {
cas_id,
kind,
file_path:
file_path_for_file_identifier::Data {
id, date_created, ..
},
},
)| {
map.entry(cas_id)
.or_insert_with(|| Vec::with_capacity(1))
.push(FilePathToCreateOrLinkObject {
id,
file_path_pub_id,
kind,
created_at: date_created,
});
map
},
);
trace!(save_db_time = ?output.save_db_time, "Cas_ids saved to db;");
}
Ok(ExecStatus::Done(mem::take(output).into_output()))
}
}
impl Identifier {
#[must_use]
pub fn new(
location: Arc<location::Data>,
location_path: Arc<PathBuf>,
file_paths: Vec<file_path_for_file_identifier::Data>,
with_priority: bool,
db: Arc<PrismaClient>,
sync: Arc<SyncManager>,
) -> Self {
Self {
id: TaskId::new_v4(),
location,
location_path,
identified_files: HashMap::with_capacity(file_paths.len()),
file_paths_without_cas_id: Vec::with_capacity(file_paths.len()),
file_paths_by_id: file_paths
.into_iter()
.map(|file_path| (file_path.pub_id.as_slice().into(), file_path))
.collect(),
output: Output::default(),
with_priority,
db,
sync,
}
}
}
#[instrument(skip_all, err, fields(identified_files_count = identified_files.len()))]
async fn assign_cas_id_to_file_paths(
identified_files: &HashMap<FilePathPubId, IdentifiedFile>,
db: &PrismaClient,
sync: &SyncManager,
) -> Result<(), file_identifier::Error> {
// Assign cas_id to each file path
sync.write_ops(
db,
identified_files
.iter()
.map(|(pub_id, IdentifiedFile { cas_id, .. })| {
(
sync.shared_update(
prisma_sync::file_path::SyncId {
pub_id: pub_id.to_db(),
},
file_path::cas_id::NAME,
msgpack!(cas_id),
),
db.file_path()
.update(
file_path::pub_id::equals(pub_id.to_db()),
vec![file_path::cas_id::set(cas_id.into())],
)
// We don't need any data here, just the id avoids receiving the entire object
// as we can't pass an empty select macro call
.select(file_path::select!({ id })),
)
})
.unzip::<_, _, _, Vec<_>>(),
)
.await?;
Ok(())
}
#[instrument(skip(errors))]
fn handle_non_critical_errors(
file_path_pub_id: FilePathPubId,
e: &FileIOError,
errors: &mut Vec<NonCriticalError>,
) {
let formatted_error = format!("<file_path_pub_id='{file_path_pub_id}', error={e}>");
#[cfg(target_os = "windows")]
{
// Handle case where file is on-demand (NTFS only)
if e.source.raw_os_error().map_or(false, |code| code == 362) {
errors.push(
file_identifier::NonCriticalFileIdentifierError::FailedToExtractMetadataFromOnDemandFile(
formatted_error,
)
.into(),
);
} else {
errors.push(
file_identifier::NonCriticalFileIdentifierError::FailedToExtractFileMetadata(
formatted_error,
)
.into(),
);
}
}
#[cfg(not(target_os = "windows"))]
{
errors.push(
file_identifier::NonCriticalFileIdentifierError::FailedToExtractFileMetadata(
formatted_error,
)
.into(),
);
}
}
#[instrument(
skip(location_id, file_path, location_path, errors),
fields(
file_path_id = file_path.id,
materialized_path = ?file_path.materialized_path,
name = ?file_path.name,
extension = ?file_path.extension,
)
)]
fn try_iso_file_path_extraction(
location_id: location::id::Type,
file_path_pub_id: FilePathPubId,
file_path: &file_path_for_file_identifier::Data,
location_path: Arc<PathBuf>,
errors: &mut Vec<NonCriticalError>,
) -> Option<(FilePathPubId, IsolatedFilePathData<'static>, Arc<PathBuf>)> {
IsolatedFilePathData::try_from((location_id, file_path))
.map(IsolatedFilePathData::to_owned)
.map_err(|e| {
error!(?e, "Failed to extract isolated file path data;");
errors.push(
file_identifier::NonCriticalFileIdentifierError::FailedToExtractIsolatedFilePathData(format!(
"<file_path_pub_id='{file_path_pub_id}', error={e}>"
))
.into(),
);
})
.map(|iso_file_path| (file_path_pub_id, iso_file_path, location_path))
.ok()
}
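// Hedged usage sketch (names illustrative): narrowing a batch of file paths to
// the ones whose isolated path data can be extracted, collecting non-critical
// errors for the frontend instead of failing the whole task.
let iso_file_paths = file_paths
    .iter()
    .filter_map(|file_path| {
        try_iso_file_path_extraction(
            location.id,
            file_path.pub_id.as_slice().into(),
            file_path,
            Arc::clone(&location_path),
            &mut errors,
        )
    })
    .collect::<Vec<_>>();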
#[derive(Serialize, Deserialize)]
struct SaveState {
id: TaskId,
location: Arc<location::Data>,
location_path: Arc<PathBuf>,
file_paths_by_id: HashMap<FilePathPubId, file_path_for_file_identifier::Data>,
identified_files: HashMap<FilePathPubId, IdentifiedFile>,
file_paths_without_cas_id: Vec<FilePathToCreateOrLinkObject>,
output: Output,
with_priority: bool,
}
impl SerializableTask<Error> for Identifier {
type SerializeError = rmp_serde::encode::Error;
type DeserializeError = rmp_serde::decode::Error;
type DeserializeCtx = (Arc<PrismaClient>, Arc<SyncManager>);
async fn serialize(self) -> Result<Vec<u8>, Self::SerializeError> {
let Self {
id,
location,
location_path,
file_paths_by_id,
identified_files,
file_paths_without_cas_id,
output,
with_priority,
..
} = self;
rmp_serde::to_vec_named(&SaveState {
id,
location,
location_path,
file_paths_by_id,
identified_files,
file_paths_without_cas_id,
output,
with_priority,
})
}
async fn deserialize(
data: &[u8],
(db, sync): Self::DeserializeCtx,
) -> Result<Self, Self::DeserializeError> {
rmp_serde::from_slice::<SaveState>(data).map(
|SaveState {
id,
location,
location_path,
file_paths_by_id,
identified_files,
file_paths_without_cas_id,
output,
with_priority,
}| Self {
id,
with_priority,
location,
location_path,
file_paths_by_id,
identified_files,
file_paths_without_cas_id,
output,
db,
sync,
},
)
}
}


@ -1,18 +1,171 @@
use sd_core_prisma_helpers::file_path_for_file_identifier;
use crate::file_identifier;
use sd_core_prisma_helpers::{file_path_id, FilePathPubId, ObjectPubId};
use sd_core_sync::Manager as SyncManager;
use sd_file_ext::kind::ObjectKind;
use sd_prisma::{
prisma::{file_path, object, PrismaClient},
prisma_sync,
};
use sd_sync::{CRDTOperation, OperationFactory};
use sd_utils::msgpack;
use std::collections::{HashMap, HashSet};
use chrono::{DateTime, FixedOffset};
use prisma_client_rust::Select;
use serde::{Deserialize, Serialize};
use tracing::{instrument, trace, Level};
pub mod extract_file_metadata;
pub mod identifier;
pub mod object_processor;
pub use extract_file_metadata::ExtractFileMetadataTask;
pub use object_processor::ObjectProcessorTask;
pub use identifier::Identifier;
pub use object_processor::ObjectProcessor;
/// This object holds all the data needed to create a new `object` for a `file_path` or to link it to an existing one.
#[derive(Debug, Serialize, Deserialize)]
pub(super) struct IdentifiedFile {
pub(super) file_path: file_path_for_file_identifier::Data,
pub(super) cas_id: Option<String>,
pub(super) kind: ObjectKind,
pub(super) struct FilePathToCreateOrLinkObject {
id: file_path::id::Type,
file_path_pub_id: FilePathPubId,
kind: ObjectKind,
created_at: Option<DateTime<FixedOffset>>,
}
#[instrument(skip(sync, db))]
fn connect_file_path_to_object<'db>(
file_path_pub_id: &FilePathPubId,
object_pub_id: &ObjectPubId,
db: &'db PrismaClient,
sync: &SyncManager,
) -> (CRDTOperation, Select<'db, file_path_id::Data>) {
trace!("Connecting");
(
sync.shared_update(
prisma_sync::file_path::SyncId {
pub_id: file_path_pub_id.to_db(),
},
file_path::object::NAME,
msgpack!(prisma_sync::object::SyncId {
pub_id: object_pub_id.to_db(),
}),
),
db.file_path()
.update(
file_path::pub_id::equals(file_path_pub_id.to_db()),
vec![file_path::object::connect(object::pub_id::equals(
object_pub_id.to_db(),
))],
)
// selecting just id to avoid fetching the whole object
.select(file_path_id::select()),
)
}
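// Hedged usage sketch: the (CRDTOperation, Select) pair returned above is meant
// to be committed through the sync manager, so the local db update and its CRDT
// entry land together (single-pair variant of the batched calls further below).
let (op, query) = connect_file_path_to_object(&file_path_pub_id, &object_pub_id, db, sync);
sync.write_ops(db, (vec![op], vec![query])).await?;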
#[instrument(skip_all, ret(level = Level::TRACE), err)]
async fn create_objects_and_update_file_paths(
files_and_kinds: impl IntoIterator<Item = FilePathToCreateOrLinkObject> + Send,
db: &PrismaClient,
sync: &SyncManager,
) -> Result<HashMap<file_path::id::Type, ObjectPubId>, file_identifier::Error> {
trace!("Preparing objects");
let (object_create_args, file_path_args) = files_and_kinds
.into_iter()
.map(
|FilePathToCreateOrLinkObject {
id,
file_path_pub_id,
kind,
created_at,
}| {
let object_pub_id = ObjectPubId::new();
let kind = kind as i32;
let (sync_params, db_params) = [
(
(object::date_created::NAME, msgpack!(created_at)),
object::date_created::set(created_at),
),
(
(object::kind::NAME, msgpack!(kind)),
object::kind::set(Some(kind)),
),
]
.into_iter()
.unzip::<_, _, Vec<_>, Vec<_>>();
(
(
sync.shared_create(
prisma_sync::object::SyncId {
pub_id: object_pub_id.to_db(),
},
sync_params,
),
object::create_unchecked(object_pub_id.to_db(), db_params),
),
(
(id, object_pub_id.clone()),
connect_file_path_to_object(&file_path_pub_id, &object_pub_id, db, sync),
),
)
},
)
.unzip::<_, _, Vec<_>, Vec<_>>();
let (mut object_pub_id_by_file_path_id, file_path_update_args) = file_path_args
.into_iter()
.unzip::<_, _, HashMap<_, _>, Vec<_>>(
);
trace!(
new_objects_count = object_create_args.len(),
"Creating new Objects!;",
);
// create new object records with assembled values
let created_objects_count = sync
.write_ops(db, {
let (sync, db_params) = object_create_args
.into_iter()
.unzip::<_, _, Vec<_>, Vec<_>>();
(
sync.into_iter().flatten().collect(),
db.object().create_many(db_params),
)
})
.await?;
trace!(%created_objects_count, "Created new Objects;");
if created_objects_count > 0 {
trace!("Updating file paths with created objects");
let updated_file_path_ids = sync
.write_ops(
db,
file_path_update_args
.into_iter()
.unzip::<_, _, Vec<_>, Vec<_>>(),
)
.await
.map(|file_paths| {
file_paths
.into_iter()
.map(|file_path_id::Data { id }| id)
.collect::<HashSet<_>>()
})?;
object_pub_id_by_file_path_id
.retain(|file_path_id, _| updated_file_path_ids.contains(file_path_id));
Ok(object_pub_id_by_file_path_id)
} else {
trace!("No objects created, skipping file path updates");
Ok(HashMap::new())
}
}
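// Hedged usage sketch: creating objects for a batch of orphan file paths and
// collecting which file paths received a brand-new object; `orphan_file_paths`
// is an illustrative Vec<FilePathToCreateOrLinkObject>.
let object_pub_id_by_file_path_id =
    create_objects_and_update_file_paths(orphan_file_paths, db, sync).await?;
let file_path_ids_with_new_object = object_pub_id_by_file_path_id
    .keys()
    .copied()
    .collect::<Vec<_>>();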


@ -1,98 +1,76 @@
use crate::{file_identifier, Error};
use sd_core_prisma_helpers::{
file_path_for_file_identifier, file_path_pub_id, object_for_file_identifier,
};
use sd_core_prisma_helpers::{file_path_id, object_for_file_identifier, CasId, ObjectPubId};
use sd_core_sync::Manager as SyncManager;
use sd_prisma::{
prisma::{file_path, object, PrismaClient},
prisma_sync,
};
use sd_sync::{CRDTOperation, OperationFactory};
use sd_prisma::prisma::{file_path, object, PrismaClient};
use sd_task_system::{
check_interruption, ExecStatus, Interrupter, IntoAnyTaskOutput, SerializableTask, Task, TaskId,
};
use sd_utils::{msgpack, uuid_to_bytes};
use std::{
collections::{HashMap, HashSet},
mem,
sync::Arc,
time::Duration,
};
use std::{collections::HashMap, mem, sync::Arc, time::Duration};
use prisma_client_rust::Select;
use serde::{Deserialize, Serialize};
use tokio::time::Instant;
use tracing::{debug, trace};
use uuid::Uuid;
use tracing::{instrument, trace, Level};
use super::IdentifiedFile;
use super::{
connect_file_path_to_object, create_objects_and_update_file_paths, FilePathToCreateOrLinkObject,
};
#[derive(Debug)]
pub struct ObjectProcessorTask {
pub struct ObjectProcessor {
// Task control
id: TaskId,
with_priority: bool,
// Received input args
file_paths_by_cas_id: HashMap<CasId<'static>, Vec<FilePathToCreateOrLinkObject>>,
// Inner state
stage: Stage,
// Out collector
output: Output,
// Dependencies
db: Arc<PrismaClient>,
sync: Arc<SyncManager>,
identified_files: HashMap<Uuid, IdentifiedFile>,
output: Output,
stage: Stage,
with_priority: bool,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct SaveState {
id: TaskId,
identified_files: HashMap<Uuid, IdentifiedFile>,
output: Output,
stage: Stage,
with_priority: bool,
}
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Output {
pub file_path_ids_with_new_object: Vec<file_path::id::Type>,
pub assign_cas_ids_time: Duration,
pub fetch_existing_objects_time: Duration,
pub assign_to_existing_object_time: Duration,
pub create_object_time: Duration,
pub created_objects_count: u64,
pub linked_objects_count: u64,
}
#[derive(Debug, Serialize, Deserialize)]
enum Stage {
Starting,
FetchExistingObjects,
AssignFilePathsToExistingObjects {
existing_objects_by_cas_id: HashMap<String, object_for_file_identifier::Data>,
existing_objects_by_cas_id: HashMap<CasId<'static>, ObjectPubId>,
},
CreateObjects,
}
impl ObjectProcessorTask {
#[must_use]
pub fn new(
identified_files: HashMap<Uuid, IdentifiedFile>,
db: Arc<PrismaClient>,
sync: Arc<SyncManager>,
with_priority: bool,
) -> Self {
Self {
id: TaskId::new_v4(),
db,
sync,
identified_files,
stage: Stage::Starting,
output: Output::default(),
with_priority,
}
}
/// Output from the `[ObjectProcessor]` task
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Output {
/// To send to frontend for priority reporting of new objects
pub file_path_ids_with_new_object: Vec<file_path::id::Type>,
/// Time elapsed fetching existing `objects` from db to be linked to `file_paths`
pub fetch_existing_objects_time: Duration,
/// Time spent linking `file_paths` to already existing `objects`
pub assign_to_existing_object_time: Duration,
/// Time spent creating new `objects`
pub create_object_time: Duration,
/// Number of new `objects` created
pub created_objects_count: u64,
/// Number of `objects` that were linked to `file_paths`
pub linked_objects_count: u64,
}
#[async_trait::async_trait]
impl Task<Error> for ObjectProcessorTask {
impl Task<Error> for ObjectProcessor {
fn id(&self) -> TaskId {
self.id
}
@ -101,16 +79,25 @@ impl Task<Error> for ObjectProcessorTask {
self.with_priority
}
#[instrument(
skip(self, interrupter),
fields(
task_id = %self.id,
cas_ids_count = %self.file_paths_by_cas_id.len(),
),
ret(level = Level::TRACE),
err,
)]
#[allow(clippy::blocks_in_conditions)] // Due to `err` on `instrument` macro above
async fn run(&mut self, interrupter: &Interrupter) -> Result<ExecStatus, Error> {
let Self {
db,
sync,
identified_files,
file_paths_by_cas_id,
stage,
output:
Output {
file_path_ids_with_new_object,
assign_cas_ids_time,
fetch_existing_objects_time,
assign_to_existing_object_time,
create_object_time,
@ -123,17 +110,17 @@ impl Task<Error> for ObjectProcessorTask {
loop {
match stage {
Stage::Starting => {
let start = Instant::now();
assign_cas_id_to_file_paths(identified_files, db, sync).await?;
*assign_cas_ids_time = start.elapsed();
*stage = Stage::FetchExistingObjects;
}
Stage::FetchExistingObjects => {
trace!("Starting object processor task");
let start = Instant::now();
let existing_objects_by_cas_id =
fetch_existing_objects_by_cas_id(identified_files, db).await?;
fetch_existing_objects_by_cas_id(file_paths_by_cas_id.keys(), db).await?;
*fetch_existing_objects_time = start.elapsed();
trace!(
elapsed_time = ?fetch_existing_objects_time,
existing_objects_count = existing_objects_by_cas_id.len(),
"Fetched existing Objects;",
);
*stage = Stage::AssignFilePathsToExistingObjects {
existing_objects_by_cas_id,
};
@ -142,48 +129,53 @@ impl Task<Error> for ObjectProcessorTask {
Stage::AssignFilePathsToExistingObjects {
existing_objects_by_cas_id,
} => {
trace!(
existing_objects_to_link = existing_objects_by_cas_id.len(),
"Assigning file paths to existing Objects;",
);
let start = Instant::now();
let assigned_file_path_pub_ids = assign_existing_objects_to_file_paths(
identified_files,
let more_file_path_ids_with_new_object = assign_existing_objects_to_file_paths(
file_paths_by_cas_id,
existing_objects_by_cas_id,
db,
sync,
)
.await?;
*assign_to_existing_object_time = start.elapsed();
*linked_objects_count = assigned_file_path_pub_ids.len() as u64;
file_path_ids_with_new_object.extend(more_file_path_ids_with_new_object);
*linked_objects_count += file_path_ids_with_new_object.len() as u64;
debug!(
"Found {} existing Objects, linked file paths to them",
existing_objects_by_cas_id.len()
trace!(
existing_objects_to_link = existing_objects_by_cas_id.len(),
%linked_objects_count,
"Found existing Objects, linked file paths to them;",
);
for file_path_pub_id::Data { pub_id } in assigned_file_path_pub_ids {
let pub_id = Uuid::from_slice(&pub_id).expect("uuid bytes are invalid");
trace!("Assigned file path <file_path_pub_id={pub_id}> to existing object");
identified_files
.remove(&pub_id)
.expect("file_path must be here");
}
*stage = Stage::CreateObjects;
if identified_files.is_empty() {
if file_paths_by_cas_id.is_empty() {
trace!("No more objects to be created, finishing task");
// No objects to be created, we're good to finish already
break;
}
}
Stage::CreateObjects => {
trace!(
creating_count = file_paths_by_cas_id.len(),
"Creating new Objects;"
);
let start = Instant::now();
*created_objects_count = create_objects(identified_files, db, sync).await?;
let (more_file_paths_with_new_object, more_linked_objects_count) =
assign_objects_to_duplicated_orphans(file_paths_by_cas_id, db, sync)
.await?;
*create_object_time = start.elapsed();
file_path_ids_with_new_object.extend(more_file_paths_with_new_object);
*linked_objects_count += more_linked_objects_count;
*file_path_ids_with_new_object = identified_files
.values()
.map(|IdentifiedFile { file_path, .. }| file_path.id)
.collect();
*created_objects_count = file_path_ids_with_new_object.len() as u64;
trace!(%created_objects_count, ?create_object_time, "Created new Objects;");
break;
}
@ -196,225 +188,188 @@ impl Task<Error> for ObjectProcessorTask {
}
}
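// Hedged sketch (not in the diff): the staged loop above is what makes this task
// pausable between stages; on shutdown the pending stage and partial output can
// be persisted through the SerializableTask impl further below, and the task is
// rebuilt with fresh db/sync handles on resume.
let bytes = task.serialize().await.expect("serialize ObjectProcessor state");
// ...process restarts...
let task = ObjectProcessor::deserialize(&bytes, (Arc::clone(&db), Arc::clone(&sync)))
    .await
    .expect("deserialize ObjectProcessor state");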
async fn assign_cas_id_to_file_paths(
identified_files: &HashMap<Uuid, IdentifiedFile>,
db: &PrismaClient,
sync: &SyncManager,
) -> Result<(), file_identifier::Error> {
// Assign cas_id to each file path
sync.write_ops(
db,
identified_files
.iter()
.map(|(pub_id, IdentifiedFile { cas_id, .. })| {
(
sync.shared_update(
prisma_sync::file_path::SyncId {
pub_id: uuid_to_bytes(*pub_id),
},
file_path::cas_id::NAME,
msgpack!(cas_id),
),
db.file_path()
.update(
file_path::pub_id::equals(uuid_to_bytes(*pub_id)),
vec![file_path::cas_id::set(cas_id.clone())],
)
// We don't need any data here, just the id avoids receiving the entire object
// as we can't pass an empty select macro call
.select(file_path::select!({ id })),
)
})
.unzip::<_, _, _, Vec<_>>(),
)
.await?;
Ok(())
impl ObjectProcessor {
#[must_use]
pub fn new(
file_paths_by_cas_id: HashMap<CasId<'static>, Vec<FilePathToCreateOrLinkObject>>,
db: Arc<PrismaClient>,
sync: Arc<SyncManager>,
with_priority: bool,
) -> Self {
Self {
id: TaskId::new_v4(),
db,
sync,
file_paths_by_cas_id,
stage: Stage::Starting,
output: Output::default(),
with_priority,
}
}
}
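// Hedged sketch: how the job presumably chains the two tasks in this module; the
// identifier task's Output carries `file_paths_by_cas_id`, which is exactly the
// input this constructor expects (`identifier_output` is illustrative).
let object_processor = ObjectProcessor::new(
    identifier_output.file_paths_by_cas_id,
    Arc::clone(&db),
    Arc::clone(&sync),
    true, // with_priority, e.g. when reacting to user navigation
);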
async fn fetch_existing_objects_by_cas_id(
identified_files: &HashMap<Uuid, IdentifiedFile>,
/// Retrieves objects that are already connected to file paths with the same cas_id
#[instrument(skip_all, err)]
async fn fetch_existing_objects_by_cas_id<'cas_id, Iter>(
cas_ids: Iter,
db: &PrismaClient,
) -> Result<HashMap<String, object_for_file_identifier::Data>, file_identifier::Error> {
// Retrieves objects that are already connected to file paths with the same id
db.object()
.find_many(vec![object::file_paths::some(vec![
file_path::cas_id::in_vec(
identified_files
.values()
.filter_map(|IdentifiedFile { cas_id, .. }| cas_id.as_ref())
.cloned()
.collect::<HashSet<_>>()
) -> Result<HashMap<CasId<'static>, ObjectPubId>, file_identifier::Error>
where
Iter: IntoIterator<Item = &'cas_id CasId<'cas_id>> + Send,
Iter::IntoIter: Send,
{
async fn inner(
stringed_cas_ids: Vec<String>,
db: &PrismaClient,
) -> Result<HashMap<CasId<'static>, ObjectPubId>, file_identifier::Error> {
db.object()
.find_many(vec![object::file_paths::some(vec![
file_path::cas_id::in_vec(stringed_cas_ids),
file_path::object_id::not(None),
])])
.select(object_for_file_identifier::select())
.exec()
.await
.map_err(Into::into)
.map(|objects| {
objects
.into_iter()
.collect(),
),
])])
.select(object_for_file_identifier::select())
.exec()
.await
.map_err(Into::into)
.map(|objects| {
objects
.into_iter()
.filter_map(|object| {
object
.file_paths
.first()
.and_then(|file_path| file_path.cas_id.clone())
.map(|cas_id| (cas_id, object))
})
.collect()
})
.filter_map(|object_for_file_identifier::Data { pub_id, file_paths }| {
file_paths
.first()
.and_then(|file_path| {
file_path
.cas_id
.as_ref()
.map(CasId::from)
.map(CasId::into_owned)
})
.map(|cas_id| (cas_id, pub_id.into()))
})
.collect()
})
}
let stringed_cas_ids = cas_ids.into_iter().map(Into::into).collect::<Vec<_>>();
trace!(
cas_ids_count = stringed_cas_ids.len(),
"Fetching existing objects by cas_ids;",
);
inner(stringed_cas_ids, db).await
}
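// Note on the shape above: the non-generic nested `inner` fn is a common trick,
// presumably so the function stays generic over any borrowed-cas-id iterator
// while the query body is compiled only once. Minimal illustration:
async fn count_rows<I>(ids: I, db: &PrismaClient) -> usize
where
    I: IntoIterator<Item = String> + Send,
    I::IntoIter: Send,
{
    async fn inner(ids: Vec<String>, _db: &PrismaClient) -> usize {
        // single monomorphized body doing the real work
        ids.len()
    }
    inner(ids.into_iter().collect(), db).await
}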
/// Attempt to associate each file path with an object that has been
/// connected to file paths with the same cas_id
#[instrument(skip_all, err, fields(identified_files_count = file_paths_by_cas_id.len()))]
async fn assign_existing_objects_to_file_paths(
identified_files: &HashMap<Uuid, IdentifiedFile>,
objects_by_cas_id: &HashMap<String, object_for_file_identifier::Data>,
file_paths_by_cas_id: &mut HashMap<CasId<'static>, Vec<FilePathToCreateOrLinkObject>>,
objects_by_cas_id: &HashMap<CasId<'static>, ObjectPubId>,
db: &PrismaClient,
sync: &SyncManager,
) -> Result<Vec<file_path_pub_id::Data>, file_identifier::Error> {
// Attempt to associate each file path with an object that has been
// connected to file paths with the same cas_id
) -> Result<Vec<file_path::id::Type>, file_identifier::Error> {
sync.write_ops(
db,
identified_files
objects_by_cas_id
.iter()
.filter_map(|(pub_id, IdentifiedFile { cas_id, .. })| {
objects_by_cas_id
// Filtering out files without cas_id due to being empty
.get(cas_id.as_ref()?)
.map(|object| (*pub_id, object))
})
.map(|(pub_id, object)| {
connect_file_path_to_object(
pub_id,
// SAFETY: This pub_id is generated by the uuid lib, but we have to store bytes in sqlite
Uuid::from_slice(&object.pub_id).expect("uuid bytes are invalid"),
sync,
db,
)
.flat_map(|(cas_id, object_pub_id)| {
file_paths_by_cas_id
.remove(cas_id)
.map(|file_paths| {
file_paths.into_iter().map(
|FilePathToCreateOrLinkObject {
file_path_pub_id, ..
}| {
connect_file_path_to_object(
&file_path_pub_id,
object_pub_id,
db,
sync,
)
},
)
})
.expect("must be here")
})
.unzip::<_, _, Vec<_>, Vec<_>>(),
)
.await
.map(|file_paths| {
file_paths
.into_iter()
.map(|file_path_id::Data { id }| id)
.collect()
})
.map_err(Into::into)
}
fn connect_file_path_to_object<'db>(
file_path_pub_id: Uuid,
object_pub_id: Uuid,
sync: &SyncManager,
db: &'db PrismaClient,
) -> (CRDTOperation, Select<'db, file_path_pub_id::Data>) {
trace!("Connecting <file_path_pub_id={file_path_pub_id}> to <object_pub_id={object_pub_id}'>");
let vec_id = object_pub_id.as_bytes().to_vec();
(
sync.shared_update(
prisma_sync::file_path::SyncId {
pub_id: uuid_to_bytes(file_path_pub_id),
},
file_path::object::NAME,
msgpack!(prisma_sync::object::SyncId {
pub_id: vec_id.clone()
}),
),
db.file_path()
.update(
file_path::pub_id::equals(uuid_to_bytes(file_path_pub_id)),
vec![file_path::object::connect(object::pub_id::equals(vec_id))],
)
.select(file_path_pub_id::select()),
)
}
async fn create_objects(
identified_files: &HashMap<Uuid, IdentifiedFile>,
async fn assign_objects_to_duplicated_orphans(
file_paths_by_cas_id: &mut HashMap<CasId<'static>, Vec<FilePathToCreateOrLinkObject>>,
db: &PrismaClient,
sync: &SyncManager,
) -> Result<u64, file_identifier::Error> {
trace!("Creating {} new Objects", identified_files.len(),);
) -> Result<(Vec<file_path::id::Type>, u64), file_identifier::Error> {
// at least 1 file path per cas_id
let mut selected_file_paths = Vec::with_capacity(file_paths_by_cas_id.len());
let mut cas_ids_by_file_path_id = HashMap::with_capacity(file_paths_by_cas_id.len());
let (object_create_args, file_path_update_args) = identified_files
.iter()
.map(
|(
file_path_pub_id,
IdentifiedFile {
file_path: file_path_for_file_identifier::Data { date_created, .. },
kind,
..
},
)| {
let object_pub_id = Uuid::new_v4();
file_paths_by_cas_id.retain(|cas_id, file_paths| {
let file_path = file_paths.pop().expect("file_paths can't be empty");
let has_more_file_paths = !file_paths.is_empty();
let kind = *kind as i32;
if has_more_file_paths {
cas_ids_by_file_path_id.insert(file_path.id, cas_id.clone());
}
selected_file_paths.push(file_path);
let (sync_params, db_params) = [
(
(object::date_created::NAME, msgpack!(date_created)),
object::date_created::set(*date_created),
),
(
(object::kind::NAME, msgpack!(kind)),
object::kind::set(Some(kind)),
),
]
.into_iter()
.unzip::<_, _, Vec<_>, Vec<_>>();
has_more_file_paths
});
let (mut file_paths_with_new_object, objects_by_cas_id) =
create_objects_and_update_file_paths(selected_file_paths, db, sync)
.await?
.into_iter()
.map(|(file_path_id, object_pub_id)| {
(
(
sync.shared_create(
prisma_sync::object::SyncId {
pub_id: uuid_to_bytes(object_pub_id),
},
sync_params,
),
object::create_unchecked(uuid_to_bytes(object_pub_id), db_params),
),
connect_file_path_to_object(*file_path_pub_id, object_pub_id, sync, db),
file_path_id,
cas_ids_by_file_path_id
.remove(&file_path_id)
.map(|cas_id| (cas_id, object_pub_id)),
)
},
)
.unzip::<_, _, Vec<_>, Vec<_>>();
})
.unzip::<_, _, Vec<_>, Vec<_>>();
// create new object records with assembled values
let total_created_files = sync
.write_ops(db, {
let (sync, db_params) = object_create_args
.into_iter()
.unzip::<_, _, Vec<_>, Vec<_>>();
let more_file_paths_ids_with_new_object = assign_existing_objects_to_file_paths(
file_paths_by_cas_id,
&objects_by_cas_id.into_iter().flatten().collect(),
db,
sync,
)
.await?;
(
sync.into_iter().flatten().collect(),
db.object().create_many(db_params),
)
})
.await?;
// Sanity check
assert!(
file_paths_by_cas_id.is_empty(),
"We MUST have processed all pending `file_paths` by now"
);
trace!("Created {total_created_files} new Objects");
let linked_objects_count = more_file_paths_ids_with_new_object.len() as u64;
if total_created_files > 0 {
trace!("Updating file paths with created objects");
file_paths_with_new_object.extend(more_file_paths_ids_with_new_object);
sync.write_ops(
db,
file_path_update_args
.into_iter()
.unzip::<_, _, Vec<_>, Vec<_>>(),
)
.await?;
trace!("Updated file paths with created objects");
}
#[allow(clippy::cast_sign_loss)] // SAFETY: We're sure the value is positive
Ok(total_created_files as u64)
Ok((file_paths_with_new_object, linked_objects_count))
}
impl SerializableTask<Error> for ObjectProcessorTask {
#[derive(Debug, Serialize, Deserialize)]
pub struct SaveState {
id: TaskId,
file_paths_by_cas_id: HashMap<CasId<'static>, Vec<FilePathToCreateOrLinkObject>>,
stage: Stage,
output: Output,
with_priority: bool,
}
impl SerializableTask<Error> for ObjectProcessor {
type SerializeError = rmp_serde::encode::Error;
type DeserializeError = rmp_serde::decode::Error;
@ -424,18 +379,18 @@ impl SerializableTask<Error> for ObjectProcessorTask {
async fn serialize(self) -> Result<Vec<u8>, Self::SerializeError> {
let Self {
id,
identified_files,
output,
file_paths_by_cas_id,
stage,
output,
with_priority,
..
} = self;
rmp_serde::to_vec_named(&SaveState {
id,
identified_files,
output,
file_paths_by_cas_id,
stage,
output,
with_priority,
})
}
@ -447,18 +402,18 @@ impl SerializableTask<Error> for ObjectProcessorTask {
rmp_serde::from_slice(data).map(
|SaveState {
id,
identified_files,
output,
file_paths_by_cas_id,
stage,
output,
with_priority,
}| Self {
id,
with_priority,
file_paths_by_cas_id,
stage,
output,
db,
sync,
identified_files,
output,
stage,
with_priority,
},
)
}

File diff suppressed because it is too large


@ -1,7 +1,6 @@
use crate::{utils::sub_path, OuterContext};
use sd_core_file_path_helper::{FilePathError, IsolatedFilePathData};
use sd_core_indexer_rules::IndexerRuleError;
use sd_core_prisma_helpers::{
file_path_pub_and_cas_ids, file_path_to_isolate_with_pub_id, file_path_walker,
};
@ -27,11 +26,11 @@ use std::{
};
use itertools::Itertools;
use prisma_client_rust::{operator::or, Select};
use prisma_client_rust::{operator::or, QueryError, Select};
use rspc::ErrorCode;
use serde::{Deserialize, Serialize};
use specta::Type;
use tracing::warn;
use tracing::{instrument, warn};
pub mod job;
mod shallow;
@ -53,8 +52,8 @@ pub enum Error {
SubPath(#[from] sub_path::Error),
// Internal Errors
#[error("database Error: {0}")]
Database(#[from] prisma_client_rust::QueryError),
#[error("database error: {0}")]
Database(#[from] QueryError),
#[error(transparent)]
FileIO(#[from] FileIOError),
#[error(transparent)]
@ -68,27 +67,28 @@ pub enum Error {
// Mixed errors
#[error(transparent)]
Rules(#[from] IndexerRuleError),
Rules(#[from] sd_core_indexer_rules::Error),
}
impl From<Error> for rspc::Error {
fn from(err: Error) -> Self {
match err {
fn from(e: Error) -> Self {
match e {
Error::IndexerRuleNotFound(_) => {
Self::with_cause(ErrorCode::NotFound, err.to_string(), err)
Self::with_cause(ErrorCode::NotFound, e.to_string(), e)
}
Error::SubPath(sub_path_err) => sub_path_err.into(),
Error::Rules(rule_err) => rule_err.into(),
_ => Self::with_cause(ErrorCode::InternalServerError, err.to_string(), err),
_ => Self::with_cause(ErrorCode::InternalServerError, e.to_string(), e),
}
}
}
#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type)]
pub enum NonCriticalError {
#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type, Clone)]
#[serde(rename_all = "snake_case")]
pub enum NonCriticalIndexerError {
#[error("failed to read directory entry: {0}")]
FailedDirectoryEntry(String),
#[error("failed to fetch metadata: {0}")]
@ -153,10 +153,12 @@ async fn update_directory_sizes(
file_path::size_in_bytes_bytes::NAME,
msgpack!(size_bytes),
),
db.file_path().update(
file_path::pub_id::equals(file_path.pub_id),
vec![file_path::size_in_bytes_bytes::set(Some(size_bytes))],
),
db.file_path()
.update(
file_path::pub_id::equals(file_path.pub_id),
vec![file_path::size_in_bytes_bytes::set(Some(size_bytes))],
)
.select(file_path::select!({ id })),
))
})
.collect::<Result<Vec<_>, Error>>()?
@ -240,8 +242,16 @@ async fn remove_non_existing_file_paths(
.map_err(Into::into)
}
#[instrument(
skip(base_path, location_path, db, sync, errors),
fields(
base_path = %base_path.as_ref().display(),
location_path = %location_path.as_ref().display(),
),
err,
)]
#[allow(clippy::missing_panics_doc)] // Can't actually panic as we only deal with directories
async fn reverse_update_directories_sizes(
pub async fn reverse_update_directories_sizes(
base_path: impl AsRef<Path> + Send,
location_id: location::id::Type,
location_path: impl AsRef<Path> + Send,
@ -278,7 +288,7 @@ async fn reverse_update_directories_sizes(
IsolatedFilePathData::try_from(file_path)
.map_err(|e| {
errors.push(
NonCriticalError::MissingFilePathData(format!(
NonCriticalIndexerError::MissingFilePathData(format!(
"Found a file_path missing data: <pub_id='{:#?}'>, error: {e:#?}",
from_bytes_to_uuid(&pub_id)
))
@ -328,7 +338,7 @@ async fn reverse_update_directories_sizes(
),
))
} else {
warn!("Got a missing ancestor for a file_path in the database, maybe we have a corruption");
warn!("Got a missing ancestor for a file_path in the database, ignoring...");
None
}
})
@ -345,8 +355,9 @@ async fn compute_sizes(
pub_id_by_ancestor_materialized_path: &mut HashMap<String, (file_path::pub_id::Type, u64)>,
db: &PrismaClient,
errors: &mut Vec<crate::NonCriticalError>,
) -> Result<(), Error> {
db.file_path()
) -> Result<(), QueryError> {
for file_path in db
.file_path()
.find_many(vec![
file_path::location_id::equals(Some(location_id)),
file_path::materialized_path::in_vec(materialized_paths),
@ -354,30 +365,29 @@ async fn compute_sizes(
.select(file_path::select!({ pub_id materialized_path size_in_bytes_bytes }))
.exec()
.await?
.into_iter()
.for_each(|file_path| {
if let Some(materialized_path) = file_path.materialized_path {
if let Some((_, size)) =
pub_id_by_ancestor_materialized_path.get_mut(&materialized_path)
{
*size += file_path.size_in_bytes_bytes.map_or_else(
|| {
warn!("Got a directory missing its size in bytes");
0
},
|size_in_bytes_bytes| size_in_bytes_from_db(&size_in_bytes_bytes),
);
}
} else {
errors.push(
NonCriticalError::MissingFilePathData(format!(
{
if let Some(materialized_path) = file_path.materialized_path {
if let Some((_, size)) =
pub_id_by_ancestor_materialized_path.get_mut(&materialized_path)
{
*size += file_path.size_in_bytes_bytes.map_or_else(
|| {
warn!("Got a directory missing its size in bytes");
0
},
|size_in_bytes_bytes| size_in_bytes_from_db(&size_in_bytes_bytes),
);
}
} else {
errors.push(
NonCriticalIndexerError::MissingFilePathData(format!(
"Corrupt database possessing a file_path entry without materialized_path: <pub_id='{:#?}'>",
from_bytes_to_uuid(&file_path.pub_id)
))
.into(),
);
}
});
.into(),
);
}
}
Ok(())
}
@ -433,57 +443,76 @@ impl walker::WalkerDBProxy for WalkerDBProxy {
async fn fetch_file_paths_to_remove(
&self,
parent_iso_file_path: &IsolatedFilePathData<'_>,
mut existing_inodes: HashSet<Vec<u8>>,
unique_location_id_materialized_path_name_extension_params: Vec<file_path::WhereParam>,
) -> Result<Vec<file_path_pub_and_cas_ids::Data>, NonCriticalError> {
) -> Result<Vec<file_path_pub_and_cas_ids::Data>, NonCriticalIndexerError> {
// NOTE: This batch size can be increased if we wish to trade memory for more performance
const BATCH_SIZE: i64 = 1000;
let founds_ids = self
.db
._batch(
unique_location_id_materialized_path_name_extension_params
.into_iter()
.chunks(200)
.into_iter()
.map(|unique_params| {
self.db
.file_path()
.find_many(vec![or(unique_params.collect())])
.select(file_path::select!({ id }))
})
.collect::<Vec<_>>(),
)
.await
.map(|founds_chunk| {
founds_chunk
.into_iter()
.flat_map(|file_paths| file_paths.into_iter().map(|file_path| file_path.id))
.collect::<HashSet<_>>()
})
.map_err(|e| NonCriticalError::FetchAlreadyExistingFilePathIds(e.to_string()))?;
let founds_ids = {
let found_chunks = self
.db
._batch(
unique_location_id_materialized_path_name_extension_params
.into_iter()
.chunks(200)
.into_iter()
.map(|unique_params| {
self.db
.file_path()
.find_many(vec![or(unique_params.collect())])
.select(file_path::select!({ id inode }))
})
.collect::<Vec<_>>(),
)
.await
.map_err(|e| {
NonCriticalIndexerError::FetchAlreadyExistingFilePathIds(e.to_string())
})?;
found_chunks
.into_iter()
.flatten()
.map(|file_path| {
if let Some(inode) = file_path.inode {
existing_inodes.remove(&inode);
}
file_path.id
})
.collect::<HashSet<_>>()
};
let mut to_remove = vec![];
let mut cursor = 1;
loop {
let materialized_path_param = file_path::materialized_path::equals(Some(
parent_iso_file_path
.materialized_path_for_children()
.expect("the received isolated file path must be from a directory"),
));
let found = self
.db
.file_path()
.find_many(vec![
file_path::location_id::equals(Some(self.location_id)),
file_path::materialized_path::equals(Some(
parent_iso_file_path
.materialized_path_for_children()
.expect("the received isolated file path must be from a directory"),
)),
if existing_inodes.is_empty() {
materialized_path_param
} else {
or(vec![
materialized_path_param,
file_path::inode::in_vec(existing_inodes.iter().cloned().collect()),
])
},
])
.order_by(file_path::id::order(SortOrder::Asc))
.take(BATCH_SIZE)
.cursor(file_path::id::equals(cursor))
.select(file_path_pub_and_cas_ids::select())
.select(file_path::select!({ id pub_id cas_id inode }))
.exec()
.await
.map_err(|e| NonCriticalError::FetchFilePathsToRemove(e.to_string()))?;
.map_err(|e| NonCriticalIndexerError::FetchFilePathsToRemove(e.to_string()))?;
#[allow(clippy::cast_possible_truncation)] // Safe because we are using a constant
let should_stop = found.len() < BATCH_SIZE as usize;
@ -494,11 +523,17 @@ impl walker::WalkerDBProxy for WalkerDBProxy {
break;
}
to_remove.extend(
found
.into_iter()
.filter(|file_path| !founds_ids.contains(&file_path.id)),
);
to_remove.extend(found.into_iter().filter_map(|file_path| {
if let Some(inode) = file_path.inode {
existing_inodes.remove(&inode);
}
(!founds_ids.contains(&file_path.id)).then_some(file_path_pub_and_cas_ids::Data {
id: file_path.id,
pub_id: file_path.pub_id,
cas_id: file_path.cas_id,
})
}));
if should_stop {
break;


@ -18,25 +18,32 @@ use std::{
use futures_concurrency::future::TryJoin;
use itertools::Itertools;
use tracing::{debug, warn};
use tracing::{debug, instrument, warn};
use super::{
remove_non_existing_file_paths, reverse_update_directories_sizes,
tasks::{
saver::{SaveTask, SaveTaskOutput},
updater::{UpdateTask, UpdateTaskOutput},
walker::{ToWalkEntry, WalkDirTask, WalkTaskOutput, WalkedEntry},
self, saver, updater,
walker::{self, ToWalkEntry, WalkedEntry},
},
update_directory_sizes, update_location_size, IsoFilePathFactory, WalkerDBProxy, BATCH_SIZE,
};
#[instrument(
skip_all,
fields(
location_id = location.id,
location_path = ?location.path,
sub_path = %sub_path.as_ref().display()
),
err,
)]
pub async fn shallow(
location: location_with_indexer_rules::Data,
sub_path: impl AsRef<Path> + Send,
dispatcher: BaseTaskDispatcher<Error>,
ctx: impl OuterContext,
dispatcher: &BaseTaskDispatcher<Error>,
ctx: &impl OuterContext,
) -> Result<Vec<NonCriticalError>, Error> {
let sub_path = sub_path.as_ref();
let db = ctx.db();
let sync = ctx.sync();
@ -46,15 +53,20 @@ pub async fn shallow(
.map_err(indexer::Error::from)?;
let to_walk_path = Arc::new(
get_full_path_from_sub_path(location.id, &Some(sub_path), &*location_path, db)
.await
.map_err(indexer::Error::from)?,
get_full_path_from_sub_path::<indexer::Error>(
location.id,
Some(sub_path.as_ref()),
&*location_path,
db,
)
.await?,
);
let Some(WalkTaskOutput {
let Some(walker::Output {
to_create,
to_update,
to_remove,
non_indexed_paths,
mut errors,
directory_iso_file_path,
total_size,
@ -64,13 +76,16 @@ pub async fn shallow(
Arc::clone(&location_path),
Arc::clone(&to_walk_path),
Arc::clone(db),
&dispatcher,
dispatcher,
)
.await?
else {
return Ok(vec![]);
};
// TODO use non_indexed_paths here in the future, sending it to frontend, showing then alongside the indexed files from db
debug!(non_indexed_paths_count = non_indexed_paths.len());
let removed_count = remove_non_existing_file_paths(to_remove, db, sync).await?;
let Some(Metadata {
@ -82,7 +97,7 @@ pub async fn shallow(
to_update,
Arc::clone(db),
Arc::clone(sync),
&dispatcher,
dispatcher,
)
.await?
else {
@ -109,7 +124,7 @@ pub async fn shallow(
.await?;
}
update_location_size(location.id, db, &ctx).await?;
update_location_size(location.id, db, ctx).await?;
}
if indexed_count > 0 || removed_count > 0 {
@ -119,15 +134,19 @@ pub async fn shallow(
Ok(errors)
}
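// Hedged usage sketch: a caller (e.g. the location watcher reacting to changes
// inside a single directory) running a shallow index of that directory only.
let non_critical_errors = shallow(
    location_with_indexer_rules, // location::Data with its indexer rules loaded
    "Documents/Invoices",        // sub-path relative to the location root (illustrative)
    &dispatcher,
    &ctx,                        // anything implementing OuterContext
)
.await?;
if !non_critical_errors.is_empty() {
    warn!(?non_critical_errors, "Shallow indexing finished with non-critical errors;");
}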
#[instrument(
skip_all,
fields(to_walk_path = %to_walk_path.display())
)]
async fn walk(
location: &location_with_indexer_rules::Data,
location_path: Arc<PathBuf>,
to_walk_path: Arc<PathBuf>,
db: Arc<PrismaClient>,
dispatcher: &BaseTaskDispatcher<Error>,
) -> Result<Option<WalkTaskOutput>, Error> {
match dispatcher
.dispatch(WalkDirTask::new_shallow(
) -> Result<Option<walker::Output<WalkerDBProxy, IsoFilePathFactory>>, Error> {
let Ok(task_handle) = dispatcher
.dispatch(tasks::Walker::new_shallow(
ToWalkEntry::from(&*to_walk_path),
to_walk_path,
location
@ -147,11 +166,15 @@ async fn walk(
},
)?)
.await
.await?
{
else {
debug!("Task system is shutting down while a shallow indexer was in progress");
return Ok(None);
};
match task_handle.await? {
sd_task_system::TaskStatus::Done((_, TaskOutput::Out(data))) => Ok(Some(
*data
.downcast::<WalkTaskOutput>()
.downcast::<walker::Output<WalkerDBProxy, IsoFilePathFactory>>()
.expect("we just dispatched this task"),
)),
sd_task_system::TaskStatus::Done((_, TaskOutput::Empty)) => {
@ -188,7 +211,7 @@ async fn save_and_update(
.chunks(BATCH_SIZE)
.into_iter()
.map(|chunk| {
SaveTask::new_shallow(
tasks::Saver::new_shallow(
location.id,
location.pub_id.clone(),
chunk.collect::<Vec<_>>(),
@ -203,7 +226,7 @@ async fn save_and_update(
.chunks(BATCH_SIZE)
.into_iter()
.map(|chunk| {
UpdateTask::new_shallow(
tasks::Updater::new_shallow(
chunk.collect::<Vec<_>>(),
Arc::clone(&db),
Arc::clone(&sync),
@ -218,25 +241,28 @@ async fn save_and_update(
updated_count: 0,
};
for task_status in dispatcher
.dispatch_many_boxed(save_and_update_tasks)
.await
let Ok(tasks_handles) = dispatcher.dispatch_many_boxed(save_and_update_tasks).await else {
debug!("Task system is shutting down while a shallow indexer was in progress");
return Ok(None);
};
for task_status in tasks_handles
.into_iter()
.map(CancelTaskOnDrop)
.map(CancelTaskOnDrop::new)
.collect::<Vec<_>>()
.try_join()
.await?
{
match task_status {
sd_task_system::TaskStatus::Done((_, TaskOutput::Out(data))) => {
if data.is::<SaveTaskOutput>() {
if data.is::<saver::Output>() {
metadata.indexed_count += data
.downcast::<SaveTaskOutput>()
.downcast::<saver::Output>()
.expect("just checked")
.saved_count;
} else {
metadata.updated_count += data
.downcast::<UpdateTaskOutput>()
.downcast::<updater::Output>()
.expect("just checked")
.updated_count;
}


@ -1,3 +1,7 @@
pub mod saver;
pub mod updater;
pub mod walker;
pub use saver::Saver;
pub use updater::Updater;
pub use walker::Walker;


@ -16,22 +16,165 @@ use std::{sync::Arc, time::Duration};
use chrono::Utc;
use serde::{Deserialize, Serialize};
use tokio::time::Instant;
use tracing::trace;
use tracing::{instrument, trace, Level};
use super::walker::WalkedEntry;
#[derive(Debug)]
pub struct SaveTask {
pub struct Saver {
// Task control
id: TaskId,
is_shallow: bool,
// Received input args
location_id: location::id::Type,
location_pub_id: location::pub_id::Type,
walked_entries: Vec<WalkedEntry>,
// Dependencies
db: Arc<PrismaClient>,
sync: Arc<SyncManager>,
is_shallow: bool,
}
impl SaveTask {
/// [`Save`] Task output
#[derive(Debug)]
pub struct Output {
/// Number of records inserted on database
pub saved_count: u64,
/// Time spent saving records
pub save_duration: Duration,
}
#[async_trait::async_trait]
impl Task<Error> for Saver {
fn id(&self) -> TaskId {
self.id
}
fn with_priority(&self) -> bool {
// If we're running in shallow mode, then we want priority
self.is_shallow
}
#[instrument(
skip_all,
fields(
task_id = %self.id,
location_id = %self.location_id,
to_save_count = %self.walked_entries.len(),
is_shallow = self.is_shallow,
),
ret(level = Level::TRACE),
err,
)]
#[allow(clippy::blocks_in_conditions)] // Due to `err` on `instrument` macro above
async fn run(&mut self, _: &Interrupter) -> Result<ExecStatus, Error> {
use file_path::{
create_unchecked, date_created, date_indexed, date_modified, extension, hidden, inode,
is_dir, location, location_id, materialized_path, name, size_in_bytes_bytes,
};
let start_time = Instant::now();
let Self {
location_id,
location_pub_id,
walked_entries,
db,
sync,
..
} = self;
let (sync_stuff, paths): (Vec<_>, Vec<_>) = walked_entries
.drain(..)
.map(
|WalkedEntry {
pub_id,
maybe_object_id,
iso_file_path,
metadata,
}| {
let IsolatedFilePathDataParts {
materialized_path,
is_dir,
name,
extension,
..
} = iso_file_path.to_parts();
assert!(
maybe_object_id.is_none(),
"Object ID must be None as this tasks only created \
new file_paths and they were not identified yet"
);
let (sync_params, db_params): (Vec<_>, Vec<_>) = [
(
(
location::NAME,
msgpack!(prisma_sync::location::SyncId {
pub_id: location_pub_id.clone()
}),
),
location_id::set(Some(*location_id)),
),
sync_db_entry!(materialized_path.to_string(), materialized_path),
sync_db_entry!(name.to_string(), name),
sync_db_entry!(is_dir, is_dir),
sync_db_entry!(extension.to_string(), extension),
sync_db_entry!(
metadata.size_in_bytes.to_be_bytes().to_vec(),
size_in_bytes_bytes
),
sync_db_entry!(inode_to_db(metadata.inode), inode),
sync_db_entry!(metadata.created_at.into(), date_created),
sync_db_entry!(metadata.modified_at.into(), date_modified),
sync_db_entry!(Utc::now().into(), date_indexed),
sync_db_entry!(metadata.hidden, hidden),
]
.into_iter()
.unzip();
(
sync.shared_create(
prisma_sync::file_path::SyncId {
pub_id: pub_id.to_db(),
},
sync_params,
),
create_unchecked(pub_id.into(), db_params),
)
},
)
.unzip();
#[allow(clippy::cast_sign_loss)]
let saved_count = sync
.write_ops(
db,
(
sync_stuff.into_iter().flatten().collect(),
db.file_path().create_many(paths).skip_duplicates(),
),
)
.await
.map_err(indexer::Error::from)? as u64;
let save_duration = start_time.elapsed();
trace!(saved_count, "Inserted records;");
Ok(ExecStatus::Done(
Output {
saved_count,
save_duration,
}
.into_output(),
))
}
}
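// Hedged sketch (imports assumed): consuming a finished Saver's output from the
// task system, mirroring the is::<T>()/downcast::<T>() pattern used by the
// shallow indexer earlier in this diff; `task_handle` is illustrative.
if let TaskStatus::Done((_, TaskOutput::Out(data))) = task_handle.await? {
    let Output {
        saved_count,
        save_duration,
    } = *data.downcast::<Output>().expect("we just dispatched a Saver");
    trace!(saved_count, ?save_duration, "Saver finished;");
}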
impl Saver {
#[must_use]
pub fn new_deep(
location_id: location::id::Type,
@ -72,15 +215,16 @@ impl SaveTask {
}
#[derive(Debug, Serialize, Deserialize)]
struct SaveTaskSaveState {
struct SaveState {
id: TaskId,
is_shallow: bool,
location_id: location::id::Type,
location_pub_id: location::pub_id::Type,
walked_entries: Vec<WalkedEntry>,
is_shallow: bool,
}
impl SerializableTask<Error> for SaveTask {
impl SerializableTask<Error> for Saver {
type SerializeError = rmp_serde::encode::Error;
type DeserializeError = rmp_serde::decode::Error;
@ -90,18 +234,18 @@ impl SerializableTask<Error> for SaveTask {
async fn serialize(self) -> Result<Vec<u8>, Self::SerializeError> {
let Self {
id,
is_shallow,
location_id,
location_pub_id,
walked_entries,
is_shallow,
..
} = self;
rmp_serde::to_vec_named(&SaveTaskSaveState {
rmp_serde::to_vec_named(&SaveState {
id,
is_shallow,
location_id,
location_pub_id,
walked_entries,
is_shallow,
})
}
@ -110,131 +254,21 @@ impl SerializableTask<Error> for SaveTask {
(db, sync): Self::DeserializeCtx,
) -> Result<Self, Self::DeserializeError> {
rmp_serde::from_slice(data).map(
|SaveTaskSaveState {
|SaveState {
id,
is_shallow,
location_id,
location_pub_id,
walked_entries,
is_shallow,
}| Self {
id,
is_shallow,
location_id,
location_pub_id,
walked_entries,
db,
sync,
is_shallow,
},
)
}
}
#[derive(Debug)]
pub struct SaveTaskOutput {
pub saved_count: u64,
pub save_duration: Duration,
}
#[async_trait::async_trait]
impl Task<Error> for SaveTask {
fn id(&self) -> TaskId {
self.id
}
fn with_priority(&self) -> bool {
// If we're running in shallow mode, then we want priority
self.is_shallow
}
async fn run(&mut self, _: &Interrupter) -> Result<ExecStatus, Error> {
use file_path::{
create_unchecked, date_created, date_indexed, date_modified, extension, hidden, inode,
is_dir, location, location_id, materialized_path, name, size_in_bytes_bytes,
};
let start_time = Instant::now();
let Self {
location_id,
location_pub_id,
walked_entries,
db,
sync,
..
} = self;
let (sync_stuff, paths): (Vec<_>, Vec<_>) = walked_entries
.drain(..)
.map(|entry| {
let IsolatedFilePathDataParts {
materialized_path,
is_dir,
name,
extension,
..
} = entry.iso_file_path.to_parts();
let pub_id = sd_utils::uuid_to_bytes(entry.pub_id);
let (sync_params, db_params): (Vec<_>, Vec<_>) = [
(
(
location::NAME,
msgpack!(prisma_sync::location::SyncId {
pub_id: location_pub_id.clone()
}),
),
location_id::set(Some(*location_id)),
),
sync_db_entry!(materialized_path.to_string(), materialized_path),
sync_db_entry!(name.to_string(), name),
sync_db_entry!(is_dir, is_dir),
sync_db_entry!(extension.to_string(), extension),
sync_db_entry!(
entry.metadata.size_in_bytes.to_be_bytes().to_vec(),
size_in_bytes_bytes
),
sync_db_entry!(inode_to_db(entry.metadata.inode), inode),
sync_db_entry!(entry.metadata.created_at.into(), date_created),
sync_db_entry!(entry.metadata.modified_at.into(), date_modified),
sync_db_entry!(Utc::now().into(), date_indexed),
sync_db_entry!(entry.metadata.hidden, hidden),
]
.into_iter()
.unzip();
(
sync.shared_create(
prisma_sync::file_path::SyncId {
pub_id: sd_utils::uuid_to_bytes(entry.pub_id),
},
sync_params,
),
create_unchecked(pub_id, db_params),
)
})
.unzip();
#[allow(clippy::cast_sign_loss)]
let saved_count = sync
.write_ops(
db,
(
sync_stuff.into_iter().flatten().collect(),
db.file_path().create_many(paths).skip_duplicates(),
),
)
.await
.map_err(indexer::Error::from)? as u64;
trace!("Inserted {saved_count} records");
Ok(ExecStatus::Done(
SaveTaskOutput {
saved_count,
save_duration: start_time.elapsed(),
}
.into_output(),
))
}
}


@ -17,21 +17,169 @@ use std::{collections::HashSet, sync::Arc, time::Duration};
use serde::{Deserialize, Serialize};
use tokio::time::Instant;
use tracing::trace;
use tracing::{instrument, trace, Level};
use super::walker::WalkedEntry;
#[derive(Debug)]
pub struct UpdateTask {
pub struct Updater {
// Task control
id: TaskId,
is_shallow: bool,
// Received input args
walked_entries: Vec<WalkedEntry>,
// Inner state
object_ids_that_should_be_unlinked: HashSet<object::id::Type>,
// Dependencies
db: Arc<PrismaClient>,
sync: Arc<SyncManager>,
is_shallow: bool,
}
impl UpdateTask {
/// [`Update`] Task output
#[derive(Debug)]
pub struct Output {
/// Number of records updated on database
pub updated_count: u64,
/// Time spent updating records
pub update_duration: Duration,
}
#[async_trait::async_trait]
impl Task<Error> for Updater {
fn id(&self) -> TaskId {
self.id
}
fn with_priority(&self) -> bool {
// If we're running in shallow mode, then we want priority
self.is_shallow
}
#[instrument(
skip_all,
fields(
task_id = %self.id,
to_update_count = %self.walked_entries.len(),
is_shallow = self.is_shallow,
),
ret(level = Level::TRACE),
err,
)]
#[allow(clippy::blocks_in_conditions)] // Due to `err` on `instrument` macro above
async fn run(&mut self, interrupter: &Interrupter) -> Result<ExecStatus, Error> {
use file_path::{
cas_id, date_created, date_modified, hidden, inode, is_dir, object, object_id,
size_in_bytes_bytes,
};
let start_time = Instant::now();
let Self {
walked_entries,
db,
sync,
object_ids_that_should_be_unlinked,
..
} = self;
fetch_objects_ids_to_unlink(walked_entries, object_ids_that_should_be_unlinked, db).await?;
check_interruption!(interrupter);
let (sync_stuff, paths_to_update) = walked_entries
.drain(..)
.map(
|WalkedEntry {
pub_id,
maybe_object_id,
iso_file_path,
metadata,
}| {
let IsolatedFilePathDataParts { is_dir, .. } = &iso_file_path.to_parts();
let should_unlink_object = maybe_object_id.map_or(false, |object_id| {
object_ids_that_should_be_unlinked.contains(&object_id)
});
let (sync_params, db_params) = chain_optional_iter(
[
((cas_id::NAME, msgpack!(nil)), cas_id::set(None)),
sync_db_entry!(*is_dir, is_dir),
sync_db_entry!(
metadata.size_in_bytes.to_be_bytes().to_vec(),
size_in_bytes_bytes
),
sync_db_entry!(inode_to_db(metadata.inode), inode),
{
let v = metadata.created_at.into();
sync_db_entry!(v, date_created)
},
{
let v = metadata.modified_at.into();
sync_db_entry!(v, date_modified)
},
sync_db_entry!(metadata.hidden, hidden),
],
[
// As this file was updated while Spacedrive was offline, we mark its object_id and cas_id as null,
// so this file_path will be processed again by the file identifier job
should_unlink_object.then_some((
(object_id::NAME, msgpack!(nil)),
object::disconnect(),
)),
],
)
.into_iter()
.unzip::<_, _, Vec<_>, Vec<_>>();
(
sync_params
.into_iter()
.map(|(field, value)| {
sync.shared_update(
prisma_sync::file_path::SyncId {
pub_id: pub_id.to_db(),
},
field,
value,
)
})
.collect::<Vec<_>>(),
db.file_path()
.update(file_path::pub_id::equals(pub_id.into()), db_params)
// selecting id to avoid fetching whole object from database
.select(file_path::select!({ id })),
)
},
)
.unzip::<_, _, Vec<_>, Vec<_>>();
let updated = sync
.write_ops(
db,
(sync_stuff.into_iter().flatten().collect(), paths_to_update),
)
.await
.map_err(indexer::Error::from)?;
let update_duration = start_time.elapsed();
trace!(?updated, "Updated records;");
Ok(ExecStatus::Done(
Output {
updated_count: updated.len() as u64,
update_duration,
}
.into_output(),
))
}
}
impl Updater {
#[must_use]
pub fn new_deep(
walked_entries: Vec<WalkedEntry>,
@ -65,177 +213,6 @@ impl UpdateTask {
}
}
#[derive(Debug, Serialize, Deserialize)]
struct UpdateTaskSaveState {
id: TaskId,
walked_entries: Vec<WalkedEntry>,
object_ids_that_should_be_unlinked: HashSet<object::id::Type>,
is_shallow: bool,
}
impl SerializableTask<Error> for UpdateTask {
type SerializeError = rmp_serde::encode::Error;
type DeserializeError = rmp_serde::decode::Error;
type DeserializeCtx = (Arc<PrismaClient>, Arc<SyncManager>);
async fn serialize(self) -> Result<Vec<u8>, Self::SerializeError> {
let Self {
id,
walked_entries,
object_ids_that_should_be_unlinked,
is_shallow,
..
} = self;
rmp_serde::to_vec_named(&UpdateTaskSaveState {
id,
walked_entries,
object_ids_that_should_be_unlinked,
is_shallow,
})
}
async fn deserialize(
data: &[u8],
(db, sync): Self::DeserializeCtx,
) -> Result<Self, Self::DeserializeError> {
rmp_serde::from_slice(data).map(
|UpdateTaskSaveState {
id,
walked_entries,
object_ids_that_should_be_unlinked,
is_shallow,
}| Self {
id,
walked_entries,
object_ids_that_should_be_unlinked,
db,
sync,
is_shallow,
},
)
}
}
#[derive(Debug)]
pub struct UpdateTaskOutput {
pub updated_count: u64,
pub update_duration: Duration,
}
#[async_trait::async_trait]
impl Task<Error> for UpdateTask {
fn id(&self) -> TaskId {
self.id
}
fn with_priority(&self) -> bool {
// If we're running in shallow mode, then we want priority
self.is_shallow
}
async fn run(&mut self, interrupter: &Interrupter) -> Result<ExecStatus, Error> {
use file_path::{
cas_id, date_created, date_modified, hidden, inode, is_dir, object, object_id,
size_in_bytes_bytes,
};
let start_time = Instant::now();
let Self {
walked_entries,
db,
sync,
object_ids_that_should_be_unlinked,
..
} = self;
fetch_objects_ids_to_unlink(walked_entries, object_ids_that_should_be_unlinked, db).await?;
check_interruption!(interrupter);
let (sync_stuff, paths_to_update) = walked_entries
.drain(..)
.map(|entry| {
let IsolatedFilePathDataParts { is_dir, .. } = &entry.iso_file_path.to_parts();
let pub_id = sd_utils::uuid_to_bytes(entry.pub_id);
let should_unlink_object = entry.maybe_object_id.map_or(false, |object_id| {
object_ids_that_should_be_unlinked.contains(&object_id)
});
let (sync_params, db_params) = chain_optional_iter(
[
((cas_id::NAME, msgpack!(nil)), cas_id::set(None)),
sync_db_entry!(*is_dir, is_dir),
sync_db_entry!(
entry.metadata.size_in_bytes.to_be_bytes().to_vec(),
size_in_bytes_bytes
),
sync_db_entry!(inode_to_db(entry.metadata.inode), inode),
{
let v = entry.metadata.created_at.into();
sync_db_entry!(v, date_created)
},
{
let v = entry.metadata.modified_at.into();
sync_db_entry!(v, date_modified)
},
sync_db_entry!(entry.metadata.hidden, hidden),
],
[
// As this file was updated while Spacedrive was offline, we mark the object_id and cas_id as null
// So this file_path will be updated at file identifier job
should_unlink_object
.then_some(((object_id::NAME, msgpack!(nil)), object::disconnect())),
],
)
.into_iter()
.unzip::<_, _, Vec<_>, Vec<_>>();
(
sync_params
.into_iter()
.map(|(field, value)| {
sync.shared_update(
prisma_sync::file_path::SyncId {
pub_id: pub_id.clone(),
},
field,
value,
)
})
.collect::<Vec<_>>(),
db.file_path()
.update(file_path::pub_id::equals(pub_id), db_params)
.select(file_path::select!({ id })),
)
})
.unzip::<_, _, Vec<_>, Vec<_>>();
let updated = sync
.write_ops(
db,
(sync_stuff.into_iter().flatten().collect(), paths_to_update),
)
.await
.map_err(indexer::Error::from)?;
trace!("Updated {updated:?} records");
Ok(ExecStatus::Done(
UpdateTaskOutput {
updated_count: updated.len() as u64,
update_duration: start_time.elapsed(),
}
.into_output(),
))
}
}
async fn fetch_objects_ids_to_unlink(
walked_entries: &[WalkedEntry],
object_ids_that_should_be_unlinked: &mut HashSet<object::id::Type>,
@ -269,3 +246,59 @@ async fn fetch_objects_ids_to_unlink(
Ok(())
}
#[derive(Debug, Serialize, Deserialize)]
struct SaveState {
id: TaskId,
is_shallow: bool,
walked_entries: Vec<WalkedEntry>,
object_ids_that_should_be_unlinked: HashSet<object::id::Type>,
}
impl SerializableTask<Error> for Updater {
type SerializeError = rmp_serde::encode::Error;
type DeserializeError = rmp_serde::decode::Error;
type DeserializeCtx = (Arc<PrismaClient>, Arc<SyncManager>);
async fn serialize(self) -> Result<Vec<u8>, Self::SerializeError> {
let Self {
id,
walked_entries,
object_ids_that_should_be_unlinked,
is_shallow,
..
} = self;
rmp_serde::to_vec_named(&SaveState {
id,
is_shallow,
walked_entries,
object_ids_that_should_be_unlinked,
})
}
async fn deserialize(
data: &[u8],
(db, sync): Self::DeserializeCtx,
) -> Result<Self, Self::DeserializeError> {
rmp_serde::from_slice(data).map(
|SaveState {
id,
is_shallow,
walked_entries,
object_ids_that_should_be_unlinked,
}| Self {
id,
is_shallow,
walked_entries,
object_ids_that_should_be_unlinked,
db,
sync,
},
)
}
}

File diff suppressed because it is too large


@ -0,0 +1,93 @@
use sd_core_file_path_helper::{FilePathMetadata, IsolatedFilePathData};
use sd_core_prisma_helpers::FilePathPubId;
use sd_prisma::prisma::file_path;
use std::{
hash::{Hash, Hasher},
path::{Path, PathBuf},
};
use serde::{Deserialize, Serialize};
/// `WalkedEntry` represents a single path in the filesystem
#[derive(Debug, Serialize, Deserialize)]
pub struct WalkedEntry {
pub pub_id: FilePathPubId,
pub maybe_object_id: file_path::object_id::Type,
pub iso_file_path: IsolatedFilePathData<'static>,
pub metadata: FilePathMetadata,
}
impl PartialEq for WalkedEntry {
fn eq(&self, other: &Self) -> bool {
self.iso_file_path == other.iso_file_path
}
}
impl Eq for WalkedEntry {}
impl Hash for WalkedEntry {
fn hash<H: Hasher>(&self, state: &mut H) {
self.iso_file_path.hash(state);
}
}
#[derive(Debug, Serialize, Deserialize)]
pub(super) struct WalkingEntry {
pub(super) iso_file_path: IsolatedFilePathData<'static>,
pub(super) metadata: FilePathMetadata,
}
impl From<WalkingEntry> for WalkedEntry {
fn from(
WalkingEntry {
iso_file_path,
metadata,
}: WalkingEntry,
) -> Self {
Self {
pub_id: FilePathPubId::new(),
maybe_object_id: None,
iso_file_path,
metadata,
}
}
}
impl<PubId: Into<FilePathPubId>> From<(PubId, file_path::object_id::Type, WalkingEntry)>
for WalkedEntry
{
fn from(
(
pub_id,
maybe_object_id,
WalkingEntry {
iso_file_path,
metadata,
},
): (PubId, file_path::object_id::Type, WalkingEntry),
) -> Self {
Self {
pub_id: pub_id.into(),
maybe_object_id,
iso_file_path,
metadata,
}
}
}
#[derive(Debug, Serialize, Deserialize)]
pub struct ToWalkEntry {
pub(super) path: PathBuf,
pub(super) parent_dir_accepted_by_its_children: Option<bool>,
}
impl<P: AsRef<Path>> From<P> for ToWalkEntry {
fn from(path: P) -> Self {
Self {
path: path.as_ref().into(),
parent_dir_accepted_by_its_children: None,
}
}
}
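// Hedged sketch of the two conversion paths above (module-internal, since
// WalkingEntry fields are pub(super)); `iso_file_path`, `metadata`,
// `walking_entry` and `existing` are illustrative bindings.
let fresh: WalkedEntry = WalkingEntry {
    iso_file_path,
    metadata,
}
.into(); // gets a brand-new pub_id and maybe_object_id: None

let known: WalkedEntry =
    (existing.pub_id.as_slice(), existing.object_id, walking_entry).into();
// keeps the pub_id and object link already stored in the database for this path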


@ -0,0 +1,64 @@
use crate::indexer;
use sd_core_file_path_helper::FilePathMetadata;
use sd_core_indexer_rules::MetadataForIndexerRules;
use std::{fs::Metadata, path::Path};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
#[derive(Debug, Serialize, Deserialize)]
pub(super) struct InnerMetadata {
pub is_dir: bool,
pub is_symlink: bool,
pub inode: u64,
pub size_in_bytes: u64,
pub hidden: bool,
pub created_at: DateTime<Utc>,
pub modified_at: DateTime<Utc>,
}
impl InnerMetadata {
pub fn new(
path: impl AsRef<Path>,
metadata: &Metadata,
) -> Result<Self, indexer::NonCriticalIndexerError> {
let FilePathMetadata {
inode,
size_in_bytes,
created_at,
modified_at,
hidden,
} = FilePathMetadata::from_path(path, metadata)
.map_err(|e| indexer::NonCriticalIndexerError::FilePathMetadata(e.to_string()))?;
Ok(Self {
is_dir: metadata.is_dir(),
is_symlink: metadata.is_symlink(),
inode,
size_in_bytes,
hidden,
created_at,
modified_at,
})
}
}
impl MetadataForIndexerRules for InnerMetadata {
fn is_dir(&self) -> bool {
self.is_dir
}
}
impl From<InnerMetadata> for FilePathMetadata {
fn from(metadata: InnerMetadata) -> Self {
Self {
inode: metadata.inode,
size_in_bytes: metadata.size_in_bytes,
hidden: metadata.hidden,
created_at: metadata.created_at,
modified_at: metadata.modified_at,
}
}
}
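// Hedged usage sketch (imports and path illustrative): building an InnerMetadata
// for a directory entry before the indexer rules run over it, using the same
// error-wrapping style as the walker helpers in this PR.
let path = PathBuf::from("/Users/alice/Photos");
let inner = fs::metadata(&path)
    .await
    .map_err(|e| {
        indexer::NonCriticalIndexerError::Metadata(FileIOError::from((&path, e)).to_string())
    })
    .and_then(|metadata| InnerMetadata::new(&path, &metadata))?;
if inner.is_dir {
    // will be fed to the indexer rules through the MetadataForIndexerRules impl above
}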

File diff suppressed because it is too large


@ -0,0 +1,261 @@
use crate::{indexer, NonCriticalError};
use sd_core_file_path_helper::{FilePathMetadata, IsolatedFilePathData};
use sd_core_indexer_rules::{IndexerRuler, MetadataForIndexerRules, RuleKind};
use sd_utils::error::FileIOError;
use std::{
collections::{hash_map::Entry, HashMap, HashSet},
path::{Path, PathBuf},
sync::Arc,
};
use futures_concurrency::future::Join;
use tokio::fs;
use tracing::{instrument, trace};
use super::{
entry::{ToWalkEntry, WalkingEntry},
InnerMetadata, IsoFilePathFactory, WalkedEntry,
};
pub(super) async fn apply_indexer_rules(
paths_and_metadatas: &mut HashMap<PathBuf, InnerMetadata>,
indexer_ruler: &IndexerRuler,
errors: &mut Vec<NonCriticalError>,
) -> HashMap<PathBuf, (InnerMetadata, HashMap<RuleKind, Vec<bool>>)> {
paths_and_metadatas
.drain()
// TODO: Hard ignoring symlinks for now, but this should be configurable
.filter(|(_, metadata)| !metadata.is_symlink)
.map(|(current_path, metadata)| async {
indexer_ruler
.apply_all(&current_path, &metadata)
.await
.map(|acceptance_per_rule_kind| {
(current_path, (metadata, acceptance_per_rule_kind))
})
.map_err(|e| indexer::NonCriticalIndexerError::IndexerRule(e.to_string()))
})
.collect::<Vec<_>>()
.join()
.await
.into_iter()
.filter_map(|res| res.map_err(|e| errors.push(e.into())).ok())
.collect()
}
pub(super) async fn process_rules_results(
root: &Arc<PathBuf>,
iso_file_path_factory: &impl IsoFilePathFactory,
parent_dir_accepted_by_its_children: Option<bool>,
paths_metadatas_and_acceptance: &mut HashMap<
PathBuf,
(InnerMetadata, HashMap<RuleKind, Vec<bool>>),
>,
maybe_to_keep_walking: &mut Option<Vec<ToWalkEntry>>,
collect_rejected_paths: bool,
errors: &mut Vec<NonCriticalError>,
) -> (
HashMap<PathBuf, InnerMetadata>,
HashSet<WalkedEntry>,
Vec<PathBuf>,
) {
let (accepted, accepted_ancestors, rejected) = segregate_paths(
root,
iso_file_path_factory,
paths_metadatas_and_acceptance.drain(),
parent_dir_accepted_by_its_children,
maybe_to_keep_walking,
collect_rejected_paths,
errors,
);
(
accepted,
accepted_ancestors
.into_iter()
.map(|(ancestor_iso_file_path, ancestor_path)| async move {
fs::metadata(&ancestor_path)
.await
.map_err(|e| {
indexer::NonCriticalIndexerError::Metadata(
FileIOError::from((&ancestor_path, e)).to_string(),
)
})
.and_then(|metadata| {
FilePathMetadata::from_path(&ancestor_path, &metadata)
.map(|metadata| {
WalkingEntry {
iso_file_path: ancestor_iso_file_path,
metadata,
}
.into()
})
.map_err(|e| {
indexer::NonCriticalIndexerError::FilePathMetadata(e.to_string())
})
})
})
.collect::<Vec<_>>()
.join()
.await
.into_iter()
.filter_map(|res| res.map_err(|e| errors.push(e.into())).ok())
.collect(),
rejected,
)
}
fn segregate_paths(
root: &Arc<PathBuf>,
iso_file_path_factory: &impl IsoFilePathFactory,
paths_metadatas_and_acceptance: impl IntoIterator<
Item = (PathBuf, (InnerMetadata, HashMap<RuleKind, Vec<bool>>)),
>,
parent_dir_accepted_by_its_children: Option<bool>,
maybe_to_keep_walking: &mut Option<Vec<ToWalkEntry>>,
collect_rejected_paths: bool,
errors: &mut Vec<NonCriticalError>,
) -> (
HashMap<PathBuf, InnerMetadata>,
HashMap<IsolatedFilePathData<'static>, PathBuf>,
Vec<PathBuf>,
) {
let root = root.as_ref();
let mut accepted = HashMap::new();
let mut accepted_ancestors = HashMap::new();
let mut rejected = Vec::new();
for (current_path, (metadata, acceptance_per_rule_kind)) in paths_metadatas_and_acceptance {
// Accept by children has three states:
// None if we don't know yet or if this check doesn't apply
// Some(true) if this check applies and it passes
// Some(false) if this check applies and it was rejected
// and we pass the current parent state down to its children
let mut accept_by_children_dir = parent_dir_accepted_by_its_children;
if !reject_path(
&current_path,
&metadata,
&acceptance_per_rule_kind,
&mut accept_by_children_dir,
maybe_to_keep_walking,
) && accept_by_children_dir.unwrap_or(true)
{
accept_path_and_ancestors(
current_path,
metadata,
root,
&mut accepted,
iso_file_path_factory,
&mut accepted_ancestors,
errors,
);
continue;
}
if collect_rejected_paths {
rejected.push(current_path);
}
}
(accepted, accepted_ancestors, rejected)
}
#[instrument(skip_all, fields(current_path = %current_path.display()))]
fn reject_path(
current_path: &Path,
metadata: &InnerMetadata,
acceptance_per_rule_kind: &HashMap<RuleKind, Vec<bool>>,
accept_by_children_dir: &mut Option<bool>,
maybe_to_keep_walking: &mut Option<Vec<ToWalkEntry>>,
) -> bool {
IndexerRuler::rejected_by_reject_glob(acceptance_per_rule_kind)
|| IndexerRuler::rejected_by_git_ignore(acceptance_per_rule_kind)
|| (metadata.is_dir()
&& process_and_maybe_reject_by_directory_rules(
current_path,
acceptance_per_rule_kind,
accept_by_children_dir,
maybe_to_keep_walking,
)) || IndexerRuler::rejected_by_accept_glob(acceptance_per_rule_kind)
}
fn process_and_maybe_reject_by_directory_rules(
current_path: &Path,
acceptance_per_rule_kind: &HashMap<RuleKind, Vec<bool>>,
accept_by_children_dir: &mut Option<bool>,
maybe_to_keep_walking: &mut Option<Vec<ToWalkEntry>>,
) -> bool {
// If it is a directory, first we check if we must reject it and its children entirely
if IndexerRuler::rejected_by_children_directories(acceptance_per_rule_kind) {
return true;
}
// Then we check if we must accept it and its children
if let Some(accepted_by_children_rules) =
acceptance_per_rule_kind.get(&RuleKind::AcceptIfChildrenDirectoriesArePresent)
{
if accepted_by_children_rules.iter().any(|accept| *accept) {
*accept_by_children_dir = Some(true);
}
// If it wasn't accepted, then we mark it as rejected
if accept_by_children_dir.is_none() {
trace!(
"Rejected because it didn't passed in any \
`RuleKind::AcceptIfChildrenDirectoriesArePresent` rule",
);
*accept_by_children_dir = Some(false);
}
}
// Then we mark this directory to maybe be walked into as well
if let Some(ref mut to_keep_walking) = maybe_to_keep_walking {
to_keep_walking.push(ToWalkEntry {
path: current_path.to_path_buf(),
parent_dir_accepted_by_its_children: *accept_by_children_dir,
});
}
false
}
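The `Option<bool>` flag threaded through these two functions is the three-state children-acceptance described in the comment inside `segregate_paths`. A standalone sketch of that small state machine, with illustrative inputs in place of the real rule results:

fn fold_children_acceptance(
	parent_state: Option<bool>,
	rule_applies: bool,
	rule_passed: bool,
) -> Option<bool> {
	if !rule_applies {
		// None: we don't know yet, or the check doesn't apply, so inherit the parent state
		parent_state
	} else if rule_passed {
		// Some(true): the check applies and passed
		Some(true)
	} else {
		// Some(false): the check applies and was rejected
		Some(false)
	}
}

fn main() {
	// as in `segregate_paths`, an undecided state defaults to accepted
	assert!(fold_children_acceptance(None, false, false).unwrap_or(true));
	assert!(fold_children_acceptance(None, true, true).unwrap_or(true));
	assert!(!fold_children_acceptance(None, true, false).unwrap_or(true));
}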
fn accept_path_and_ancestors(
current_path: PathBuf,
metadata: InnerMetadata,
root: &Path,
accepted: &mut HashMap<PathBuf, InnerMetadata>,
iso_file_path_factory: &impl IsoFilePathFactory,
accepted_ancestors: &mut HashMap<IsolatedFilePathData<'static>, PathBuf>,
errors: &mut Vec<NonCriticalError>,
) {
// If the ancestor directories weren't indexed before, we index them now
for ancestor in current_path
.ancestors()
.skip(1) // Skip the current directory as it was already indexed
.take_while(|&ancestor| ancestor != root)
{
if let Ok(iso_file_path) = iso_file_path_factory.build(ancestor, true).map_err(|e| {
errors.push(indexer::NonCriticalIndexerError::IsoFilePath(e.to_string()).into());
}) {
match accepted_ancestors.entry(iso_file_path) {
Entry::Occupied(_) => {
// If we already accepted this ancestor, then the map already contains
// all of its ancestors too, so we can stop here
break;
}
Entry::Vacant(entry) => {
trace!(ancestor = %ancestor.display(), "Accepted ancestor");
entry.insert(ancestor.to_path_buf());
}
}
}
}
accepted.insert(current_path, metadata);
}
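The ancestor loop above skips the entry itself and stops before the location root, so only the intermediate directories get registered. A runnable standalone sketch of that walk using only std::path; the paths are illustrative:

use std::path::Path;

fn ancestors_up_to_root<'a>(entry: &'a Path, root: &Path) -> Vec<&'a Path> {
	entry
		.ancestors()
		.skip(1) // skip the entry itself, it is already being indexed
		.take_while(|&ancestor| ancestor != root)
		.collect()
}

fn main() {
	let root = Path::new("/library/location");
	let entry = Path::new("/library/location/photos/2024/trip/img.jpg");
	// yields: .../photos/2024/trip, .../photos/2024, .../photos
	for ancestor in ancestors_up_to_root(entry, root) {
		println!("{}", ancestor.display());
	}
}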

View file

@ -0,0 +1,219 @@
use crate::{Error, NonCriticalError};
use sd_core_file_path_helper::IsolatedFilePathData;
use sd_core_indexer_rules::{IndexerRuler, RuleKind};
use sd_core_prisma_helpers::file_path_pub_and_cas_ids;
use std::{
collections::{HashMap, HashSet},
path::PathBuf,
sync::Arc,
time::Duration,
};
use sd_task_system::{SerializableTask, TaskId};
use serde::{Deserialize, Serialize};
use super::{
entry::{ToWalkEntry, WalkingEntry},
metadata::InnerMetadata,
IsoFilePathFactory, WalkedEntry, Walker, WalkerDBProxy, WalkerStage,
};
#[derive(Debug, Serialize, Deserialize)]
pub(super) struct WalkDirSaveState {
id: TaskId,
is_shallow: bool,
entry: ToWalkEntry,
root: Arc<PathBuf>,
entry_iso_file_path: IsolatedFilePathData<'static>,
stage: WalkerStageSaveState,
errors: Vec<NonCriticalError>,
scan_time: Duration,
}
#[derive(Debug, Serialize, Deserialize)]
pub(super) enum WalkerStageSaveState {
Start,
CollectingMetadata {
found_paths: Vec<PathBuf>,
},
CheckingIndexerRules {
paths_and_metadatas: HashMap<PathBuf, InnerMetadata>,
},
ProcessingRulesResults {
paths_metadatas_and_acceptance:
HashMap<PathBuf, (InnerMetadata, HashMap<RuleKind, Vec<bool>>)>,
},
GatheringFilePathsToRemove {
accepted_paths: HashMap<PathBuf, InnerMetadata>,
maybe_to_keep_walking: Option<Vec<ToWalkEntry>>,
accepted_ancestors: HashSet<WalkedEntry>,
non_indexed_paths: Vec<PathBuf>,
},
Finalize {
walking_entries: Vec<WalkingEntry>,
accepted_ancestors: HashSet<WalkedEntry>,
to_remove_entries: Vec<file_path_pub_and_cas_ids::Data>,
maybe_to_keep_walking: Option<Vec<ToWalkEntry>>,
non_indexed_paths: Vec<PathBuf>,
},
}
impl From<WalkerStage> for WalkerStageSaveState {
fn from(stage: WalkerStage) -> Self {
match stage {
// We can't store the current state of `ReadDirStream` so we start again from the beginning
WalkerStage::Start | WalkerStage::Walking { .. } => Self::Start,
WalkerStage::CollectingMetadata { found_paths } => {
Self::CollectingMetadata { found_paths }
}
WalkerStage::CheckingIndexerRules {
paths_and_metadatas,
} => Self::CheckingIndexerRules {
paths_and_metadatas,
},
WalkerStage::ProcessingRulesResults {
paths_metadatas_and_acceptance,
} => Self::ProcessingRulesResults {
paths_metadatas_and_acceptance,
},
WalkerStage::GatheringFilePathsToRemove {
accepted_paths,
maybe_to_keep_walking,
accepted_ancestors,
non_indexed_paths,
} => Self::GatheringFilePathsToRemove {
accepted_paths,
maybe_to_keep_walking,
accepted_ancestors,
non_indexed_paths,
},
WalkerStage::Finalize {
walking_entries,
accepted_ancestors,
to_remove_entries,
maybe_to_keep_walking,
non_indexed_paths,
} => Self::Finalize {
walking_entries,
accepted_ancestors,
to_remove_entries,
maybe_to_keep_walking,
non_indexed_paths,
},
}
}
}
impl From<WalkerStageSaveState> for WalkerStage {
fn from(value: WalkerStageSaveState) -> Self {
match value {
WalkerStageSaveState::Start => Self::Start,
WalkerStageSaveState::CollectingMetadata { found_paths } => {
Self::CollectingMetadata { found_paths }
}
WalkerStageSaveState::CheckingIndexerRules {
paths_and_metadatas,
} => Self::CheckingIndexerRules {
paths_and_metadatas,
},
WalkerStageSaveState::ProcessingRulesResults {
paths_metadatas_and_acceptance,
} => Self::ProcessingRulesResults {
paths_metadatas_and_acceptance,
},
WalkerStageSaveState::GatheringFilePathsToRemove {
accepted_paths,
maybe_to_keep_walking,
accepted_ancestors,
non_indexed_paths,
} => Self::GatheringFilePathsToRemove {
accepted_paths,
maybe_to_keep_walking,
accepted_ancestors,
non_indexed_paths,
},
WalkerStageSaveState::Finalize {
walking_entries,
accepted_ancestors,
to_remove_entries,
maybe_to_keep_walking,
non_indexed_paths,
} => Self::Finalize {
walking_entries,
accepted_ancestors,
to_remove_entries,
maybe_to_keep_walking,
non_indexed_paths,
},
}
}
}
impl<DBProxy, IsoPathFactory> SerializableTask<Error> for Walker<DBProxy, IsoPathFactory>
where
DBProxy: WalkerDBProxy,
IsoPathFactory: IsoFilePathFactory,
{
type SerializeError = rmp_serde::encode::Error;
type DeserializeError = rmp_serde::decode::Error;
type DeserializeCtx = (IndexerRuler, DBProxy, IsoPathFactory);
async fn serialize(self) -> Result<Vec<u8>, Self::SerializeError> {
let Self {
id,
entry,
root,
entry_iso_file_path,
stage,
errors,
scan_time,
is_shallow,
..
} = self;
rmp_serde::to_vec_named(&WalkDirSaveState {
id,
is_shallow,
entry,
root,
entry_iso_file_path,
stage: stage.into(),
errors,
scan_time,
})
}
async fn deserialize(
data: &[u8],
(indexer_ruler, db_proxy, iso_file_path_factory): Self::DeserializeCtx,
) -> Result<Self, Self::DeserializeError> {
rmp_serde::from_slice(data).map(
|WalkDirSaveState {
id,
entry,
root,
entry_iso_file_path,
stage,
errors,
scan_time,
is_shallow,
}| Self {
id,
entry,
root,
entry_iso_file_path,
indexer_ruler,
iso_file_path_factory,
db_proxy,
stage: stage.into(),
errors,
scan_time,
is_shallow,
},
)
}
}
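The walker save state round-trips through MessagePack via rmp_serde, using the named-field encoder so restoring does not depend on field order. A minimal standalone round-trip sketch of that choice, with an illustrative struct standing in for `WalkDirSaveState`:

use serde::{Deserialize, Serialize};

#[derive(Debug, PartialEq, Serialize, Deserialize)]
struct SaveState {
	id: u64,
	is_shallow: bool,
	scanned_paths: Vec<String>,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
	let state = SaveState {
		id: 42,
		is_shallow: false,
		scanned_paths: vec!["a/b".into(), "a/c".into()],
	};

	// `to_vec_named` stores field names, so deserialization tolerates field reordering
	let bytes = rmp_serde::to_vec_named(&state)?;
	let restored: SaveState = rmp_serde::from_slice(&bytes)?;
	assert_eq!(state, restored);

	Ok(())
}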

View file

@ -1,5 +1,4 @@
use crate::Error;
use sd_task_system::{DispatcherShutdownError, Task};
use sd_utils::error::FileIOError;
use prisma_client_rust::QueryError;
@ -17,9 +16,6 @@ pub enum JobSystemError {
already_running_id: JobId,
},
#[error("job canceled: <id='{0}'>")]
Canceled(JobId),
#[error("failed to load job reports from database to resume jobs: {0}")]
LoadReportsForResume(#[from] QueryError),
@ -34,9 +30,6 @@ pub enum JobSystemError {
#[error(transparent)]
Report(#[from] ReportError),
#[error(transparent)]
Processing(#[from] Error),
}
impl From<JobSystemError> for rspc::Error {
@ -45,17 +38,36 @@ impl From<JobSystemError> for rspc::Error {
JobSystemError::NotFound(_) => {
Self::with_cause(rspc::ErrorCode::NotFound, e.to_string(), e)
}
JobSystemError::AlreadyRunning { .. } => {
Self::with_cause(rspc::ErrorCode::Conflict, e.to_string(), e)
}
JobSystemError::Canceled(_) => {
Self::with_cause(rspc::ErrorCode::ClientClosedRequest, e.to_string(), e)
}
JobSystemError::Processing(e) => e.into(),
JobSystemError::Report(e) => e.into(),
_ => Self::with_cause(rspc::ErrorCode::InternalServerError, e.to_string(), e),
}
}
}
#[derive(thiserror::Error, Debug)]
pub enum DispatcherError {
#[error("job canceled: <id='{0}'>")]
JobCanceled(JobId),
#[error("system entered on shutdown mode <task_count={}>", .0.len())]
Shutdown(Vec<Box<dyn Task<crate::Error>>>),
}
#[derive(Debug, thiserror::Error)]
pub enum JobErrorOrDispatcherError<JobError: Into<crate::Error>> {
#[error(transparent)]
JobError(#[from] JobError),
#[error(transparent)]
Dispatcher(#[from] DispatcherError),
}
impl From<DispatcherShutdownError<crate::Error>> for DispatcherError {
fn from(DispatcherShutdownError(tasks): DispatcherShutdownError<crate::Error>) -> Self {
Self::Shutdown(tasks)
}
}

File diff suppressed because it is too large Load diff

View file

@ -1,16 +1,22 @@
use crate::Error;
use crate::{Error, JobContext};
use sd_prisma::prisma::location;
use sd_task_system::BaseTaskDispatcher;
use sd_utils::error::FileIOError;
use std::{cell::RefCell, collections::hash_map::HashMap, path::Path, sync::Arc};
use std::{
cell::RefCell,
collections::hash_map::HashMap,
panic,
path::{Path, PathBuf},
sync::Arc,
};
use async_channel as chan;
use futures::Stream;
use futures_concurrency::future::{Join, TryJoin};
use tokio::{fs, spawn, sync::oneshot, task::JoinHandle};
use tracing::{error, info, trace, warn};
use tracing::{debug, error, info, instrument, trace, warn};
use uuid::Uuid;
mod error;
@ -20,8 +26,9 @@ mod runner;
mod store;
pub mod utils;
use error::JobSystemError;
pub use error::{DispatcherError, JobErrorOrDispatcherError, JobSystemError};
use job::{IntoJob, Job, JobName, JobOutput, OuterContext};
use report::Report;
use runner::{run, JobSystemRunner, RunnerMessage};
use store::{load_jobs, StoredJobEntry};
@ -36,22 +43,23 @@ pub enum Command {
Pause,
Resume,
Cancel,
Shutdown,
}
pub struct JobSystem<Ctx: OuterContext> {
msgs_tx: chan::Sender<RunnerMessage<Ctx>>,
job_outputs_rx: chan::Receiver<(JobId, Result<JobOutput, JobSystemError>)>,
pub struct JobSystem<OuterCtx: OuterContext, JobCtx: JobContext<OuterCtx>> {
msgs_tx: chan::Sender<RunnerMessage<OuterCtx, JobCtx>>,
job_outputs_rx: chan::Receiver<(JobId, Result<JobOutput, Error>)>,
store_jobs_file: Arc<PathBuf>,
runner_handle: RefCell<Option<JoinHandle<()>>>,
}
impl<Ctx: OuterContext> JobSystem<Ctx> {
pub async fn new(
impl<OuterCtx: OuterContext, JobCtx: JobContext<OuterCtx>> JobSystem<OuterCtx, JobCtx> {
pub fn new(
base_dispatcher: BaseTaskDispatcher<Error>,
data_directory: impl AsRef<Path> + Send,
previously_existing_contexts: &HashMap<Uuid, Ctx>,
) -> Result<Self, JobSystemError> {
data_directory: impl AsRef<Path>,
) -> Self {
let (job_outputs_tx, job_outputs_rx) = chan::unbounded();
let (job_return_status_tx, job_return_status_rx) = chan::bounded(16);
let (job_done_tx, job_done_rx) = chan::bounded(16);
let (msgs_tx, msgs_rx) = chan::bounded(8);
let store_jobs_file = Arc::new(data_directory.as_ref().join(PENDING_JOBS_FILE));
@ -63,8 +71,8 @@ impl<Ctx: OuterContext> JobSystem<Ctx> {
while let Err(e) = spawn({
let store_jobs_file = Arc::clone(&store_jobs_file);
let base_dispatcher = base_dispatcher.clone();
let job_return_status_tx = job_return_status_tx.clone();
let job_return_status_rx = job_return_status_rx.clone();
let job_return_status_tx = job_done_tx.clone();
let job_done_rx = job_done_rx.clone();
let job_outputs_tx = job_outputs_tx.clone();
let msgs_rx = msgs_rx.clone();
@ -77,7 +85,7 @@ impl<Ctx: OuterContext> JobSystem<Ctx> {
),
store_jobs_file.as_ref(),
msgs_rx,
job_return_status_rx,
job_done_rx,
)
.await;
}
@ -85,7 +93,7 @@ impl<Ctx: OuterContext> JobSystem<Ctx> {
.await
{
if e.is_panic() {
error!("Job system panicked: {e:#?}");
error!(?e, "Job system panicked;");
} else {
trace!("JobSystemRunner received shutdown signal and will exit...");
break;
@ -97,22 +105,47 @@ impl<Ctx: OuterContext> JobSystem<Ctx> {
}
})));
load_stored_job_entries(
store_jobs_file.as_ref(),
previously_existing_contexts,
&msgs_tx,
)
.await?;
Ok(Self {
Self {
msgs_tx,
job_outputs_rx,
store_jobs_file,
runner_handle,
})
}
}
pub async fn init(
&self,
previously_existing_contexts: &HashMap<Uuid, OuterCtx>,
) -> Result<(), JobSystemError> {
load_stored_job_entries(
&*self.store_jobs_file,
previously_existing_contexts,
&self.msgs_tx,
)
.await
}
/// Get a map of all active reports with their respective job ids
///
/// # Panics
///
/// Panics only happen if internal channels are unexpectedly closed
pub async fn get_active_reports(&self) -> HashMap<JobId, Report> {
let (ack_tx, ack_rx) = oneshot::channel();
self.msgs_tx
.send(RunnerMessage::GetActiveReports { ack_tx })
.await
.expect("runner msgs channel unexpectedly closed on get active reports request");
ack_rx
.await
.expect("ack channel closed before receiving get active reports response")
}
/// Checks if *any* of the desired jobs is running for the desired location
///
/// # Panics
///
/// Panics only happen if internal channels are unexpectedly closed
pub async fn check_running_jobs(
&self,
@ -122,7 +155,7 @@ impl<Ctx: OuterContext> JobSystem<Ctx> {
let (ack_tx, ack_rx) = oneshot::channel();
self.msgs_tx
.send(RunnerMessage::CheckIfJobAreRunning {
.send(RunnerMessage::CheckIfJobsAreRunning {
job_names,
location_id,
ack_tx,
@ -136,7 +169,9 @@ impl<Ctx: OuterContext> JobSystem<Ctx> {
}
/// Shutdown the job system
///
/// # Panics
///
/// Panics only happen if internal channels are unexpectedly closed
pub async fn shutdown(&self) {
if let Some(handle) = self
@ -152,7 +187,7 @@ impl<Ctx: OuterContext> JobSystem<Ctx> {
if let Err(e) = handle.await {
if e.is_panic() {
error!("JobSystem panicked: {e:#?}");
error!(?e, "JobSystem panicked;");
}
}
info!("JobSystem gracefully shutdown");
@ -162,13 +197,15 @@ impl<Ctx: OuterContext> JobSystem<Ctx> {
}
/// Dispatch a new job to the system
///
/// # Panics
///
/// Panics only happen if internal channels are unexpectedly closed
pub async fn dispatch<J: Job + SerializableJob<Ctx>>(
&mut self,
job: impl IntoJob<J, Ctx> + Send,
pub async fn dispatch<J: Job + SerializableJob<OuterCtx>>(
&self,
job: impl IntoJob<J, OuterCtx, JobCtx> + Send,
location_id: location::id::Type,
ctx: Ctx,
ctx: OuterCtx,
) -> Result<JobId, JobSystemError> {
let dyn_job = job.into_job();
let id = dyn_job.id();
@ -176,7 +213,7 @@ impl<Ctx: OuterContext> JobSystem<Ctx> {
let (ack_tx, ack_rx) = oneshot::channel();
self.msgs_tx
.send(RunnerMessage::NewJob {
id,
job_id: id,
location_id,
dyn_job,
ctx,
@ -191,17 +228,35 @@ impl<Ctx: OuterContext> JobSystem<Ctx> {
.map(|()| id)
}
pub fn receive_job_outputs(
&self,
) -> impl Stream<Item = (JobId, Result<JobOutput, JobSystemError>)> {
/// Check if there are any active jobs for the desired [`OuterContext`]
///
/// # Panics
///
/// Panics only happen if internal channels are unexpectedly closed
pub async fn has_active_jobs(&self, ctx: OuterCtx) -> bool {
let ctx_id = ctx.id();
let (ack_tx, ack_rx) = oneshot::channel();
self.msgs_tx
.send(RunnerMessage::HasActiveJobs { ctx_id, ack_tx })
.await
.expect("runner msgs channel unexpectedly closed on has active jobs request");
ack_rx
.await
.expect("ack channel closed before receiving has active jobs response")
}
pub fn receive_job_outputs(&self) -> impl Stream<Item = (JobId, Result<JobOutput, Error>)> {
self.job_outputs_rx.clone()
}
async fn send_command(&self, id: JobId, command: Command) -> Result<(), JobSystemError> {
#[instrument(skip(self), err)]
async fn send_command(&self, job_id: JobId, command: Command) -> Result<(), JobSystemError> {
let (ack_tx, ack_rx) = oneshot::channel();
self.msgs_tx
.send(RunnerMessage::Command {
id,
job_id,
command,
ack_tx,
})
@ -215,38 +270,48 @@ impl<Ctx: OuterContext> JobSystem<Ctx> {
.unwrap_or_else(|_| panic!("ack channel closed before receiving {command:?} response"))
}
pub async fn pause(&self, id: JobId) -> Result<(), JobSystemError> {
self.send_command(id, Command::Pause).await
pub async fn pause(&self, job_id: JobId) -> Result<(), JobSystemError> {
self.send_command(job_id, Command::Pause).await
}
pub async fn resume(&self, id: JobId) -> Result<(), JobSystemError> {
self.send_command(id, Command::Resume).await
pub async fn resume(&self, job_id: JobId) -> Result<(), JobSystemError> {
self.send_command(job_id, Command::Resume).await
}
pub async fn cancel(&self, id: JobId) -> Result<(), JobSystemError> {
self.send_command(id, Command::Cancel).await
pub async fn cancel(&self, job_id: JobId) -> Result<(), JobSystemError> {
self.send_command(job_id, Command::Cancel).await
}
}
/// SAFETY: Due to the usage of `RefCell` we lose the `Sync` impl, but we only use it to have a shutdown method
/// receiving `&self`, which is called once, and we also use `try_borrow_mut` so we never panic
unsafe impl<Ctx: OuterContext> Sync for JobSystem<Ctx> {}
unsafe impl<OuterCtx: OuterContext, JobCtx: JobContext<OuterCtx>> Sync
for JobSystem<OuterCtx, JobCtx>
{
}
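Every public method on `JobSystem` above follows the same request/ack shape: send a `RunnerMessage` carrying a oneshot sender, then await the reply from the runner task. A standalone sketch of that actor-style pattern, assuming the same async-channel and tokio crates; the message type and payload are illustrative:

use tokio::sync::oneshot;

enum Message {
	HasActiveJobs { ack_tx: oneshot::Sender<bool> },
}

async fn runner(msgs_rx: async_channel::Receiver<Message>) {
	while let Ok(msg) = msgs_rx.recv().await {
		match msg {
			Message::HasActiveJobs { ack_tx } => {
				// the runner owns the state and answers through the ack channel
				ack_tx.send(false).ok();
			}
		}
	}
}

#[tokio::main]
async fn main() {
	let (msgs_tx, msgs_rx) = async_channel::bounded(8);
	let handle = tokio::spawn(runner(msgs_rx));

	let (ack_tx, ack_rx) = oneshot::channel();
	msgs_tx
		.send(Message::HasActiveJobs { ack_tx })
		.await
		.expect("runner channel closed");
	let active = ack_rx.await.expect("ack channel closed");
	assert!(!active);

	drop(msgs_tx); // closing the channel lets the runner loop end
	handle.await.unwrap();
}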
async fn load_stored_job_entries<Ctx: OuterContext>(
async fn load_stored_job_entries<OuterCtx: OuterContext, JobCtx: JobContext<OuterCtx>>(
store_jobs_file: impl AsRef<Path> + Send,
previously_existing_job_contexts: &HashMap<Uuid, Ctx>,
msgs_tx: &chan::Sender<RunnerMessage<Ctx>>,
previously_existing_job_contexts: &HashMap<Uuid, OuterCtx>,
msgs_tx: &chan::Sender<RunnerMessage<OuterCtx, JobCtx>>,
) -> Result<(), JobSystemError> {
let store_jobs_file = store_jobs_file.as_ref();
let stores_jobs_by_db = rmp_serde::from_slice::<HashMap<Uuid, Vec<StoredJobEntry>>>(
&fs::read(store_jobs_file).await.map_err(|e| {
JobSystemError::StoredJobs(FileIOError::from((
store_jobs_file,
e,
"Failed to load jobs from disk",
)))
})?,
&match fs::read(store_jobs_file).await {
Ok(bytes) => bytes,
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
debug!("No pending jobs found on disk");
return Ok(());
}
Err(e) => {
return Err(JobSystemError::StoredJobs(FileIOError::from((
store_jobs_file,
e,
"Failed to load jobs from disk",
))))
}
},
)?;
stores_jobs_by_db
@ -254,7 +319,7 @@ async fn load_stored_job_entries<Ctx: OuterContext>(
.filter_map(|(ctx_id, entries)| {
previously_existing_job_contexts.get(&ctx_id).map_or_else(
|| {
warn!("Found stored jobs for a database that doesn't exist anymore: <ctx_id='{ctx_id}'>");
warn!(%ctx_id, "Found stored jobs for a database that doesn't exist anymore;");
None
},
|ctx| Some((entries, ctx.clone())),
@ -270,7 +335,7 @@ async fn load_stored_job_entries<Ctx: OuterContext>(
.await
.into_iter()
.filter_map(|res| {
res.map_err(|e| error!("Failed to load stored jobs: {e:#?}"))
res.map_err(|e| error!(?e, "Failed to load stored jobs;"))
.ok()
})
.flat_map(|(stored_jobs, ctx)| {
@ -283,7 +348,7 @@ async fn load_stored_job_entries<Ctx: OuterContext>(
msgs_tx
.send(RunnerMessage::ResumeStoredJob {
id: dyn_job.id(),
job_id: dyn_job.id(),
location_id,
dyn_job,
ctx,

View file

@ -1,14 +1,15 @@
use sd_prisma::prisma::{job, PrismaClient};
use crate::NonCriticalError;
use sd_prisma::prisma::{file_path, job, location, PrismaClient};
use sd_utils::db::{maybe_missing, MissingFieldError};
use std::{collections::HashMap, fmt, str::FromStr};
use std::{collections::HashMap, fmt, path::PathBuf, str::FromStr};
use chrono::{DateTime, Utc};
use prisma_client_rust::QueryError;
use serde::{Deserialize, Serialize};
use specta::Type;
use strum::ParseError;
use tracing::error;
use super::{job::JobName, JobId};
@ -22,10 +23,8 @@ pub enum ReportError {
InvalidJobStatusInt(i32),
#[error("job not found in database: <id='{0}'>")]
MissingReport(JobId),
#[error("serialization error: {0}")]
Serialization(#[from] rmp_serde::encode::Error),
#[error("deserialization error: {0}")]
Deserialization(#[from] rmp_serde::decode::Error),
#[error("json error: {0}")]
Json(#[from] serde_json::Error),
#[error(transparent)]
MissingField(#[from] MissingFieldError),
#[error("failed to parse job name from database: {0}")]
@ -44,10 +43,7 @@ impl From<ReportError> for rspc::Error {
ReportError::MissingReport(_) => {
Self::with_cause(rspc::ErrorCode::NotFound, e.to_string(), e)
}
ReportError::Serialization(_)
| ReportError::Deserialization(_)
| ReportError::MissingField(_)
| ReportError::JobNameParse(_) => {
ReportError::Json(_) | ReportError::MissingField(_) | ReportError::JobNameParse(_) => {
Self::with_cause(rspc::ErrorCode::InternalServerError, e.to_string(), e)
}
}
@ -55,21 +51,78 @@ impl From<ReportError> for rspc::Error {
}
#[derive(Debug, Serialize, Deserialize, Type, Clone)]
#[serde(rename_all = "snake_case")]
#[serde(tag = "type", content = "metadata")]
pub enum ReportMetadata {
Input(ReportInputMetadata),
Output(ReportOutputMetadata),
}
#[derive(Debug, Serialize, Deserialize, Type, Clone)]
#[serde(rename_all = "snake_case")]
#[serde(tag = "type", content = "data")]
pub enum ReportInputMetadata {
Placeholder,
// TODO: Add more types
// TODO: Add more variants as needed
Location(location::Data),
SubPath(PathBuf),
}
#[derive(Debug, Serialize, Deserialize, Type, Clone)]
#[serde(rename_all = "snake_case")]
#[serde(tag = "type", content = "data")]
pub enum ReportOutputMetadata {
Metrics(HashMap<String, serde_json::Value>),
// TODO: Add more types
Indexer {
total_paths: (u32, u32),
},
FileIdentifier {
total_orphan_paths: (u32, u32),
total_objects_created: (u32, u32),
total_objects_linked: (u32, u32),
},
MediaProcessor {
media_data_extracted: (u32, u32),
media_data_skipped: (u32, u32),
thumbnails_generated: (u32, u32),
thumbnails_skipped: (u32, u32),
},
Copier {
source_location_id: location::id::Type,
target_location_id: location::id::Type,
sources_file_path_ids: Vec<file_path::id::Type>,
target_location_relative_directory_path: PathBuf,
},
Mover {
source_location_id: location::id::Type,
target_location_id: location::id::Type,
sources_file_path_ids: Vec<file_path::id::Type>,
target_location_relative_directory_path: PathBuf,
},
Deleter {
location_id: location::id::Type,
file_path_ids: Vec<file_path::id::Type>,
},
Eraser {
location_id: location::id::Type,
file_path_ids: Vec<file_path::id::Type>,
passes: u32,
},
FileValidator {
location_id: location::id::Type,
sub_path: Option<PathBuf>,
},
}
impl From<ReportInputMetadata> for ReportMetadata {
fn from(value: ReportInputMetadata) -> Self {
Self::Input(value)
}
}
impl From<ReportOutputMetadata> for ReportMetadata {
fn from(value: ReportOutputMetadata) -> Self {
Self::Output(value)
}
}
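With `tag = "type"` and `content = "data"` plus snake_case renaming, these report metadata enums serialize as adjacently tagged JSON objects. A standalone sketch with a reduced stand-in enum showing the resulting shape:

use serde::Serialize;

#[derive(Serialize)]
#[serde(rename_all = "snake_case")]
#[serde(tag = "type", content = "data")]
enum OutputMetadata {
	Indexer { total_paths: (u32, u32) },
}

fn main() {
	let json = serde_json::to_string(&OutputMetadata::Indexer {
		total_paths: (10, 12),
	})
	.unwrap();
	// prints: {"type":"indexer","data":{"total_paths":[10,12]}}
	println!("{json}");
}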
#[derive(Debug, Serialize, Type, Clone)]
@ -80,7 +133,7 @@ pub struct Report {
pub metadata: Vec<ReportMetadata>,
pub critical_error: Option<String>,
pub non_critical_errors: Vec<String>,
pub non_critical_errors: Vec<NonCriticalError>,
pub created_at: Option<DateTime<Utc>>,
pub started_at: Option<DateTime<Utc>>,
@ -111,46 +164,53 @@ impl fmt::Display for Report {
impl TryFrom<job::Data> for Report {
type Error = ReportError;
fn try_from(data: job::Data) -> Result<Self, Self::Error> {
fn try_from(
job::Data {
id,
name,
action,
status,
errors_text: _, // Deprecated
critical_error,
non_critical_errors,
data: _, // Deprecated
metadata,
parent_id,
task_count,
completed_task_count,
date_estimated_completion,
date_created,
date_started,
date_completed,
..
}: job::Data,
) -> Result<Self, Self::Error> {
Ok(Self {
id: JobId::from_slice(&data.id).expect("corrupted database"),
name: JobName::from_str(&maybe_missing(data.name, "job.name")?)?,
action: data.action,
metadata: data
.metadata
.map(|m| {
rmp_serde::from_slice(&m).unwrap_or_else(|e| {
error!("Failed to deserialize job metadata: {e:#?}");
vec![]
})
})
.unwrap_or_default(),
critical_error: data.critical_error,
non_critical_errors: data.non_critical_errors.map_or_else(
Default::default,
|non_critical_errors| {
serde_json::from_slice(&non_critical_errors).unwrap_or_else(|e| {
error!("Failed to deserialize job non-critical errors: {e:#?}");
vec![]
})
},
),
created_at: data.date_created.map(DateTime::into),
started_at: data.date_started.map(DateTime::into),
completed_at: data.date_completed.map(DateTime::into),
parent_id: data
.parent_id
.map(|id| JobId::from_slice(&id).expect("corrupted database")),
status: Status::try_from(maybe_missing(data.status, "job.status")?)
id: JobId::from_slice(&id).expect("corrupted database"),
name: JobName::from_str(&maybe_missing(name, "job.name")?)?,
action,
metadata: if let Some(metadata) = metadata {
serde_json::from_slice(&metadata)?
} else {
vec![]
},
critical_error,
non_critical_errors: if let Some(non_critical_errors) = non_critical_errors {
serde_json::from_slice(&non_critical_errors)?
} else {
vec![]
},
created_at: date_created.map(DateTime::into),
started_at: date_started.map(DateTime::into),
completed_at: date_completed.map(DateTime::into),
parent_id: parent_id.map(|id| JobId::from_slice(&id).expect("corrupted database")),
status: Status::try_from(maybe_missing(status, "job.status")?)
.expect("corrupted database"),
task_count: data.task_count.unwrap_or(0),
completed_task_count: data.completed_task_count.unwrap_or(0),
task_count: task_count.unwrap_or(0),
completed_task_count: completed_task_count.unwrap_or(0),
phase: String::new(),
message: String::new(),
estimated_completion: data
.date_estimated_completion
.map_or_else(Utc::now, DateTime::into),
estimated_completion: date_estimated_completion.map_or_else(Utc::now, DateTime::into),
})
}
}
@ -178,6 +238,10 @@ impl Report {
}
}
pub fn push_metadata(&mut self, metadata: ReportOutputMetadata) {
self.metadata.push(metadata.into());
}
#[must_use]
pub fn get_action_name_and_group_key(&self) -> (String, Option<String>) {
// actions are formatted like "added_location" or "added_location-1"
@ -197,9 +261,11 @@ impl Report {
(action_name, Some(group_key))
}
pub async fn create(&mut self, db: &PrismaClient) -> Result<(), ReportError> {
let now = Utc::now();
pub async fn create(
&mut self,
db: &PrismaClient,
created_at: DateTime<Utc>,
) -> Result<(), ReportError> {
db.job()
.create(
self.id.as_bytes().to_vec(),
@ -207,11 +273,11 @@ impl Report {
[
job::name::set(Some(self.name.to_string())),
job::action::set(self.action.clone()),
job::date_created::set(Some(now.into())),
job::metadata::set(Some(rmp_serde::to_vec(&self.metadata)?)),
job::date_created::set(Some(created_at.into())),
job::metadata::set(Some(serde_json::to_vec(&self.metadata)?)),
job::status::set(Some(self.status as i32)),
job::date_started::set(self.started_at.map(Into::into)),
job::task_count::set(Some(1)),
job::task_count::set(Some(0)),
job::completed_task_count::set(Some(0)),
],
[self
@ -224,7 +290,7 @@ impl Report {
.map_err(ReportError::Create)?;
// Only setting created_at after we successfully created the job in DB
self.created_at = Some(now);
self.created_at = Some(created_at);
Ok(())
}
@ -236,10 +302,10 @@ impl Report {
vec![
job::status::set(Some(self.status as i32)),
job::critical_error::set(self.critical_error.clone()),
job::non_critical_errors::set(Some(rmp_serde::to_vec(
job::non_critical_errors::set(Some(serde_json::to_vec(
&self.non_critical_errors,
)?)),
job::metadata::set(Some(rmp_serde::to_vec(&self.metadata)?)),
job::metadata::set(Some(serde_json::to_vec(&self.metadata)?)),
job::task_count::set(Some(self.task_count)),
job::completed_task_count::set(Some(self.completed_task_count)),
job::date_started::set(self.started_at.map(Into::into)),
@ -347,7 +413,7 @@ impl ReportBuilder {
#[must_use]
pub fn with_metadata(mut self, metadata: ReportInputMetadata) -> Self {
self.metadata.push(ReportMetadata::Input(metadata));
self.metadata.push(metadata.into());
self
}

View file

@ -1,4 +1,4 @@
use crate::Error;
use crate::{Error, JobContext};
use sd_prisma::prisma::location;
use sd_task_system::BaseTaskDispatcher;
@ -15,19 +15,23 @@ use std::{
use async_channel as chan;
use chrono::Utc;
use futures::StreamExt;
use futures_concurrency::{future::TryJoin, stream::Merge};
use futures_concurrency::{
future::{Join, TryJoin},
stream::Merge,
};
use serde_json::json;
use tokio::{
fs,
sync::oneshot,
time::{interval_at, Instant},
};
use tokio_stream::wrappers::IntervalStream;
use tracing::{debug, error, info, warn};
use tracing::{debug, error, info, instrument, trace, warn};
use uuid::Uuid;
use super::{
job::{DynJob, JobHandle, JobName, JobOutput, OuterContext, ReturnStatus},
report,
report::{self, ReportOutputMetadata},
store::{StoredJob, StoredJobEntry},
Command, JobId, JobSystemError, SerializedTasks,
};
@ -35,61 +39,76 @@ use super::{
const JOBS_INITIAL_CAPACITY: usize = 32;
const FIVE_MINUTES: Duration = Duration::from_secs(5 * 60);
pub(super) enum RunnerMessage<Ctx: OuterContext> {
pub(super) enum RunnerMessage<OuterCtx: OuterContext, JobCtx: JobContext<OuterCtx>> {
NewJob {
id: JobId,
job_id: JobId,
location_id: location::id::Type,
dyn_job: Box<dyn DynJob<Ctx>>,
ctx: Ctx,
dyn_job: Box<dyn DynJob<OuterCtx, JobCtx>>,
ctx: OuterCtx,
ack_tx: oneshot::Sender<Result<(), JobSystemError>>,
},
ResumeStoredJob {
id: JobId,
job_id: JobId,
location_id: location::id::Type,
dyn_job: Box<dyn DynJob<Ctx>>,
ctx: Ctx,
dyn_job: Box<dyn DynJob<OuterCtx, JobCtx>>,
ctx: OuterCtx,
serialized_tasks: Option<SerializedTasks>,
ack_tx: oneshot::Sender<Result<(), JobSystemError>>,
},
Command {
id: JobId,
job_id: JobId,
command: Command,
ack_tx: oneshot::Sender<Result<(), JobSystemError>>,
},
CheckIfJobAreRunning {
GetActiveReports {
ack_tx: oneshot::Sender<HashMap<JobId, report::Report>>,
},
CheckIfJobsAreRunning {
job_names: Vec<JobName>,
location_id: location::id::Type,
ack_tx: oneshot::Sender<bool>,
},
Shutdown,
HasActiveJobs {
ctx_id: Uuid,
ack_tx: oneshot::Sender<bool>,
},
}
pub(super) struct JobSystemRunner<Ctx: OuterContext> {
base_dispatcher: BaseTaskDispatcher<Error>,
handles: HashMap<JobId, JobHandle<Ctx>>,
struct JobsWorktables {
job_hashes: HashMap<u64, JobId>,
job_hashes_by_id: HashMap<JobId, u64>,
running_jobs_by_job_id: HashMap<JobId, (JobName, location::id::Type)>,
running_jobs_set: HashSet<(JobName, location::id::Type)>,
jobs_to_store_by_ctx_id: HashMap<Uuid, Vec<StoredJobEntry>>,
job_return_status_tx: chan::Sender<(JobId, Result<ReturnStatus, Error>)>,
job_outputs_tx: chan::Sender<(JobId, Result<JobOutput, JobSystemError>)>,
}
impl<Ctx: OuterContext> JobSystemRunner<Ctx> {
pub(super) struct JobSystemRunner<OuterCtx: OuterContext, JobCtx: JobContext<OuterCtx>> {
on_shutdown_mode: bool,
base_dispatcher: BaseTaskDispatcher<Error>,
handles: HashMap<JobId, JobHandle<OuterCtx, JobCtx>>,
worktables: JobsWorktables,
job_return_status_tx: chan::Sender<(JobId, Result<ReturnStatus, Error>)>,
job_outputs_tx: chan::Sender<(JobId, Result<JobOutput, Error>)>,
}
impl<OuterCtx: OuterContext, JobCtx: JobContext<OuterCtx>> JobSystemRunner<OuterCtx, JobCtx> {
pub(super) fn new(
base_dispatcher: BaseTaskDispatcher<Error>,
job_return_status_tx: chan::Sender<(JobId, Result<ReturnStatus, Error>)>,
job_outputs_tx: chan::Sender<(JobId, Result<JobOutput, JobSystemError>)>,
job_outputs_tx: chan::Sender<(JobId, Result<JobOutput, Error>)>,
) -> Self {
Self {
on_shutdown_mode: false,
base_dispatcher,
handles: HashMap::with_capacity(JOBS_INITIAL_CAPACITY),
job_hashes: HashMap::with_capacity(JOBS_INITIAL_CAPACITY),
job_hashes_by_id: HashMap::with_capacity(JOBS_INITIAL_CAPACITY),
running_jobs_by_job_id: HashMap::with_capacity(JOBS_INITIAL_CAPACITY),
running_jobs_set: HashSet::with_capacity(JOBS_INITIAL_CAPACITY),
jobs_to_store_by_ctx_id: HashMap::new(),
worktables: JobsWorktables {
job_hashes: HashMap::with_capacity(JOBS_INITIAL_CAPACITY),
job_hashes_by_id: HashMap::with_capacity(JOBS_INITIAL_CAPACITY),
running_jobs_by_job_id: HashMap::with_capacity(JOBS_INITIAL_CAPACITY),
running_jobs_set: HashSet::with_capacity(JOBS_INITIAL_CAPACITY),
jobs_to_store_by_ctx_id: HashMap::new(),
},
job_return_status_tx,
job_outputs_tx,
}
@ -97,42 +116,43 @@ impl<Ctx: OuterContext> JobSystemRunner<Ctx> {
async fn new_job(
&mut self,
id: JobId,
job_id: JobId,
location_id: location::id::Type,
dyn_job: Box<dyn DynJob<Ctx>>,
ctx: Ctx,
dyn_job: Box<dyn DynJob<OuterCtx, JobCtx>>,
ctx: OuterCtx,
maybe_existing_tasks: Option<SerializedTasks>,
) -> Result<(), JobSystemError> {
let Self {
base_dispatcher,
handles,
job_hashes,
job_hashes_by_id,
worktables:
JobsWorktables {
job_hashes,
job_hashes_by_id,
running_jobs_by_job_id,
running_jobs_set,
..
},
job_return_status_tx,
running_jobs_by_job_id,
running_jobs_set,
..
} = self;
let db = ctx.db();
let job_name = dyn_job.job_name();
let job_hash = dyn_job.hash();
if let Some(&already_running_id) = job_hashes.get(&job_hash) {
return Err(JobSystemError::AlreadyRunning {
new_id: id,
new_id: job_id,
already_running_id,
job_name,
});
}
running_jobs_by_job_id.insert(id, (job_name, location_id));
running_jobs_by_job_id.insert(job_id, (job_name, location_id));
running_jobs_set.insert((job_name, location_id));
job_hashes.insert(job_hash, id);
job_hashes_by_id.insert(id, job_hash);
let start_time = Utc::now();
job_hashes.insert(job_hash, job_id);
job_hashes_by_id.insert(job_id, job_hash);
let mut handle = if maybe_existing_tasks.is_some() {
dyn_job.resume(
@ -149,174 +169,220 @@ impl<Ctx: OuterContext> JobSystemRunner<Ctx> {
)
};
handle.report.status = report::Status::Running;
if handle.report.started_at.is_none() {
handle.report.started_at = Some(start_time);
}
handle.register_start(Utc::now()).await?;
// If the report doesn't have a created_at date, it's a new report
if handle.report.created_at.is_none() {
handle.report.create(db).await?;
} else {
// Otherwise it can be a job being resumed or a child job that was already created
handle.report.update(db).await?;
}
// Registering child jobs
handle
.next_jobs
.iter_mut()
.map(|dyn_job| dyn_job.report_mut())
.map(|next_job_report| async {
if next_job_report.created_at.is_none() {
next_job_report.create(ctx.db()).await
} else {
Ok(())
}
})
.collect::<Vec<_>>()
.try_join()
.await?;
handles.insert(id, handle);
handles.insert(job_id, handle);
Ok(())
}
async fn process_command(&mut self, id: JobId, command: Command) -> Result<(), JobSystemError> {
if let Some(handle) = self.handles.get_mut(&id) {
handle.send_command(command).await?;
Ok(())
async fn get_active_reports(&self) -> HashMap<JobId, report::Report> {
self.handles
.iter()
.map(|(job_id, handle)| async { (*job_id, handle.ctx.report().await.clone()) })
.collect::<Vec<_>>()
.join()
.await
.into_iter()
.collect()
}
async fn process_command(
&mut self,
job_id: JobId,
command: Command,
ack_tx: oneshot::Sender<Result<(), JobSystemError>>,
) {
if let Some(handle) = self.handles.get_mut(&job_id) {
match (command, handle.is_running) {
(Command::Pause, false) => {
warn!("Tried to pause a job already paused");
return ack_tx.send(Ok(())).expect(
"ack channel closed before sending response to already paused job",
);
}
(Command::Resume, true) => {
warn!("Tried to resume a job already running");
return ack_tx.send(Ok(())).expect(
"ack channel closed before sending response to already running job",
);
}
_ => {}
}
match command {
Command::Pause | Command::Cancel | Command::Shutdown => {
handle.is_running = false;
}
Command::Resume => {
handle.is_running = true;
}
}
handle.send_command(command, ack_tx).await;
handle.ctx.invalidate_query("jobs.isActive");
handle.ctx.invalidate_query("jobs.reports");
} else {
Err(JobSystemError::NotFound(id))
error!("Job not found");
ack_tx
.send(Err(JobSystemError::NotFound(job_id)))
.unwrap_or_else(|_| {
panic!("ack channel closed before sending {command:?} response")
});
}
}
fn is_empty(&self) -> bool {
self.handles.is_empty() && self.job_hashes.is_empty() && self.job_hashes_by_id.is_empty()
self.handles.is_empty()
&& self.worktables.job_hashes.is_empty()
&& self.worktables.job_hashes_by_id.is_empty()
}
fn check_if_job_are_running(
fn total_jobs(&self) -> usize {
self.handles.len()
}
fn check_if_jobs_are_running(
&self,
job_names: Vec<JobName>,
location_id: location::id::Type,
) -> bool {
job_names
.into_iter()
.any(|job_name| self.running_jobs_set.contains(&(job_name, location_id)))
job_names.into_iter().any(|job_name| {
self.worktables
.running_jobs_set
.contains(&(job_name, location_id))
})
}
async fn process_return_status(&mut self, job_id: JobId, status: Result<ReturnStatus, Error>) {
#[instrument(skip_all, fields(%job_id))]
async fn process_return_status(
&mut self,
job_id: JobId,
status: Result<ReturnStatus, Error>,
) -> Result<(), JobSystemError> {
let Self {
on_shutdown_mode,
handles,
job_hashes,
job_hashes_by_id,
worktables,
job_outputs_tx,
job_return_status_tx,
base_dispatcher,
jobs_to_store_by_ctx_id,
running_jobs_by_job_id,
running_jobs_set,
..
} = self;
let job_hash = job_hashes_by_id.remove(&job_id).expect("it must be here");
let (job_name, location_id) = running_jobs_by_job_id
let job_hash = worktables
.job_hashes_by_id
.remove(&job_id)
.expect("it must be here");
let (job_name, location_id) = worktables
.running_jobs_by_job_id
.remove(&job_id)
.expect("a JobName and location_id must've been inserted in the map with the job id");
assert!(running_jobs_set.remove(&(job_name, location_id)));
assert!(job_hashes.remove(&job_hash).is_some());
assert!(worktables.running_jobs_set.remove(&(job_name, location_id)));
assert!(worktables.job_hashes.remove(&job_hash).is_some());
let mut handle = handles.remove(&job_id).expect("it must be here");
handle.run_time += handle.start_time.elapsed();
handle
.ctx
.report_mut()
.await
.push_metadata(ReportOutputMetadata::Metrics(HashMap::from([(
"job_run_time".into(),
json!(handle.run_time),
)])));
let res = match status {
Ok(ReturnStatus::Completed(job_return)) => {
try_dispatch_next_job(
&mut handle,
location_id,
base_dispatcher.clone(),
(job_hashes, job_hashes_by_id),
worktables,
handles,
job_return_status_tx.clone(),
);
)
.await?;
handle.complete_job(job_return).await
handle.complete_job(job_return).await.map_err(Into::into)
}
Ok(ReturnStatus::Shutdown(Ok(Some(serialized_job)))) => {
let name = handle.report.name;
Ok(ReturnStatus::Shutdown(res)) => {
match res {
Ok(Some(serialized_job)) => {
let name = {
let db = handle.ctx.db();
let mut report = handle.ctx.report_mut().await;
if let Err(e) = report.update(db).await {
error!(?e, "Failed to update report on job shutdown;");
}
report.name
};
let Ok(next_jobs) = handle
.next_jobs
.into_iter()
.map(|next_job| async move {
let next_id = next_job.id();
let next_name = next_job.job_name();
next_job
.serialize()
.await
.map(|maybe_serialized_job| {
maybe_serialized_job.map(|serialized_job| StoredJob {
id: next_id,
name: next_name,
worktables
.jobs_to_store_by_ctx_id
.entry(handle.ctx.id())
.or_default()
.push(StoredJobEntry {
location_id,
root_job: StoredJob {
id: job_id,
run_time: handle.start_time.elapsed(),
name,
serialized_job,
})
})
.map_err(|e| {
error!(
"Failed to serialize next job: \
<parent_id='{job_id}', parent_name='{name}', \
next_id='{next_id}', next_name='{next_name}'>: {e:#?}"
);
})
})
.collect::<Vec<_>>()
.try_join()
.await
else {
return;
};
},
next_jobs: serialize_next_jobs_to_shutdown(
job_id,
job_name,
handle.next_jobs,
)
.await
.unwrap_or_default(),
});
jobs_to_store_by_ctx_id
.entry(handle.ctx.id())
.or_default()
.push(StoredJobEntry {
location_id,
root_job: StoredJob {
id: job_id,
name,
serialized_job,
},
next_jobs: next_jobs.into_iter().flatten().collect(),
});
debug!(%name, "Job was shutdown and serialized;");
}
return;
Ok(None) => {
debug!(
"Job was shutdown but didn't returned any serialized data, \
probably it isn't resumable job"
);
}
Err(e) => {
error!(?e, "Failed to serialize job;");
}
}
if *on_shutdown_mode && handles.is_empty() {
// Job system is empty and in shutdown mode so we close this channel to finish the shutdown process
job_return_status_tx.close();
}
return Ok(());
}
Ok(ReturnStatus::Shutdown(Ok(None))) => {
debug!(
"Job was shutdown but didn't returned any serialized data, \
probably it isn't resumable job: <id='{job_id}'>"
);
return;
Ok(ReturnStatus::Canceled(job_return)) => {
handle.cancel_job(job_return).await.map_err(Into::into)
}
Ok(ReturnStatus::Shutdown(Err(e))) => {
error!("Failed to serialize job: {e:#?}");
return;
}
Ok(ReturnStatus::Canceled) => handle
.cancel_job()
Err(e) => handle
.failed_job(&e)
.await
.and_then(|()| Err(JobSystemError::Canceled(job_id))),
Err(e) => handle.failed_job(&e).await.and_then(|()| Err(e.into())),
.map_err(Into::into)
.and_then(|()| Err(e)),
};
job_outputs_tx
.send((job_id, res))
.await
.expect("job outputs channel unexpectedly closed on job completion");
handle.ctx.invalidate_query("jobs.isActive");
handle.ctx.invalidate_query("jobs.reports");
Ok(())
}
fn clean_memory(&mut self) {
@ -326,28 +392,34 @@ impl<Ctx: OuterContext> JobSystemRunner<Ctx> {
self.handles.shrink_to(JOBS_INITIAL_CAPACITY);
}
if self.job_hashes.capacity() > JOBS_INITIAL_CAPACITY
&& self.job_hashes.len() < JOBS_INITIAL_CAPACITY
if self.worktables.job_hashes.capacity() > JOBS_INITIAL_CAPACITY
&& self.worktables.job_hashes.len() < JOBS_INITIAL_CAPACITY
{
self.job_hashes.shrink_to(JOBS_INITIAL_CAPACITY);
self.worktables.job_hashes.shrink_to(JOBS_INITIAL_CAPACITY);
}
if self.job_hashes_by_id.capacity() > JOBS_INITIAL_CAPACITY
&& self.job_hashes_by_id.len() < JOBS_INITIAL_CAPACITY
if self.worktables.job_hashes_by_id.capacity() > JOBS_INITIAL_CAPACITY
&& self.worktables.job_hashes_by_id.len() < JOBS_INITIAL_CAPACITY
{
self.job_hashes_by_id.shrink_to(JOBS_INITIAL_CAPACITY);
self.worktables
.job_hashes_by_id
.shrink_to(JOBS_INITIAL_CAPACITY);
}
if self.running_jobs_by_job_id.capacity() > JOBS_INITIAL_CAPACITY
&& self.running_jobs_by_job_id.len() < JOBS_INITIAL_CAPACITY
if self.worktables.running_jobs_by_job_id.capacity() > JOBS_INITIAL_CAPACITY
&& self.worktables.running_jobs_by_job_id.len() < JOBS_INITIAL_CAPACITY
{
self.running_jobs_by_job_id.shrink_to(JOBS_INITIAL_CAPACITY);
self.worktables
.running_jobs_by_job_id
.shrink_to(JOBS_INITIAL_CAPACITY);
}
if self.running_jobs_set.capacity() > JOBS_INITIAL_CAPACITY
&& self.running_jobs_set.len() < JOBS_INITIAL_CAPACITY
if self.worktables.running_jobs_set.capacity() > JOBS_INITIAL_CAPACITY
&& self.worktables.running_jobs_set.len() < JOBS_INITIAL_CAPACITY
{
self.running_jobs_set.shrink_to(JOBS_INITIAL_CAPACITY);
self.worktables
.running_jobs_set
.shrink_to(JOBS_INITIAL_CAPACITY);
}
}
@ -359,9 +431,13 @@ impl<Ctx: OuterContext> JobSystemRunner<Ctx> {
let Self {
handles,
job_hashes,
job_hashes_by_id,
jobs_to_store_by_ctx_id,
worktables:
JobsWorktables {
job_hashes,
job_hashes_by_id,
jobs_to_store_by_ctx_id,
..
},
..
} = self;
@ -382,23 +458,113 @@ impl<Ctx: OuterContext> JobSystemRunner<Ctx> {
.await
.map_err(|e| JobSystemError::StoredJobs(FileIOError::from((store_jobs_file, e))))
}
fn has_active_jobs(&self, ctx_id: Uuid) -> bool {
self.handles
.values()
.any(|handle| handle.ctx.id() == ctx_id && handle.is_running)
}
async fn dispatch_shutdown_command_to_jobs(&mut self) {
self.handles
.values_mut()
.map(|handle| async move {
let (tx, rx) = oneshot::channel();
handle.send_command(Command::Shutdown, tx).await;
rx.await.expect("Worker failed to ack shutdown request")
})
.collect::<Vec<_>>()
.join()
.await
.into_iter()
.for_each(|res| {
if let Err(e) = res {
error!(?e, "Failed to shutdown job;");
}
});
}
}
fn try_dispatch_next_job<Ctx: OuterContext>(
handle: &mut JobHandle<Ctx>,
#[instrument(skip(next_jobs))]
async fn serialize_next_jobs_to_shutdown<OuterCtx: OuterContext, JobCtx: JobContext<OuterCtx>>(
parent_job_id: JobId,
parent_job_name: JobName,
next_jobs: impl IntoIterator<Item = Box<dyn DynJob<OuterCtx, JobCtx>>> + Send,
) -> Option<Vec<StoredJob>> {
next_jobs
.into_iter()
.map(|next_job| async move {
let next_id = next_job.id();
let next_name = next_job.job_name();
next_job
.serialize()
.await
.map(|maybe_serialized_job| {
maybe_serialized_job.map(|serialized_job| StoredJob {
id: next_id,
run_time: Duration::ZERO,
name: next_name,
serialized_job,
})
})
.map_err(|e| {
error!(%next_id, %next_name, ?e, "Failed to serialize next job;");
})
})
.collect::<Vec<_>>()
.try_join()
.await
.map(|maybe_serialized_next_jobs| {
maybe_serialized_next_jobs.into_iter().flatten().collect()
})
.ok()
}
#[instrument(
skip_all,
fields(
job_id = %handle.id,
next_jobs_count = handle.next_jobs.len(),
location_id = %location_id,
total_running_jobs = handles.len(),
)
)]
async fn try_dispatch_next_job<OuterCtx: OuterContext, JobCtx: JobContext<OuterCtx>>(
handle: &mut JobHandle<OuterCtx, JobCtx>,
location_id: location::id::Type,
base_dispatcher: BaseTaskDispatcher<Error>,
(job_hashes, job_hashes_by_id): (&mut HashMap<u64, JobId>, &mut HashMap<JobId, u64>),
handles: &mut HashMap<JobId, JobHandle<Ctx>>,
JobsWorktables {
job_hashes,
job_hashes_by_id,
running_jobs_by_job_id,
running_jobs_set,
..
}: &mut JobsWorktables,
handles: &mut HashMap<JobId, JobHandle<OuterCtx, JobCtx>>,
job_return_status_tx: chan::Sender<(JobId, Result<ReturnStatus, Error>)>,
) {
) -> Result<(), JobSystemError> {
if let Some(next) = handle.next_jobs.pop_front() {
let next_id = next.id();
let next_hash = next.hash();
let next_name = next.job_name();
if let Entry::Vacant(e) = job_hashes.entry(next_hash) {
e.insert(next_id);
trace!(%next_id, %next_name, "Dispatching next job;");
job_hashes_by_id.insert(next_id, next_hash);
let mut next_handle =
next.dispatch(base_dispatcher, handle.ctx.clone(), job_return_status_tx);
running_jobs_by_job_id.insert(next_id, (next_name, location_id));
running_jobs_set.insert((next_name, location_id));
let mut next_handle = next.dispatch(
base_dispatcher,
handle.ctx.get_outer_ctx(),
job_return_status_tx,
);
next_handle.register_start(Utc::now()).await?;
assert!(
next_handle.next_jobs.is_empty(),
@ -410,30 +576,34 @@ fn try_dispatch_next_job<Ctx: OuterContext>(
handles.insert(next_id, next_handle);
} else {
warn!("Unexpectedly found a job with the same hash as the next job: <id='{next_id}', name='{}'>", next.job_name());
warn!(%next_id, %next_name, "Unexpectedly found a job with the same hash as the next job;");
}
} else {
trace!("No next jobs to dispatch");
}
Ok(())
}
pub(super) async fn run<Ctx: OuterContext>(
mut runner: JobSystemRunner<Ctx>,
pub(super) async fn run<OuterCtx: OuterContext, JobCtx: JobContext<OuterCtx>>(
mut runner: JobSystemRunner<OuterCtx, JobCtx>,
store_jobs_file: impl AsRef<Path> + Send,
msgs_rx: chan::Receiver<RunnerMessage<Ctx>>,
job_return_status_rx: chan::Receiver<(JobId, Result<ReturnStatus, Error>)>,
msgs_rx: chan::Receiver<RunnerMessage<OuterCtx, JobCtx>>,
job_done_rx: chan::Receiver<(JobId, Result<ReturnStatus, Error>)>,
) {
enum StreamMessage<Ctx: OuterContext> {
enum StreamMessage<OuterCtx: OuterContext, JobCtx: JobContext<OuterCtx>> {
ReturnStatus((JobId, Result<ReturnStatus, Error>)),
RunnerMessage(RunnerMessage<Ctx>),
RunnerMessage(RunnerMessage<OuterCtx, JobCtx>),
CleanMemoryTick,
}
let memory_cleanup_interval = interval_at(Instant::now() + FIVE_MINUTES, FIVE_MINUTES);
let job_return_status_rx_to_shutdown = job_return_status_rx.clone();
let job_return_status_rx_to_shutdown = job_done_rx.clone();
let mut msg_stream = pin!((
msgs_rx.map(StreamMessage::RunnerMessage),
job_return_status_rx.map(StreamMessage::ReturnStatus),
job_done_rx.map(StreamMessage::ReturnStatus),
IntervalStream::new(memory_cleanup_interval).map(|_| StreamMessage::CleanMemoryTick),
)
.merge());
@ -442,24 +612,41 @@ pub(super) async fn run<Ctx: OuterContext>(
match msg {
// Job return status messages
StreamMessage::ReturnStatus((job_id, status)) => {
runner.process_return_status(job_id, status).await;
if let Err(e) = runner.process_return_status(job_id, status).await {
error!(?e, "Failed to process return status;");
}
}
// Runner messages
StreamMessage::RunnerMessage(RunnerMessage::NewJob {
id,
job_id,
location_id,
dyn_job,
ctx,
ack_tx,
}) => {
ack_tx
.send(runner.new_job(id, location_id, dyn_job, ctx, None).await)
.send(
runner
.new_job(job_id, location_id, dyn_job, ctx, None)
.await,
)
.expect("ack channel closed before sending new job response");
}
StreamMessage::RunnerMessage(RunnerMessage::HasActiveJobs { ctx_id, ack_tx }) => {
ack_tx
.send(runner.has_active_jobs(ctx_id))
.expect("ack channel closed before sending has active jobs response");
}
StreamMessage::RunnerMessage(RunnerMessage::GetActiveReports { ack_tx }) => {
ack_tx
.send(runner.get_active_reports().await)
.expect("ack channel closed before sending active reports response");
}
StreamMessage::RunnerMessage(RunnerMessage::ResumeStoredJob {
id,
job_id,
location_id,
dyn_job,
ctx,
@ -469,60 +656,58 @@ pub(super) async fn run<Ctx: OuterContext>(
ack_tx
.send(
runner
.new_job(id, location_id, dyn_job, ctx, serialized_tasks)
.new_job(job_id, location_id, dyn_job, ctx, serialized_tasks)
.await,
)
.expect("ack channel closed before sending resume job response");
}
StreamMessage::RunnerMessage(RunnerMessage::Command {
id,
job_id: id,
command,
ack_tx,
}) => {
ack_tx
.send(runner.process_command(id, command).await)
.unwrap_or_else(|_| {
panic!("ack channel closed before sending {command:?} response")
});
}
}) => runner.process_command(id, command, ack_tx).await,
StreamMessage::RunnerMessage(RunnerMessage::Shutdown) => {
runner.on_shutdown_mode = true;
// Consuming all pending return status messages
loop {
while let Ok((job_id, status)) = job_return_status_rx_to_shutdown.try_recv() {
runner.process_return_status(job_id, status).await;
if !runner.is_empty() {
let mut job_return_status_stream = pin!(job_return_status_rx_to_shutdown);
runner.dispatch_shutdown_command_to_jobs().await;
debug!(
total_jobs = runner.total_jobs(),
"Waiting for jobs to shutdown before shutting down the job system...;",
);
while let Some((job_id, status)) = job_return_status_stream.next().await {
if let Err(e) = runner.process_return_status(job_id, status).await {
error!(?e, "Failed to process return status before shutting down;");
}
}
if runner.is_empty() {
break;
// Now the runner can shutdown
if let Err(e) = runner.save_jobs(store_jobs_file).await {
error!(?e, "Failed to save jobs before shutting down;");
}
debug!("Waiting for all jobs to complete before shutting down...");
}
// Now the runner can shutdown
if let Err(e) = runner.save_jobs(store_jobs_file).await {
error!("Failed to save jobs before shutting down: {e:#?}");
}
return;
}
StreamMessage::RunnerMessage(RunnerMessage::CheckIfJobAreRunning {
StreamMessage::RunnerMessage(RunnerMessage::CheckIfJobsAreRunning {
job_names,
location_id,
ack_tx,
}) => {
ack_tx
.send(runner.check_if_job_are_running(job_names, location_id))
.send(runner.check_if_jobs_are_running(job_names, location_id))
.expect("ack channel closed before sending resume job response");
}
// Memory cleanup tick
StreamMessage::CleanMemoryTick => {
runner.clean_memory();
}
StreamMessage::CleanMemoryTick => runner.clean_memory(),
}
}
}

View file

@ -1,4 +1,4 @@
use crate::{file_identifier, indexer, media_processor};
use crate::{file_identifier, indexer, media_processor, JobContext};
use sd_prisma::prisma::{job, location};
use sd_utils::uuid_to_bytes;
@ -8,6 +8,7 @@ use std::{
future::Future,
iter,
marker::PhantomData,
time::Duration,
};
use futures_concurrency::future::TryJoin;
@ -20,9 +21,11 @@ use super::{
};
#[derive(Debug, Serialize, Deserialize)]
#[repr(transparent)]
#[serde(transparent)]
pub struct SerializedTasks(pub Vec<u8>);
pub trait SerializableJob<Ctx: OuterContext>: 'static
pub trait SerializableJob<OuterCtx: OuterContext>: 'static
where
Self: Sized,
{
@ -35,7 +38,7 @@ where
#[allow(unused_variables)]
fn deserialize(
serialized_job: &[u8],
ctx: &Ctx,
ctx: &OuterCtx,
) -> impl Future<
Output = Result<Option<(Self, Option<SerializedTasks>)>, rmp_serde::decode::Error>,
> + Send {
@ -47,6 +50,7 @@ where
pub struct StoredJob {
pub(super) id: JobId,
pub(super) name: JobName,
pub(super) run_time: Duration,
pub(super) serialized_job: Vec<u8>,
}
@ -57,13 +61,13 @@ pub struct StoredJobEntry {
pub(super) next_jobs: Vec<StoredJob>,
}
pub async fn load_jobs<Ctx: OuterContext>(
pub async fn load_jobs<OuterCtx: OuterContext, JobCtx: JobContext<OuterCtx>>(
entries: Vec<StoredJobEntry>,
ctx: &Ctx,
ctx: &OuterCtx,
) -> Result<
Vec<(
location::id::Type,
Box<dyn DynJob<Ctx>>,
Box<dyn DynJob<OuterCtx, JobCtx>>,
Option<SerializedTasks>,
)>,
JobSystemError,
@ -81,7 +85,7 @@ pub async fn load_jobs<Ctx: OuterContext>(
..
}| { iter::once(*id).chain(next_jobs.iter().map(|StoredJob { id, .. }| *id)) },
)
.map(uuid_to_bytes)
.map(|job_id| uuid_to_bytes(&job_id))
.collect::<Vec<_>>(),
)])
.exec()
@ -166,50 +170,58 @@ pub async fn load_jobs<Ctx: OuterContext>(
}
macro_rules! match_deserialize_job {
($stored_job:ident, $report:ident, $ctx:ident, $ctx_type:ty, [$($job_type:ty),+ $(,)?]) => {{
($stored_job:ident, $report:ident, $outer_ctx:ident, $outer_ctx_type:ty, $job_ctx_type:ty, [$($job_type:ty),+ $(,)?]) => {{
let StoredJob {
id,
name,
run_time,
serialized_job,
} = $stored_job;
match name {
$(<$job_type as Job>::NAME => <$job_type as SerializableJob<$ctx_type>>::deserialize(
$(<$job_type as Job>::NAME => <$job_type as SerializableJob<$outer_ctx_type>>::deserialize(
&serialized_job,
$ctx,
$outer_ctx,
).await
.map(|maybe_job| maybe_job.map(|(job, tasks)| -> (
Box<dyn DynJob<$ctx_type>>,
.map(|maybe_job| maybe_job.map(|(job, maybe_tasks)| -> (
Box<dyn DynJob<$outer_ctx_type, $job_ctx_type>>,
Option<SerializedTasks>
) {
(
Box::new(JobHolder {
id,
job,
run_time,
report: $report,
next_jobs: VecDeque::new(),
_ctx: PhantomData,
}),
tasks,
maybe_tasks.and_then(
|tasks| (!tasks.0.is_empty()).then_some(tasks)
),
)
}
))
.map_err(Into::into),)+
// TODO(fogodev): this is temporary until we can get rid of the old job system
_ => unimplemented!("Job not implemented"),
}
}};
}
async fn load_job<Ctx: OuterContext>(
async fn load_job<OuterCtx: OuterContext, JobCtx: JobContext<OuterCtx>>(
stored_job: StoredJob,
report: Report,
ctx: &Ctx,
) -> Result<Option<(Box<dyn DynJob<Ctx>>, Option<SerializedTasks>)>, JobSystemError> {
ctx: &OuterCtx,
) -> Result<Option<(Box<dyn DynJob<OuterCtx, JobCtx>>, Option<SerializedTasks>)>, JobSystemError> {
match_deserialize_job!(
stored_job,
report,
ctx,
Ctx,
OuterCtx,
JobCtx,
[
indexer::job::Indexer,
file_identifier::job::FileIdentifier,

View file

@ -1,16 +1,35 @@
use crate::Error;
use sd_task_system::TaskHandle;
use sd_task_system::{TaskHandle, TaskStatus};
use futures::{stream::FuturesUnordered, StreamExt};
use futures_concurrency::future::Join;
use tracing::{error, trace};
pub async fn cancel_pending_tasks(
pending_tasks: impl IntoIterator<Item = &TaskHandle<Error>> + Send,
) {
pub async fn cancel_pending_tasks(pending_tasks: &mut FuturesUnordered<TaskHandle<Error>>) {
pending_tasks
.into_iter()
.iter()
.map(TaskHandle::cancel)
.collect::<Vec<_>>()
.join()
.await;
trace!(total_tasks = %pending_tasks.len(), "canceled all pending tasks, now waiting for completion");
while let Some(task_result) = pending_tasks.next().await {
match task_result {
Ok(TaskStatus::Done((task_id, _))) => trace!(
%task_id,
"tasks cancellation received a completed task;",
),
Ok(TaskStatus::Canceled | TaskStatus::ForcedAbortion | TaskStatus::Shutdown(_)) => {
// Job canceled task
}
Ok(TaskStatus::Error(e)) => error!(%e, "job canceled an errored task;"),
Err(e) => error!(%e, "task system failed to cancel a task;"),
}
}
}

View file

@ -44,8 +44,12 @@ pub mod utils;
use media_processor::ThumbKey;
pub use job_system::{
job::{IntoJob, JobBuilder, JobName, JobOutput, JobOutputData, OuterContext, ProgressUpdate},
JobId, JobSystem,
job::{
IntoJob, JobContext, JobEnqueuer, JobName, JobOutput, JobOutputData, OuterContext,
ProgressUpdate,
},
report::Report,
JobId, JobSystem, JobSystemError,
};
#[derive(Error, Debug)]
@ -59,6 +63,9 @@ pub enum Error {
#[error(transparent)]
TaskSystem(#[from] TaskSystemError),
#[error(transparent)]
JobSystem(#[from] JobSystemError),
}
impl From<Error> for rspc::Error {
@ -70,19 +77,21 @@ impl From<Error> for rspc::Error {
Error::TaskSystem(e) => {
Self::with_cause(rspc::ErrorCode::InternalServerError, e.to_string(), e)
}
Error::JobSystem(e) => e.into(),
}
}
}
#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type)]
#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type, Clone)]
#[serde(rename_all = "snake_case")]
pub enum NonCriticalError {
// TODO: Add variants as needed
#[error(transparent)]
Indexer(#[from] indexer::NonCriticalError),
Indexer(#[from] indexer::NonCriticalIndexerError),
#[error(transparent)]
FileIdentifier(#[from] file_identifier::NonCriticalError),
FileIdentifier(#[from] file_identifier::NonCriticalFileIdentifierError),
#[error(transparent)]
MediaProcessor(#[from] media_processor::NonCriticalError),
MediaProcessor(#[from] media_processor::NonCriticalMediaProcessorError),
}
#[repr(i32)]
@ -96,7 +105,7 @@ pub enum LocationScanState {
#[derive(Debug, Serialize, Type)]
pub enum UpdateEvent {
NewThumbnailEvent {
NewThumbnail {
thumb_key: ThumbKey,
},
NewIdentifiedObjects {

View file

@ -1,12 +1,24 @@
use crate::media_processor::{self, media_data_extractor};
use sd_core_prisma_helpers::ObjectPubId;
use sd_core_sync::Manager as SyncManager;
use sd_file_ext::extensions::{Extension, ImageExtension, ALL_IMAGE_EXTENSIONS};
use sd_media_metadata::ExifMetadata;
use sd_prisma::prisma::{exif_data, object, PrismaClient};
use sd_prisma::{
prisma::{exif_data, object, PrismaClient},
prisma_sync,
};
use sd_sync::{option_sync_db_entry, OperationFactory};
use sd_utils::chain_optional_iter;
use std::path::Path;
use futures_concurrency::future::TryJoin;
use once_cell::sync::Lazy;
use prisma_client_rust::QueryError;
use super::from_slice_option_to_option;
pub static AVAILABLE_EXTENSIONS: Lazy<Vec<Extension>> = Lazy::new(|| {
ALL_IMAGE_EXTENSIONS
@ -17,6 +29,7 @@ pub static AVAILABLE_EXTENSIONS: Lazy<Vec<Extension>> = Lazy::new(|| {
.collect()
});
#[must_use]
pub const fn can_extract(image_extension: ImageExtension) -> bool {
use ImageExtension::{
Avci, Avcs, Avif, Dng, Heic, Heif, Heifs, Hif, Jpeg, Jpg, Png, Tiff, Webp,
@ -27,33 +40,62 @@ pub const fn can_extract(image_extension: ImageExtension) -> bool {
)
}
pub fn to_query(
mdi: ExifMetadata,
#[must_use]
fn to_query(
ExifMetadata {
resolution,
date_taken,
location,
camera_data,
artist,
description,
copyright,
exif_version,
}: ExifMetadata,
object_id: exif_data::object_id::Type,
) -> exif_data::CreateUnchecked {
exif_data::CreateUnchecked {
object_id,
_params: vec![
exif_data::camera_data::set(serde_json::to_vec(&mdi.camera_data).ok()),
exif_data::media_date::set(serde_json::to_vec(&mdi.date_taken).ok()),
exif_data::resolution::set(serde_json::to_vec(&mdi.resolution).ok()),
exif_data::media_location::set(serde_json::to_vec(&mdi.location).ok()),
exif_data::artist::set(mdi.artist),
exif_data::description::set(mdi.description),
exif_data::copyright::set(mdi.copyright),
exif_data::exif_version::set(mdi.exif_version),
exif_data::epoch_time::set(mdi.date_taken.map(|x| x.unix_timestamp())),
) -> (Vec<(&'static str, rmpv::Value)>, exif_data::Create) {
let (sync_params, db_params) = chain_optional_iter(
[],
[
option_sync_db_entry!(
serde_json::to_vec(&camera_data).ok(),
exif_data::camera_data
),
option_sync_db_entry!(serde_json::to_vec(&date_taken).ok(), exif_data::media_date),
option_sync_db_entry!(serde_json::to_vec(&resolution).ok(), exif_data::resolution),
option_sync_db_entry!(
serde_json::to_vec(&location).ok(),
exif_data::media_location
),
option_sync_db_entry!(artist, exif_data::artist),
option_sync_db_entry!(description, exif_data::description),
option_sync_db_entry!(copyright, exif_data::copyright),
option_sync_db_entry!(exif_version, exif_data::exif_version),
option_sync_db_entry!(
date_taken.map(|x| x.unix_timestamp()),
exif_data::epoch_time
),
],
}
)
.into_iter()
.unzip();
(
sync_params,
exif_data::Create {
object: object::id::equals(object_id),
_params: db_params,
},
)
}
pub async fn extract(
path: impl AsRef<Path> + Send,
) -> Result<Option<ExifMetadata>, media_processor::NonCriticalError> {
) -> Result<Option<ExifMetadata>, media_processor::NonCriticalMediaProcessorError> {
let path = path.as_ref();
ExifMetadata::from_path(&path).await.map_err(|e| {
media_data_extractor::NonCriticalError::FailedToExtractImageMediaData(
media_data_extractor::NonCriticalMediaDataExtractorError::FailedToExtractImageMediaData(
path.to_path_buf(),
e.to_string(),
)
@ -62,24 +104,62 @@ pub async fn extract(
}
pub async fn save(
media_datas: Vec<(ExifMetadata, object::id::Type)>,
exif_datas: impl IntoIterator<Item = (ExifMetadata, object::id::Type, ObjectPubId)> + Send,
db: &PrismaClient,
) -> Result<u64, media_processor::Error> {
db.exif_data()
.create_many(
media_datas
.into_iter()
.map(|(exif_data, object_id)| to_query(exif_data, object_id))
.collect(),
)
.skip_duplicates()
.exec()
.await
.map(|created| {
#[allow(clippy::cast_sign_loss)]
{
created as u64
}
sync: &SyncManager,
) -> Result<u64, QueryError> {
exif_datas
.into_iter()
.map(|(exif_data, object_id, object_pub_id)| async move {
let (sync_params, create) = to_query(exif_data, object_id);
let db_params = create._params.clone();
sync.write_ops(
db,
(
sync.shared_create(
prisma_sync::exif_data::SyncId {
object: prisma_sync::object::SyncId {
pub_id: object_pub_id.into(),
},
},
sync_params,
),
db.exif_data()
.upsert(exif_data::object_id::equals(object_id), create, db_params)
.select(exif_data::select!({ id })),
),
)
.await
})
.map_err(Into::into)
.collect::<Vec<_>>()
.try_join()
.await
.map(|created_vec| created_vec.len() as u64)
}
#[must_use]
pub fn from_prisma_data(
exif_data::Data {
resolution,
media_date,
media_location,
camera_data,
artist,
description,
copyright,
exif_version,
..
}: exif_data::Data,
) -> ExifMetadata {
ExifMetadata {
camera_data: from_slice_option_to_option(camera_data).unwrap_or_default(),
date_taken: from_slice_option_to_option(media_date).unwrap_or_default(),
resolution: from_slice_option_to_option(resolution).unwrap_or_default(),
location: from_slice_option_to_option(media_location),
artist,
description,
copyright,
exif_version,
}
}

View file

@ -1,5 +1,7 @@
use crate::media_processor::{self, media_data_extractor};
use sd_core_prisma_helpers::object_with_media_data;
use sd_file_ext::extensions::{
AudioExtension, Extension, VideoExtension, ALL_AUDIO_EXTENSIONS, ALL_VIDEO_EXTENSIONS,
};
@ -19,7 +21,10 @@ use sd_prisma::prisma::{
ffmpeg_data, ffmpeg_media_audio_props, ffmpeg_media_chapter, ffmpeg_media_codec,
ffmpeg_media_program, ffmpeg_media_stream, ffmpeg_media_video_props, object, PrismaClient,
};
use sd_utils::db::ffmpeg_data_field_to_db;
use sd_utils::{
db::{ffmpeg_data_field_from_db, ffmpeg_data_field_to_db},
i64_to_frontend,
};
use std::{collections::HashMap, path::Path};
@ -28,6 +33,8 @@ use once_cell::sync::Lazy;
use prisma_client_rust::QueryError;
use tracing::error;
use super::from_slice_option_to_option;
pub static AVAILABLE_EXTENSIONS: Lazy<Vec<Extension>> = Lazy::new(|| {
ALL_AUDIO_EXTENSIONS
.iter()
@ -44,6 +51,7 @@ pub static AVAILABLE_EXTENSIONS: Lazy<Vec<Extension>> = Lazy::new(|| {
.collect()
});
#[must_use]
pub const fn can_extract_for_audio(audio_extension: AudioExtension) -> bool {
use AudioExtension::{
Aac, Adts, Aif, Aiff, Amr, Aptx, Ast, Caf, Flac, Loas, M4a, Mid, Mp2, Mp3, Oga, Ogg, Opus,
@ -63,34 +71,35 @@ pub const fn can_extract_for_audio(audio_extension: AudioExtension) -> bool {
)
}
#[must_use]
pub const fn can_extract_for_video(video_extension: VideoExtension) -> bool {
use VideoExtension::{
Asf, Avi, Avifs, F4v, Flv, Hevc, M2ts, M2v, M4v, Mjpeg, Mkv, Mov, Mp4, Mpe, Mpeg, Mpg, Mts,
Mxf, Ogv, Qt, Swf, Ts, Vob, Webm, Wm, Wmv, Wtv, _3gp,
Asf, Avi, Avifs, F4v, Flv, Hevc, M2ts, M2v, M4v, Mjpeg, Mkv, Mov, Mp4, Mpe, Mpeg, Mpg, Mxf,
Ogv, Qt, Swf, Vob, Webm, Wm, Wmv, Wtv, _3gp,
};
matches!(
video_extension,
Avi | Avifs
| Qt | Mov | Swf
| Mjpeg | Ts | Mts
| Mpeg | Mxf | M2v
| Mpg | Mpe | M2ts
| Flv | Wm | _3gp
| M4v | Wmv | Asf
| Mp4 | Webm | Mkv
| Vob | Ogv | Wtv
| Hevc | F4v
| Mjpeg | Mpeg
| Mxf | M2v | Mpg
| Mpe | M2ts | Flv
| Wm | _3gp | M4v
| Wmv | Asf | Mp4
| Webm | Mkv | Vob
| Ogv | Wtv | Hevc
| F4v // | Ts | Mts TODO: Uncomment when we start using magic instead of extension
)
}
pub async fn extract(
path: impl AsRef<Path> + Send,
) -> Result<FFmpegMetadata, media_processor::NonCriticalError> {
) -> Result<FFmpegMetadata, media_processor::NonCriticalMediaProcessorError> {
let path = path.as_ref();
FFmpegMetadata::from_path(&path).await.map_err(|e| {
media_data_extractor::NonCriticalError::FailedToExtractImageMediaData(
media_data_extractor::NonCriticalMediaDataExtractorError::FailedToExtractImageMediaData(
path.to_path_buf(),
e.to_string(),
)
@ -101,7 +110,7 @@ pub async fn extract(
pub async fn save(
ffmpeg_datas: impl IntoIterator<Item = (FFmpegMetadata, object::id::Type)> + Send,
db: &PrismaClient,
) -> Result<u64, media_processor::Error> {
) -> Result<u64, QueryError> {
ffmpeg_datas
.into_iter()
.map(
@ -180,9 +189,9 @@ async fn create_ffmpeg_data(
)),
ffmpeg_data::metadata::set(
serde_json::to_vec(&metadata)
.map_err(|err| {
error!("Error reading FFmpegData metadata: {err:#?}");
err
.map_err(|e| {
error!(?e, "Error reading FFmpegData metadata;");
e
})
.ok(),
),
@ -224,9 +233,9 @@ async fn create_ffmpeg_chapters(
ffmpeg_data_id,
_params: vec![ffmpeg_media_chapter::metadata::set(
serde_json::to_vec(&metadata)
.map_err(|err| {
error!("Error reading FFmpegMediaChapter metadata: {err:#?}");
err
.map_err(|e| {
error!(?e, "Error reading FFmpegMediaChapter metadata;");
e
})
.ok(),
)],
@ -244,37 +253,36 @@ async fn create_ffmpeg_programs(
programs: Vec<Program>,
db: &PrismaClient,
) -> Result<Vec<(ffmpeg_media_program::program_id::Type, Vec<Stream>)>, QueryError> {
let (creates, streams_by_program_id) =
programs
.into_iter()
.map(
|Program {
id: program_id,
name,
metadata,
streams,
}| {
(
ffmpeg_media_program::CreateUnchecked {
program_id,
ffmpeg_data_id: data_id,
_params: vec![
ffmpeg_media_program::name::set(name),
ffmpeg_media_program::metadata::set(
serde_json::to_vec(&metadata)
.map_err(|err| {
error!("Error reading FFmpegMediaProgram metadata: {err:#?}");
err
})
.ok(),
),
],
},
(program_id, streams),
)
},
)
.unzip::<_, _, Vec<_>, Vec<_>>();
let (creates, streams_by_program_id) = programs
.into_iter()
.map(
|Program {
id: program_id,
name,
metadata,
streams,
}| {
(
ffmpeg_media_program::CreateUnchecked {
program_id,
ffmpeg_data_id: data_id,
_params: vec![
ffmpeg_media_program::name::set(name),
ffmpeg_media_program::metadata::set(
serde_json::to_vec(&metadata)
.map_err(|e| {
error!(?e, "Error reading FFmpegMediaProgram metadata;");
e
})
.ok(),
),
],
},
(program_id, streams),
)
},
)
.unzip::<_, _, Vec<_>, Vec<_>>();
db.ffmpeg_media_program()
.create_many(creates)
@ -333,9 +341,9 @@ async fn create_ffmpeg_streams(
ffmpeg_media_stream::language::set(metadata.language.clone()),
ffmpeg_media_stream::metadata::set(
serde_json::to_vec(&metadata)
.map_err(|err| {
error!("Error reading FFmpegMediaStream metadata: {err:#?}");
err
.map_err(|e| {
error!(?e, "Error reading FFmpegMediaStream metadata;");
e
})
.ok(),
),
@ -570,3 +578,207 @@ async fn create_ffmpeg_video_props(
.await
.map(|_| ())
}
pub fn from_prisma_data(
object_with_media_data::ffmpeg_data::Data {
formats,
duration,
start_time,
bit_rate,
metadata,
chapters,
programs,
..
}: object_with_media_data::ffmpeg_data::Data,
) -> FFmpegMetadata {
FFmpegMetadata {
formats: formats.split(',').map(String::from).collect::<Vec<_>>(),
duration: duration.map(|duration| i64_to_frontend(ffmpeg_data_field_from_db(&duration))),
start_time: start_time
.map(|start_time| i64_to_frontend(ffmpeg_data_field_from_db(&start_time))),
bit_rate: i64_to_frontend(ffmpeg_data_field_from_db(&bit_rate)),
chapters: chapters_from_prisma_data(chapters),
programs: programs_from_prisma_data(programs),
metadata: from_slice_option_to_option(metadata).unwrap_or_default(),
}
}
#[inline]
fn chapters_from_prisma_data(chapters: Vec<ffmpeg_media_chapter::Data>) -> Vec<Chapter> {
chapters
.into_iter()
.map(
|ffmpeg_media_chapter::Data {
chapter_id,
start,
end,
time_base_den,
time_base_num,
metadata,
..
}| Chapter {
id: chapter_id,
start: i64_to_frontend(ffmpeg_data_field_from_db(&start)),
end: i64_to_frontend(ffmpeg_data_field_from_db(&end)),
time_base_den,
time_base_num,
metadata: from_slice_option_to_option(metadata).unwrap_or_default(),
},
)
.collect()
}
#[inline]
fn programs_from_prisma_data(
programs: Vec<object_with_media_data::ffmpeg_data::programs::Data>,
) -> Vec<Program> {
programs
.into_iter()
.map(
|object_with_media_data::ffmpeg_data::programs::Data {
program_id,
name,
metadata,
streams,
..
}| Program {
id: program_id,
name,
streams: streams_from_prisma_data(streams),
metadata: from_slice_option_to_option(metadata).unwrap_or_default(),
},
)
.collect()
}
fn streams_from_prisma_data(
streams: Vec<object_with_media_data::ffmpeg_data::programs::streams::Data>,
) -> Vec<Stream> {
streams
.into_iter()
.map(
|object_with_media_data::ffmpeg_data::programs::streams::Data {
stream_id,
name,
aspect_ratio_num,
aspect_ratio_den,
frames_per_second_num,
frames_per_second_den,
time_base_real_den,
time_base_real_num,
dispositions,
metadata,
codec,
..
}| {
Stream {
id: stream_id,
name,
codec: codec_from_prisma_data(codec),
aspect_ratio_num,
aspect_ratio_den,
frames_per_second_num,
frames_per_second_den,
time_base_real_den,
time_base_real_num,
dispositions: dispositions
.map(|dispositions| {
dispositions
.split(',')
.map(String::from)
.collect::<Vec<_>>()
})
.unwrap_or_default(),
metadata: from_slice_option_to_option(metadata).unwrap_or_default(),
}
},
)
.collect()
}
fn codec_from_prisma_data(
codec: Option<object_with_media_data::ffmpeg_data::programs::streams::codec::Data>,
) -> Option<Codec> {
codec.map(
|object_with_media_data::ffmpeg_data::programs::streams::codec::Data {
kind,
sub_kind,
tag,
name,
profile,
bit_rate,
audio_props,
video_props,
..
}| Codec {
kind,
sub_kind,
tag,
name,
profile,
bit_rate,
props: match (audio_props, video_props) {
(
Some(ffmpeg_media_audio_props::Data {
delay,
padding,
sample_rate,
sample_format,
bit_per_sample,
channel_layout,
..
}),
None,
) => Some(Props::Audio(AudioProps {
delay,
padding,
sample_rate,
sample_format,
bit_per_sample,
channel_layout,
})),
(
None,
Some(ffmpeg_media_video_props::Data {
pixel_format,
color_range,
bits_per_channel,
color_space,
color_primaries,
color_transfer,
field_order,
chroma_location,
width,
height,
aspect_ratio_num,
aspect_ratio_den,
properties,
..
}),
) => Some(Props::Video(VideoProps {
pixel_format,
color_range,
bits_per_channel,
color_space,
color_primaries,
color_transfer,
field_order,
chroma_location,
width,
height,
aspect_ratio_num,
aspect_ratio_den,
properties: properties
.map(|dispositions| {
dispositions
.split(',')
.map(String::from)
.collect::<Vec<_>>()
})
.unwrap_or_default(),
})),
_ => None,
},
},
)
}

View file

@ -1,3 +1,12 @@
pub mod exif_media_data;
pub mod ffmpeg_media_data;
pub mod thumbnailer;
#[must_use]
fn from_slice_option_to_option<T: serde::Serialize + serde::de::DeserializeOwned>(
value: Option<Vec<u8>>,
) -> Option<T> {
value
.map(|x| serde_json::from_slice(&x).ok())
.unwrap_or_default()
}
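A minimal usage sketch of this helper (illustrative only; the value type below is hypothetical): a missing column and an undecodable blob both collapse to `None`.
// Sketch: both a NULL column and an invalid JSON blob yield None.
let valid: Option<serde_json::Value> = from_slice_option_to_option(Some(br#"{"x":1}"#.to_vec())); // Some(...)
let missing: Option<serde_json::Value> = from_slice_option_to_option(None); // None
let invalid: Option<serde_json::Value> = from_slice_option_to_option(Some(b"not json".to_vec())); // None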

View file

@ -1,16 +1,37 @@
use once_cell::sync::Lazy;
use crate::media_processor::thumbnailer;
use sd_core_prisma_helpers::CasId;
use sd_file_ext::extensions::{
DocumentExtension, Extension, ImageExtension, ALL_DOCUMENT_EXTENSIONS, ALL_IMAGE_EXTENSIONS,
};
use sd_images::{format_image, scale_dimensions, ConvertibleExtension};
use sd_media_metadata::exif::Orientation;
use sd_utils::error::FileIOError;
#[cfg(feature = "ffmpeg")]
use sd_file_ext::extensions::{VideoExtension, ALL_VIDEO_EXTENSIONS};
use std::time::Duration;
use std::{
ops::Deref,
path::{Path, PathBuf},
str::FromStr,
time::Duration,
};
use image::{imageops, DynamicImage, GenericImageView};
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use specta::Type;
use tokio::{
fs, io,
sync::{oneshot, Mutex},
task::spawn_blocking,
time::{sleep, Instant},
};
use tracing::{error, instrument, trace};
use uuid::Uuid;
use webp::Encoder;
// Files names constants
pub const THUMBNAIL_CACHE_DIR_NAME: &str = "thumbnails";
@ -25,8 +46,12 @@ pub const TARGET_PX: f32 = 1_048_576.0; // 1024x1024
/// and is treated as a percentage (so 60% in this case, or it's the same as multiplying by `0.6`).
pub const TARGET_QUALITY: f32 = 60.0;
/// How much time we allow for the thumbnail generation process to complete before we give up.
pub const THUMBNAIL_GENERATION_TIMEOUT: Duration = Duration::from_secs(60);
/// How much time we allow for the thumbnailer task to complete before we give up.
pub const THUMBNAILER_TASK_TIMEOUT: Duration = Duration::from_secs(60 * 5);
pub fn get_thumbnails_directory(data_directory: impl AsRef<Path>) -> PathBuf {
data_directory.as_ref().join(THUMBNAIL_CACHE_DIR_NAME)
}
#[cfg(feature = "ffmpeg")]
pub static THUMBNAILABLE_VIDEO_EXTENSIONS: Lazy<Vec<Extension>> = Lazy::new(|| {
@ -68,25 +93,43 @@ pub static ALL_THUMBNAILABLE_EXTENSIONS: Lazy<Vec<Extension>> = Lazy::new(|| {
/// This type is used to pass the relevant data to the frontend so it can request the thumbnail.
/// It allows extending the shard hex to support deeper directory structures in the future
#[derive(Debug, Serialize, Deserialize, Type)]
#[derive(Debug, Serialize, Deserialize, Type, Clone)]
pub struct ThumbKey {
pub shard_hex: String,
pub cas_id: String,
pub cas_id: CasId<'static>,
pub base_directory_str: String,
}
impl ThumbKey {
#[must_use]
pub fn new(cas_id: &str, kind: &ThumbnailKind) -> Self {
pub fn new(cas_id: CasId<'static>, kind: &ThumbnailKind) -> Self {
Self {
shard_hex: get_shard_hex(cas_id).to_string(),
cas_id: cas_id.to_string(),
shard_hex: get_shard_hex(&cas_id).to_string(),
cas_id,
base_directory_str: match kind {
ThumbnailKind::Ephemeral => String::from(EPHEMERAL_DIR),
ThumbnailKind::Indexed(library_id) => library_id.to_string(),
},
}
}
#[must_use]
pub fn new_indexed(cas_id: CasId<'static>, library_id: Uuid) -> Self {
Self {
shard_hex: get_shard_hex(&cas_id).to_string(),
cas_id,
base_directory_str: library_id.to_string(),
}
}
#[must_use]
pub fn new_ephemeral(cas_id: CasId<'static>) -> Self {
Self {
shard_hex: get_shard_hex(&cas_id).to_string(),
cas_id,
base_directory_str: String::from(EPHEMERAL_DIR),
}
}
}
#[derive(Debug, Serialize, Deserialize, Type, Clone, Copy)]
@ -95,6 +138,41 @@ pub enum ThumbnailKind {
Indexed(Uuid),
}
impl ThumbnailKind {
pub fn compute_path(&self, data_directory: impl AsRef<Path>, cas_id: &CasId<'_>) -> PathBuf {
let mut thumb_path = get_thumbnails_directory(data_directory);
match self {
Self::Ephemeral => thumb_path.push(EPHEMERAL_DIR),
Self::Indexed(library_id) => {
thumb_path.push(library_id.to_string());
}
}
thumb_path.push(get_shard_hex(cas_id));
thumb_path.push(cas_id.as_str());
thumb_path.set_extension(WEBP_EXTENSION);
thumb_path
}
}
#[derive(Debug, Serialize, Deserialize)]
pub struct GenerateThumbnailArgs<'cas_id> {
pub extension: String,
pub cas_id: CasId<'cas_id>,
pub path: PathBuf,
}
impl<'cas_id> GenerateThumbnailArgs<'cas_id> {
#[must_use]
pub const fn new(extension: String, cas_id: CasId<'cas_id>, path: PathBuf) -> Self {
Self {
extension,
cas_id,
path,
}
}
}
/// The practice of dividing files into hex coded folders, often called "sharding,"
/// is mainly used to optimize file system performance. File systems can start to slow down
/// as the number of files in a directory increases. Thus, it's often beneficial to split
@ -105,18 +183,21 @@ pub enum ThumbnailKind {
/// three characters of the hash, this gives us 4096 (16^3) possible directories,
/// named 000 to fff.
#[inline]
pub fn get_shard_hex(cas_id: &str) -> &str {
#[must_use]
pub fn get_shard_hex<'cas_id>(cas_id: &'cas_id CasId<'cas_id>) -> &'cas_id str {
// Use the first three characters of the hash as the directory name
&cas_id[0..3]
&cas_id.as_str()[0..3]
}
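An illustrative sketch of the resulting on-disk layout (not part of the actual changes; the hash value is hypothetical):
// Sketch: the first three hex characters pick one of 4096 (16^3) shard directories.
fn sharded_thumbnail_path(thumbnails_dir: &std::path::Path, cas_id_hex: &str) -> std::path::PathBuf {
    let mut path = thumbnails_dir.join(&cas_id_hex[0..3]).join(cas_id_hex);
    path.set_extension("webp");
    path
}
// e.g. sharded_thumbnail_path(Path::new("thumbnails/ephemeral"), "a1b2c3d4")
//      -> "thumbnails/ephemeral/a1b/a1b2c3d4.webp"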
#[cfg(feature = "ffmpeg")]
#[must_use]
pub const fn can_generate_thumbnail_for_video(video_extension: VideoExtension) -> bool {
use VideoExtension::{Hevc, M2ts, M2v, Mpg, Mts, Swf, Ts};
// File extensions that are specifically not supported by the thumbnailer
!matches!(video_extension, Mpg | Swf | M2v | Hevc | M2ts | Mts | Ts)
}
#[must_use]
pub const fn can_generate_thumbnail_for_image(image_extension: ImageExtension) -> bool {
use ImageExtension::{
Avif, Bmp, Gif, Heic, Heics, Heif, Heifs, Ico, Jpeg, Jpg, Png, Svg, Webp,
@ -128,8 +209,291 @@ pub const fn can_generate_thumbnail_for_image(image_extension: ImageExtension) -
)
}
#[must_use]
pub const fn can_generate_thumbnail_for_document(document_extension: DocumentExtension) -> bool {
use DocumentExtension::Pdf;
matches!(document_extension, Pdf)
}
#[derive(Debug)]
pub enum GenerationStatus {
Generated,
Skipped,
}
#[instrument(skip(thumbnails_directory, cas_id, should_regenerate, kind))]
pub async fn generate_thumbnail(
thumbnails_directory: &Path,
GenerateThumbnailArgs {
extension,
cas_id,
path,
}: &GenerateThumbnailArgs<'_>,
kind: &ThumbnailKind,
should_regenerate: bool,
) -> (
Duration,
Result<(ThumbKey, GenerationStatus), thumbnailer::NonCriticalThumbnailerError>,
) {
trace!("Generating thumbnail");
let start = Instant::now();
let mut output_path = match kind {
ThumbnailKind::Ephemeral => thumbnails_directory.join(EPHEMERAL_DIR),
ThumbnailKind::Indexed(library_id) => thumbnails_directory.join(library_id.to_string()),
};
output_path.push(get_shard_hex(cas_id));
output_path.push(cas_id.as_str());
output_path.set_extension(WEBP_EXTENSION);
if let Err(e) = fs::metadata(&*output_path).await {
if e.kind() != io::ErrorKind::NotFound {
error!(
?e,
"Failed to check if thumbnail exists, but we will try to generate it anyway;"
);
}
// Otherwise we're good: the thumbnail doesn't exist, so we can generate it
} else if !should_regenerate {
trace!("Skipping thumbnail generation because it already exists");
return (
start.elapsed(),
Ok((
ThumbKey::new(cas_id.to_owned(), kind),
GenerationStatus::Skipped,
)),
);
}
if let Ok(extension) = ImageExtension::from_str(extension) {
if can_generate_thumbnail_for_image(extension) {
trace!("Generating image thumbnail");
if let Err(e) = generate_image_thumbnail(&path, &output_path).await {
return (start.elapsed(), Err(e));
}
trace!("Generated image thumbnail");
}
} else if let Ok(extension) = DocumentExtension::from_str(extension) {
if can_generate_thumbnail_for_document(extension) {
trace!("Generating document thumbnail");
if let Err(e) = generate_image_thumbnail(&path, &output_path).await {
return (start.elapsed(), Err(e));
}
trace!("Generating document thumbnail");
}
}
#[cfg(feature = "ffmpeg")]
{
use crate::media_processor::helpers::thumbnailer::can_generate_thumbnail_for_video;
use sd_file_ext::extensions::VideoExtension;
if let Ok(extension) = VideoExtension::from_str(extension) {
if can_generate_thumbnail_for_video(extension) {
trace!("Generating video thumbnail");
if let Err(e) = generate_video_thumbnail(&path, &output_path).await {
return (start.elapsed(), Err(e));
}
trace!("Generated video thumbnail");
}
}
}
trace!("Generated thumbnail");
(
start.elapsed(),
Ok((
ThumbKey::new(cas_id.to_owned(), kind),
GenerationStatus::Generated,
)),
)
}
fn inner_generate_image_thumbnail(
file_path: PathBuf,
) -> Result<Vec<u8>, thumbnailer::NonCriticalThumbnailerError> {
let mut img = format_image(&file_path).map_err(|e| {
thumbnailer::NonCriticalThumbnailerError::FormatImage(file_path.clone(), e.to_string())
})?;
let (w, h) = img.dimensions();
#[allow(clippy::cast_precision_loss)]
let (w_scaled, h_scaled) = scale_dimensions(w as f32, h as f32, TARGET_PX);
// Optionally, resize the existing photo and convert back into DynamicImage
if w != w_scaled && h != h_scaled {
img = DynamicImage::ImageRgba8(imageops::resize(
&img,
w_scaled,
h_scaled,
imageops::FilterType::Triangle,
));
}
// This corrects the rotation/flip of the image based on the *available* EXIF data;
// not all images have EXIF data, so we don't error. We also don't rotate HEIF as that's against the spec
if let Some(orientation) = Orientation::from_path(&file_path) {
if ConvertibleExtension::try_from(file_path.as_ref())
.expect("we already checked if the image was convertible")
.should_rotate()
{
img = orientation.correct_thumbnail(img);
}
}
// Create the WebP encoder for the above image
let encoder = Encoder::from_image(&img).map_err(|reason| {
thumbnailer::NonCriticalThumbnailerError::WebPEncoding(file_path, reason.to_string())
})?;
// Type `WebPMemory` is !Send, which would make the `Future` in this function `!Send` too,
// so we `deref` to get a `&[u8]` and then `to_owned` it into a `Vec<u8>`,
// which implies an unwanted clone...
Ok(encoder.encode(TARGET_QUALITY).deref().to_owned())
}
#[instrument(
skip_all,
fields(
input_path = %file_path.as_ref().display(),
output_path = %output_path.as_ref().display()
)
)]
async fn generate_image_thumbnail(
file_path: impl AsRef<Path> + Send,
output_path: impl AsRef<Path> + Send,
) -> Result<(), thumbnailer::NonCriticalThumbnailerError> {
let file_path = file_path.as_ref().to_path_buf();
let (tx, rx) = oneshot::channel();
// Using a channel instead of awaiting the JoinHandle, as for some reason
// the JoinHandle can take some extra time to complete
let handle = spawn_blocking({
let file_path = file_path.clone();
move || {
// Handling error on receiver side
let _ = tx.send(inner_generate_image_thumbnail(file_path));
}
});
let webp = if let Ok(res) = rx.await {
res?
} else {
error!("Failed to generate thumbnail");
return Err(
thumbnailer::NonCriticalThumbnailerError::PanicWhileGeneratingThumbnail(
file_path,
handle
.await
.expect_err("as the channel was closed, then the spawned task panicked")
.to_string(),
),
);
};
trace!("Generated thumbnail bytes");
let output_path = output_path.as_ref();
if let Some(shard_dir) = output_path.parent() {
fs::create_dir_all(shard_dir).await.map_err(|e| {
thumbnailer::NonCriticalThumbnailerError::CreateShardDirectory(
FileIOError::from((shard_dir, e)).to_string(),
)
})?;
} else {
error!("Failed to get parent directory for sharding parent directory");
}
trace!("Created shard directory and writing it to disk");
let res = fs::write(output_path, &webp).await.map_err(|e| {
thumbnailer::NonCriticalThumbnailerError::SaveThumbnail(
file_path,
FileIOError::from((output_path, e)).to_string(),
)
});
trace!("Wrote thumbnail to disk");
res
}
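The `spawn_blocking` plus oneshot channel dance above can be reduced to a standalone sketch (illustrative only; `expensive_cpu_work` is a hypothetical placeholder):
// Run !Send / CPU-bound work on the blocking pool and hand the result back over a
// oneshot channel, keeping the surrounding future Send.
async fn run_blocking_sketch() -> Result<Vec<u8>, String> {
    let (tx, rx) = tokio::sync::oneshot::channel();
    let handle = tokio::task::spawn_blocking(move || {
        // Errors are handled on the receiver side, so a failed send is ignored here.
        let _ = tx.send(expensive_cpu_work());
    });
    match rx.await {
        Ok(res) => res,
        // The sender was dropped without sending, so the blocking task must have panicked.
        Err(_) => Err(handle
            .await
            .expect_err("channel closed, so the spawned task panicked")
            .to_string()),
    }
}
fn expensive_cpu_work() -> Result<Vec<u8>, String> {
    Ok(Vec::new()) // placeholder for the real encoding work
}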
#[instrument(
skip_all,
fields(
input_path = %file_path.as_ref().display(),
output_path = %output_path.as_ref().display()
)
)]
#[cfg(feature = "ffmpeg")]
async fn generate_video_thumbnail(
file_path: impl AsRef<Path> + Send,
output_path: impl AsRef<Path> + Send,
) -> Result<(), thumbnailer::NonCriticalThumbnailerError> {
use sd_ffmpeg::{to_thumbnail, ThumbnailSize};
let file_path = file_path.as_ref();
to_thumbnail(
file_path,
output_path,
ThumbnailSize::Scale(1024),
TARGET_QUALITY,
)
.await
.map_err(|e| {
thumbnailer::NonCriticalThumbnailerError::VideoThumbnailGenerationFailed(
file_path.to_path_buf(),
e.to_string(),
)
})
}
const HALF_SEC: Duration = Duration::from_millis(500);
static LAST_SINGLE_THUMB_GENERATED_LOCK: Lazy<Mutex<Instant>> =
Lazy::new(|| Mutex::new(Instant::now()));
/// WARNING!!!! DON'T USE THIS FUNCTION IN A LOOP!!!!!!!!!!!!! It will be pretty slow on purpose!
pub async fn generate_single_thumbnail(
thumbnails_directory: impl AsRef<Path> + Send,
extension: String,
cas_id: CasId<'static>,
path: impl AsRef<Path> + Send,
kind: ThumbnailKind,
) -> Result<(), thumbnailer::NonCriticalThumbnailerError> {
let mut last_single_thumb_generated_guard = LAST_SINGLE_THUMB_GENERATED_LOCK.lock().await;
let elapsed = Instant::now() - *last_single_thumb_generated_guard;
if elapsed < HALF_SEC {
// This will choke up in case someone tries to use this method in a loop, otherwise
// it would consume all the machine's resources like a glutton monster from hell
sleep(HALF_SEC - elapsed).await;
}
let (_duration, res) = generate_thumbnail(
thumbnails_directory.as_ref(),
&GenerateThumbnailArgs {
extension,
cas_id,
path: path.as_ref().to_path_buf(),
},
&kind,
false,
)
.await;
let (_thumb_key, status) = res?;
if matches!(status, GenerationStatus::Generated) {
*last_single_thumb_generated_guard = Instant::now();
drop(last_single_thumb_generated_guard); // Clippy was weirdly complaining about not doing an "early" drop here
}
Ok(())
}

File diff suppressed because it is too large

View file

@ -1,11 +1,15 @@
use crate::{utils::sub_path, OuterContext, UpdateEvent};
use sd_core_file_path_helper::FilePathError;
use sd_core_file_path_helper::{FilePathError, IsolatedFilePathData};
use sd_core_prisma_helpers::file_path_for_media_processor;
use sd_file_ext::extensions::Extension;
use sd_prisma::prisma::{file_path, object, PrismaClient};
use sd_utils::db::MissingFieldError;
use std::fmt;
use std::{collections::HashMap, fmt};
use prisma_client_rust::{raw, PrismaValue};
use serde::{Deserialize, Serialize};
use specta::Type;
@ -19,10 +23,22 @@ pub use tasks::{
thumbnailer::{self, Thumbnailer},
};
pub use helpers::thumbnailer::{ThumbKey, ThumbnailKind};
pub use helpers::{
exif_media_data, ffmpeg_media_data,
thumbnailer::{
can_generate_thumbnail_for_document, can_generate_thumbnail_for_image,
generate_single_thumbnail, get_shard_hex, get_thumbnails_directory, GenerateThumbnailArgs,
ThumbKey, ThumbnailKind, WEBP_EXTENSION,
},
};
#[cfg(feature = "ffmpeg")]
pub use helpers::thumbnailer::can_generate_thumbnail_for_video;
pub use shallow::shallow;
use self::thumbnailer::NewThumbnailReporter;
use media_data_extractor::NonCriticalMediaDataExtractorError;
use thumbnailer::{NewThumbnailReporter, NonCriticalThumbnailerError};
const BATCH_SIZE: usize = 10;
@ -43,31 +59,126 @@ pub enum Error {
impl From<Error> for rspc::Error {
fn from(e: Error) -> Self {
Self::with_cause(rspc::ErrorCode::InternalServerError, e.to_string(), e)
match e {
Error::SubPath(sub_path_err) => sub_path_err.into(),
_ => Self::with_cause(rspc::ErrorCode::InternalServerError, e.to_string(), e),
}
}
}
#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type)]
pub enum NonCriticalError {
#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type, Clone)]
#[serde(rename_all = "snake_case")]
pub enum NonCriticalMediaProcessorError {
#[error(transparent)]
MediaDataExtractor(#[from] media_data_extractor::NonCriticalError),
MediaDataExtractor(#[from] NonCriticalMediaDataExtractorError),
#[error(transparent)]
Thumbnailer(#[from] thumbnailer::NonCriticalError),
Thumbnailer(#[from] NonCriticalThumbnailerError),
}
struct NewThumbnailsReporter<Ctx: OuterContext> {
ctx: Ctx,
#[derive(Clone)]
pub struct NewThumbnailsReporter<OuterCtx: OuterContext> {
pub ctx: OuterCtx,
}
impl<Ctx: OuterContext> fmt::Debug for NewThumbnailsReporter<Ctx> {
impl<OuterCtx: OuterContext> fmt::Debug for NewThumbnailsReporter<OuterCtx> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("NewThumbnailsReporter").finish()
}
}
impl<Ctx: OuterContext> NewThumbnailReporter for NewThumbnailsReporter<Ctx> {
impl<OuterCtx: OuterContext> NewThumbnailReporter for NewThumbnailsReporter<OuterCtx> {
fn new_thumbnail(&self, thumb_key: ThumbKey) {
self.ctx
.report_update(UpdateEvent::NewThumbnailEvent { thumb_key });
.report_update(UpdateEvent::NewThumbnail { thumb_key });
}
}
#[derive(Deserialize)]
struct RawFilePathForMediaProcessor {
id: file_path::id::Type,
materialized_path: file_path::materialized_path::Type,
is_dir: file_path::is_dir::Type,
name: file_path::name::Type,
extension: file_path::extension::Type,
cas_id: file_path::cas_id::Type,
object_id: object::id::Type,
object_pub_id: object::pub_id::Type,
}
impl From<RawFilePathForMediaProcessor> for file_path_for_media_processor::Data {
fn from(
RawFilePathForMediaProcessor {
id,
materialized_path,
is_dir,
name,
extension,
cas_id,
object_id,
object_pub_id,
}: RawFilePathForMediaProcessor,
) -> Self {
Self {
id,
materialized_path,
is_dir,
name,
extension,
cas_id,
object: Some(file_path_for_media_processor::object::Data {
id: object_id,
pub_id: object_pub_id,
}),
}
}
}
async fn get_direct_children_files_by_extensions(
parent_iso_file_path: &IsolatedFilePathData<'_>,
extensions: &[Extension],
db: &PrismaClient,
) -> Result<Vec<file_path_for_media_processor::Data>, Error> {
// FIXME: Had to use the format! macro because PCR doesn't support IN with Vec for SQLite.
// We have no data coming from the user, so this is SQL-injection safe
let unique_by_object_id = db
._query_raw::<RawFilePathForMediaProcessor>(raw!(
&format!(
"SELECT
file_path.id,
file_path.materialized_path,
file_path.is_dir,
file_path.name,
file_path.extension,
file_path.cas_id,
object.id as 'object_id',
object.pub_id as 'object_pub_id'
FROM file_path
INNER JOIN object ON object.id = file_path.object_id
WHERE
location_id={{}}
AND cas_id IS NOT NULL
AND LOWER(extension) IN ({})
AND materialized_path = {{}}
ORDER BY name ASC",
extensions
.iter()
.map(|ext| format!("LOWER('{ext}')"))
.collect::<Vec<_>>()
.join(",")
),
PrismaValue::Int(parent_iso_file_path.location_id()),
PrismaValue::String(
parent_iso_file_path
.materialized_path_for_children()
.expect("sub path iso_file_path must be a directory")
)
))
.exec()
.await?
.into_iter()
.map(|raw_file_path| (raw_file_path.object_id, raw_file_path))
.collect::<HashMap<_, _>>();
Ok(unique_by_object_id.into_values().map(Into::into).collect())
}
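A quick sketch of how the IN (...) fragment above gets rendered (illustrative only; the extension list is hypothetical):
// ["jpg", "png"] -> "LOWER('jpg'),LOWER('png')", so the query ends up with
//   ... AND LOWER(extension) IN (LOWER('jpg'),LOWER('png')) ...
let rendered = ["jpg", "png"]
    .iter()
    .map(|ext| format!("LOWER('{ext}')"))
    .collect::<Vec<_>>()
    .join(",");
assert_eq!(rendered, "LOWER('jpg'),LOWER('png')");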

View file

@ -4,9 +4,8 @@ use crate::{
};
use sd_core_file_path_helper::IsolatedFilePathData;
use sd_core_prisma_helpers::file_path_for_media_processor;
use sd_core_sync::Manager as SyncManager;
use sd_file_ext::extensions::Extension;
use sd_prisma::prisma::{location, PrismaClient};
use sd_task_system::{
BaseTaskDispatcher, CancelTaskOnDrop, IntoTask, TaskDispatcher, TaskHandle, TaskOutput,
@ -19,15 +18,18 @@ use std::{
sync::Arc,
};
use futures::StreamExt;
use futures_concurrency::future::{FutureGroup, TryJoin};
use futures::{stream::FuturesUnordered, StreamExt};
use futures_concurrency::future::TryJoin;
use itertools::Itertools;
use prisma_client_rust::{raw, PrismaValue};
use tracing::{debug, warn};
use super::{
get_direct_children_files_by_extensions,
helpers::{self, exif_media_data, ffmpeg_media_data, thumbnailer::THUMBNAIL_CACHE_DIR_NAME},
tasks::{self, media_data_extractor, thumbnailer},
tasks::{
self, media_data_extractor,
thumbnailer::{self, NewThumbnailReporter},
},
NewThumbnailsReporter, BATCH_SIZE,
};
@ -35,8 +37,8 @@ use super::{
pub async fn shallow(
location: location::Data,
sub_path: impl AsRef<Path> + Send,
dispatcher: BaseTaskDispatcher<Error>,
ctx: impl OuterContext,
dispatcher: &BaseTaskDispatcher<Error>,
ctx: &impl OuterContext,
) -> Result<Vec<NonCriticalError>, Error> {
let sub_path = sub_path.as_ref();
@ -47,14 +49,13 @@ pub async fn shallow(
let location = Arc::new(location);
let sub_iso_file_path = maybe_get_iso_file_path_from_sub_path(
let sub_iso_file_path = maybe_get_iso_file_path_from_sub_path::<media_processor::Error>(
location.id,
&Some(sub_path),
Some(sub_path),
&*location_path,
ctx.db(),
)
.await
.map_err(media_processor::Error::from)?
.await?
.map_or_else(
|| {
IsolatedFilePathData::new(location.id, &*location_path, &*location_path, true)
@ -65,37 +66,70 @@ pub async fn shallow(
let mut errors = vec![];
let mut futures = dispatch_media_data_extractor_tasks(
let media_data_extraction_tasks = dispatch_media_data_extractor_tasks(
ctx.db(),
ctx.sync(),
&sub_iso_file_path,
&location_path,
&dispatcher,
dispatcher,
)
.await?
.into_iter()
.map(CancelTaskOnDrop)
.chain(
dispatch_thumbnailer_tasks(&sub_iso_file_path, false, &location_path, &dispatcher, &ctx)
.await?
.into_iter()
.map(CancelTaskOnDrop),
)
.collect::<FutureGroup<_>>();
.await?;
let total_media_data_extraction_tasks = media_data_extraction_tasks.len();
let thumbnailer_tasks =
dispatch_thumbnailer_tasks(&sub_iso_file_path, false, &location_path, dispatcher, ctx)
.await?;
let total_thumbnailer_tasks = thumbnailer_tasks.len();
let mut futures = media_data_extraction_tasks
.into_iter()
.chain(thumbnailer_tasks.into_iter())
.map(CancelTaskOnDrop::new)
.collect::<FuturesUnordered<_>>();
let mut completed_media_data_extraction_tasks = 0;
let mut completed_thumbnailer_tasks = 0;
while let Some(res) = futures.next().await {
match res {
Ok(TaskStatus::Done((_, TaskOutput::Out(out)))) => {
if out.is::<media_data_extractor::Output>() {
errors.extend(
out.downcast::<media_data_extractor::Output>()
.expect("just checked")
.errors,
let media_data_extractor::Output {
db_read_time,
filtering_time,
extraction_time,
db_write_time,
errors: new_errors,
..
} = *out
.downcast::<media_data_extractor::Output>()
.expect("just checked");
errors.extend(new_errors);
completed_media_data_extraction_tasks += 1;
debug!(
"Media data extraction task ({completed_media_data_extraction_tasks}/\
{total_media_data_extraction_tasks}) completed in {:?};",
db_read_time + filtering_time + extraction_time + db_write_time
);
} else if out.is::<thumbnailer::Output>() {
errors.extend(
out.downcast::<thumbnailer::Output>()
.expect("just checked")
.errors,
let thumbnailer::Output {
total_time,
errors: new_errors,
..
} = *out.downcast::<thumbnailer::Output>().expect("just checked");
errors.extend(new_errors);
completed_thumbnailer_tasks += 1;
debug!(
"Thumbnailer task ({completed_thumbnailer_tasks}/{total_thumbnailer_tasks}) \
completed in {total_time:?};",
);
} else {
unreachable!(
@ -120,20 +154,21 @@ pub async fn shallow(
async fn dispatch_media_data_extractor_tasks(
db: &Arc<PrismaClient>,
sync: &Arc<SyncManager>,
parent_iso_file_path: &IsolatedFilePathData<'_>,
location_path: &Arc<PathBuf>,
dispatcher: &BaseTaskDispatcher<Error>,
) -> Result<Vec<TaskHandle<Error>>, media_processor::Error> {
) -> Result<Vec<TaskHandle<Error>>, Error> {
let (extract_exif_file_paths, extract_ffmpeg_file_paths) = (
get_files_by_extensions(
db,
get_direct_children_files_by_extensions(
parent_iso_file_path,
&exif_media_data::AVAILABLE_EXTENSIONS,
),
get_files_by_extensions(
db,
),
get_direct_children_files_by_extensions(
parent_iso_file_path,
&ffmpeg_media_data::AVAILABLE_EXTENSIONS,
db,
),
)
.try_join()
@ -150,6 +185,7 @@ async fn dispatch_media_data_extractor_tasks(
parent_iso_file_path.location_id(),
Arc::clone(location_path),
Arc::clone(db),
Arc::clone(sync),
)
})
.map(IntoTask::into_task)
@ -165,47 +201,20 @@ async fn dispatch_media_data_extractor_tasks(
parent_iso_file_path.location_id(),
Arc::clone(location_path),
Arc::clone(db),
Arc::clone(sync),
)
})
.map(IntoTask::into_task),
)
.collect::<Vec<_>>();
Ok(dispatcher.dispatch_many_boxed(tasks).await)
}
async fn get_files_by_extensions(
db: &PrismaClient,
parent_iso_file_path: &IsolatedFilePathData<'_>,
extensions: &[Extension],
) -> Result<Vec<file_path_for_media_processor::Data>, media_processor::Error> {
// FIXME: Had to use format! macro because PCR doesn't support IN with Vec for SQLite
// We have no data coming from the user, so this is sql injection safe
db._query_raw(raw!(
&format!(
"SELECT id, materialized_path, is_dir, name, extension, cas_id, object_id
FROM file_path
WHERE
location_id={{}}
AND cas_id IS NOT NULL
AND LOWER(extension) IN ({})
AND materialized_path = {{}}",
extensions
.iter()
.map(|ext| format!("LOWER('{ext}')"))
.collect::<Vec<_>>()
.join(",")
),
PrismaValue::Int(parent_iso_file_path.location_id()),
PrismaValue::String(
parent_iso_file_path
.materialized_path_for_children()
.expect("sub path iso_file_path must be a directory")
)
))
.exec()
.await
.map_err(Into::into)
dispatcher.dispatch_many_boxed(tasks).await.map_or_else(
|_| {
debug!("Task system is shutting down while a shallow media processor was in progress");
Ok(vec![])
},
Ok,
)
}
async fn dispatch_thumbnailer_tasks(
@ -214,18 +223,19 @@ async fn dispatch_thumbnailer_tasks(
location_path: &PathBuf,
dispatcher: &BaseTaskDispatcher<Error>,
ctx: &impl OuterContext,
) -> Result<Vec<TaskHandle<Error>>, media_processor::Error> {
) -> Result<Vec<TaskHandle<Error>>, Error> {
let thumbnails_directory_path =
Arc::new(ctx.get_data_directory().join(THUMBNAIL_CACHE_DIR_NAME));
let location_id = parent_iso_file_path.location_id();
let library_id = ctx.id();
let db = ctx.db();
let reporter = Arc::new(NewThumbnailsReporter { ctx: ctx.clone() });
let reporter: Arc<dyn NewThumbnailReporter> =
Arc::new(NewThumbnailsReporter { ctx: ctx.clone() });
let file_paths = get_files_by_extensions(
db,
let file_paths = get_direct_children_files_by_extensions(
parent_iso_file_path,
&helpers::thumbnailer::ALL_THUMBNAILABLE_EXTENSIONS,
db,
)
.await?;
@ -249,10 +259,13 @@ async fn dispatch_thumbnailer_tasks(
.map(IntoTask::into_task)
.collect::<Vec<_>>();
debug!(
"Dispatching {thumbs_count} thumbnails to be processed, in {} priority tasks",
tasks.len(),
);
debug!(%thumbs_count, priority_tasks_count = tasks.len(), "Dispatching thumbnails to be processed;");
Ok(dispatcher.dispatch_many_boxed(tasks).await)
dispatcher.dispatch_many_boxed(tasks).await.map_or_else(
|_| {
debug!("Task system is shutting down while a shallow media processor was in progress");
Ok(vec![])
},
Ok,
)
}

View file

@ -7,7 +7,8 @@ use crate::{
};
use sd_core_file_path_helper::IsolatedFilePathData;
use sd_core_prisma_helpers::file_path_for_media_processor;
use sd_core_prisma_helpers::{file_path_for_media_processor, ObjectPubId};
use sd_core_sync::Manager as SyncManager;
use sd_media_metadata::{ExifMetadata, FFmpegMetadata};
use sd_prisma::prisma::{exif_data, ffmpeg_data, file_path, location, object, PrismaClient};
@ -26,11 +27,22 @@ use std::{
time::Duration,
};
use futures::{FutureExt, StreamExt};
use futures_concurrency::future::{FutureGroup, Race};
use futures::{stream::FuturesUnordered, FutureExt, StreamExt};
use futures_concurrency::future::Race;
use serde::{Deserialize, Serialize};
use specta::Type;
use tokio::time::Instant;
use tracing::{debug, instrument, trace, Level};
#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type, Clone)]
pub enum NonCriticalMediaDataExtractorError {
#[error("failed to extract media data from <file='{}'>: {1}", .0.display())]
FailedToExtractImageMediaData(PathBuf, String),
#[error("file path missing object id: <file_path_id='{0}'>")]
FilePathMissingObjectId(file_path::id::Type),
#[error("failed to construct isolated file path data: <file_path_id='{0}'>: {1}")]
FailedToConstructIsolatedFilePathData(file_path::id::Type, String),
}
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]
enum Kind {
@ -40,14 +52,24 @@ enum Kind {
#[derive(Debug)]
pub struct MediaDataExtractor {
// Task control
id: TaskId,
kind: Kind,
// Received input args
file_paths: Vec<file_path_for_media_processor::Data>,
location_id: location::id::Type,
location_path: Arc<PathBuf>,
// Inner state
stage: Stage,
db: Arc<PrismaClient>,
// Out collector
output: Output,
// Dependencies
db: Arc<PrismaClient>,
sync: Arc<SyncManager>,
}
#[derive(Debug, Serialize, Deserialize)]
@ -55,74 +77,34 @@ enum Stage {
Starting,
FetchedObjectsAlreadyWithMediaData(Vec<object::id::Type>),
ExtractingMediaData {
paths_by_id: HashMap<file_path::id::Type, (PathBuf, object::id::Type)>,
exif_media_datas: Vec<(ExifMetadata, object::id::Type)>,
paths_by_id: HashMap<file_path::id::Type, (PathBuf, object::id::Type, ObjectPubId)>,
exif_media_datas: Vec<(ExifMetadata, object::id::Type, ObjectPubId)>,
ffmpeg_media_datas: Vec<(FFmpegMetadata, object::id::Type)>,
extract_ids_to_remove_from_map: Vec<file_path::id::Type>,
},
SaveMediaData {
exif_media_datas: Vec<(ExifMetadata, object::id::Type)>,
exif_media_datas: Vec<(ExifMetadata, object::id::Type, ObjectPubId)>,
ffmpeg_media_datas: Vec<(FFmpegMetadata, object::id::Type)>,
},
}
impl MediaDataExtractor {
fn new(
kind: Kind,
file_paths: &[file_path_for_media_processor::Data],
location_id: location::id::Type,
location_path: Arc<PathBuf>,
db: Arc<PrismaClient>,
) -> Self {
let mut output = Output::default();
Self {
id: TaskId::new_v4(),
kind,
file_paths: file_paths
.iter()
.filter(|file_path| {
if file_path.object_id.is_some() {
true
} else {
output.errors.push(
media_processor::NonCriticalError::from(
NonCriticalError::FilePathMissingObjectId(file_path.id),
)
.into(),
);
false
}
})
.cloned()
.collect(),
location_id,
location_path,
stage: Stage::Starting,
db,
output,
}
}
#[must_use]
pub fn new_exif(
file_paths: &[file_path_for_media_processor::Data],
location_id: location::id::Type,
location_path: Arc<PathBuf>,
db: Arc<PrismaClient>,
) -> Self {
Self::new(Kind::Exif, file_paths, location_id, location_path, db)
}
#[must_use]
pub fn new_ffmpeg(
file_paths: &[file_path_for_media_processor::Data],
location_id: location::id::Type,
location_path: Arc<PathBuf>,
db: Arc<PrismaClient>,
) -> Self {
Self::new(Kind::FFmpeg, file_paths, location_id, location_path, db)
}
/// [`MediaDataExtractor`] task output
#[derive(Serialize, Deserialize, Default, Debug)]
pub struct Output {
/// How many files were successfully processed
pub extracted: u64,
/// How many files were skipped
pub skipped: u64,
/// Time spent reading data from database
pub db_read_time: Duration,
/// Time spent filtering files to extract media data and files to skip
pub filtering_time: Duration,
/// Time spent extracting media data
pub extraction_time: Duration,
/// Time spent writing media data to database
pub db_write_time: Duration,
/// Errors encountered during the task
pub errors: Vec<crate::NonCriticalError>,
}
#[async_trait::async_trait]
@ -138,6 +120,20 @@ impl Task<Error> for MediaDataExtractor {
false
}
#[instrument(
skip_all,
fields(
task_id = %self.id,
kind = ?self.kind,
location_id = %self.location_id,
location_path = %self.location_path.display(),
file_paths_count = %self.file_paths.len(),
),
ret(level = Level::TRACE),
err,
)]
#[allow(clippy::blocks_in_conditions)] // Due to `err` on `instrument` macro above
#[allow(clippy::too_many_lines)]
async fn run(&mut self, interrupter: &Interrupter) -> Result<ExecStatus, Error> {
loop {
match &mut self.stage {
@ -150,18 +146,22 @@ impl Task<Error> for MediaDataExtractor {
)
.await?;
self.output.db_read_time = db_read_start.elapsed();
trace!(
object_ids_count = object_ids.len(),
"Fetched objects already with media data;",
);
self.stage = Stage::FetchedObjectsAlreadyWithMediaData(object_ids);
}
Stage::FetchedObjectsAlreadyWithMediaData(objects_already_with_media_data) => {
let filtering_start = Instant::now();
if self.file_paths.len() == objects_already_with_media_data.len() {
// All files already have media data, skipping
self.output.skipped = self.file_paths.len() as u64;
self.output.skipped = self.file_paths.len() as u64; // Files already have media data, skipping
debug!("Skipped all files as they already have media data");
break;
}
let filtering_start = Instant::now();
let paths_by_id = filter_files_to_extract_media_data(
mem::take(objects_already_with_media_data),
self.location_id,
@ -169,9 +169,13 @@ impl Task<Error> for MediaDataExtractor {
&mut self.file_paths,
&mut self.output,
);
self.output.filtering_time = filtering_start.elapsed();
trace!(
paths_needing_media_data_extraction_count = paths_by_id.len(),
"Filtered files to extract media data;",
);
self.stage = Stage::ExtractingMediaData {
extract_ids_to_remove_from_map: Vec::with_capacity(paths_by_id.len()),
exif_media_datas: if self.kind == Kind::Exif {
@ -241,8 +245,14 @@ impl Task<Error> for MediaDataExtractor {
ffmpeg_media_datas,
} => {
let db_write_start = Instant::now();
self.output.extracted =
save(self.kind, exif_media_datas, ffmpeg_media_datas, &self.db).await?;
self.output.extracted = save(
self.kind,
exif_media_datas,
ffmpeg_media_datas,
&self.db,
&self.sync,
)
.await?;
self.output.db_write_time = db_write_start.elapsed();
self.output.skipped += self.output.errors.len() as u64;
@ -258,91 +268,74 @@ impl Task<Error> for MediaDataExtractor {
}
}
#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type)]
pub enum NonCriticalError {
#[error("failed to extract media data from <file='{}'>: {1}", .0.display())]
FailedToExtractImageMediaData(PathBuf, String),
#[error("file path missing object id: <file_path_id='{0}'>")]
FilePathMissingObjectId(file_path::id::Type),
#[error("failed to construct isolated file path data: <file_path_id='{0}'>: {1}")]
FailedToConstructIsolatedFilePathData(file_path::id::Type, String),
}
impl MediaDataExtractor {
fn new(
kind: Kind,
file_paths: &[file_path_for_media_processor::Data],
location_id: location::id::Type,
location_path: Arc<PathBuf>,
db: Arc<PrismaClient>,
sync: Arc<SyncManager>,
) -> Self {
let mut output = Output::default();
#[derive(Serialize, Deserialize, Default, Debug)]
pub struct Output {
pub extracted: u64,
pub skipped: u64,
pub db_read_time: Duration,
pub filtering_time: Duration,
pub extraction_time: Duration,
pub db_write_time: Duration,
pub errors: Vec<crate::NonCriticalError>,
}
#[derive(Debug, Serialize, Deserialize)]
struct SaveState {
id: TaskId,
kind: Kind,
file_paths: Vec<file_path_for_media_processor::Data>,
location_id: location::id::Type,
location_path: Arc<PathBuf>,
stage: Stage,
output: Output,
}
impl SerializableTask<Error> for MediaDataExtractor {
type SerializeError = rmp_serde::encode::Error;
type DeserializeError = rmp_serde::decode::Error;
type DeserializeCtx = Arc<PrismaClient>;
async fn serialize(self) -> Result<Vec<u8>, Self::SerializeError> {
let Self {
id,
Self {
id: TaskId::new_v4(),
kind,
file_paths,
file_paths: file_paths
.iter()
.filter(|file_path| {
if file_path.object.is_some() {
true
} else {
output.errors.push(
media_processor::NonCriticalMediaProcessorError::from(
NonCriticalMediaDataExtractorError::FilePathMissingObjectId(
file_path.id,
),
)
.into(),
);
false
}
})
.cloned()
.collect(),
location_id,
location_path,
stage,
stage: Stage::Starting,
db,
sync,
output,
..
} = self;
rmp_serde::to_vec_named(&SaveState {
id,
kind,
file_paths,
location_id,
location_path,
stage,
output,
})
}
}
async fn deserialize(
data: &[u8],
db: Self::DeserializeCtx,
) -> Result<Self, Self::DeserializeError> {
rmp_serde::from_slice(data).map(
|SaveState {
id,
kind,
file_paths,
location_id,
location_path,
stage,
output,
}| Self {
id,
kind,
file_paths,
location_id,
location_path,
stage,
db,
output,
},
#[must_use]
pub fn new_exif(
file_paths: &[file_path_for_media_processor::Data],
location_id: location::id::Type,
location_path: Arc<PathBuf>,
db: Arc<PrismaClient>,
sync: Arc<SyncManager>,
) -> Self {
Self::new(Kind::Exif, file_paths, location_id, location_path, db, sync)
}
#[must_use]
pub fn new_ffmpeg(
file_paths: &[file_path_for_media_processor::Data],
location_id: location::id::Type,
location_path: Arc<PathBuf>,
db: Arc<PrismaClient>,
sync: Arc<SyncManager>,
) -> Self {
Self::new(
Kind::FFmpeg,
file_paths,
location_id,
location_path,
db,
sync,
)
}
}
@ -355,7 +348,7 @@ async fn fetch_objects_already_with_media_data(
) -> Result<Vec<object::id::Type>, media_processor::Error> {
let object_ids = file_paths
.iter()
.filter_map(|file_path| file_path.object_id)
.filter_map(|file_path| file_path.object.as_ref().map(|object| object.id))
.collect();
match kind {
@ -388,7 +381,7 @@ fn filter_files_to_extract_media_data(
Output {
skipped, errors, ..
}: &mut Output,
) -> HashMap<file_path::id::Type, (PathBuf, object::id::Type)> {
) -> HashMap<file_path::id::Type, (PathBuf, object::id::Type, ObjectPubId)> {
let unique_objects_already_with_media_data = objects_already_with_media_data
.into_iter()
.collect::<HashSet<_>>();
@ -397,7 +390,7 @@ fn filter_files_to_extract_media_data(
file_paths.retain(|file_path| {
!unique_objects_already_with_media_data
.contains(&file_path.object_id.expect("already checked"))
.contains(&file_path.object.as_ref().expect("already checked").id)
});
file_paths
@ -406,8 +399,8 @@ fn filter_files_to_extract_media_data(
IsolatedFilePathData::try_from((location_id, file_path))
.map_err(|e| {
errors.push(
media_processor::NonCriticalError::from(
NonCriticalError::FailedToConstructIsolatedFilePathData(
media_processor::NonCriticalMediaProcessorError::from(
NonCriticalMediaDataExtractorError::FailedToConstructIsolatedFilePathData(
file_path.id,
e.to_string(),
),
@ -416,11 +409,14 @@ fn filter_files_to_extract_media_data(
);
})
.map(|iso_file_path| {
let object = file_path.object.as_ref().expect("already checked");
(
file_path.id,
(
location_path.join(iso_file_path),
file_path.object_id.expect("already checked"),
object.id,
object.pub_id.as_slice().into(),
),
)
})
@ -430,13 +426,14 @@ fn filter_files_to_extract_media_data(
}
enum ExtractionOutputKind {
Exif(Result<Option<ExifMetadata>, media_processor::NonCriticalError>),
FFmpeg(Result<FFmpegMetadata, media_processor::NonCriticalError>),
Exif(Result<Option<ExifMetadata>, media_processor::NonCriticalMediaProcessorError>),
FFmpeg(Result<FFmpegMetadata, media_processor::NonCriticalMediaProcessorError>),
}
struct ExtractionOutput {
file_path_id: file_path::id::Type,
object_id: object::id::Type,
object_pub_id: ObjectPubId,
kind: ExtractionOutputKind,
}
@ -453,23 +450,28 @@ enum InterruptRace {
#[inline]
fn prepare_extraction_futures<'a>(
kind: Kind,
paths_by_id: &'a HashMap<file_path::id::Type, (PathBuf, object::id::Type)>,
paths_by_id: &'a HashMap<file_path::id::Type, (PathBuf, object::id::Type, ObjectPubId)>,
interrupter: &'a Interrupter,
) -> FutureGroup<impl Future<Output = InterruptRace> + 'a> {
) -> FuturesUnordered<impl Future<Output = InterruptRace> + 'a> {
paths_by_id
.iter()
.map(|(file_path_id, (path, object_id))| async move {
InterruptRace::Processed(ExtractionOutput {
file_path_id: *file_path_id,
object_id: *object_id,
kind: match kind {
Kind::Exif => ExtractionOutputKind::Exif(exif_media_data::extract(path).await),
Kind::FFmpeg => {
ExtractionOutputKind::FFmpeg(ffmpeg_media_data::extract(path).await)
}
},
})
})
.map(
|(file_path_id, (path, object_id, object_pub_id))| async move {
InterruptRace::Processed(ExtractionOutput {
file_path_id: *file_path_id,
object_id: *object_id,
object_pub_id: object_pub_id.clone(),
kind: match kind {
Kind::Exif => {
ExtractionOutputKind::Exif(exif_media_data::extract(path).await)
}
Kind::FFmpeg => {
ExtractionOutputKind::FFmpeg(ffmpeg_media_data::extract(path).await)
}
},
})
},
)
.map(|fut| {
(
fut,
@ -477,24 +479,28 @@ fn prepare_extraction_futures<'a>(
)
.race()
})
.collect::<FutureGroup<_>>()
.collect::<FuturesUnordered<_>>()
}
#[instrument(skip_all, fields(%file_path_id, %object_id))]
#[inline]
fn process_output(
ExtractionOutput {
file_path_id,
object_id,
object_pub_id,
kind,
}: ExtractionOutput,
exif_media_datas: &mut Vec<(ExifMetadata, object::id::Type)>,
exif_media_datas: &mut Vec<(ExifMetadata, object::id::Type, ObjectPubId)>,
ffmpeg_media_datas: &mut Vec<(FFmpegMetadata, object::id::Type)>,
extract_ids_to_remove_from_map: &mut Vec<file_path::id::Type>,
output: &mut Output,
) {
trace!("Processing extracted media data");
match kind {
ExtractionOutputKind::Exif(Ok(Some(exif_data))) => {
exif_media_datas.push((exif_data, object_id));
exif_media_datas.push((exif_data, object_id, object_pub_id));
}
ExtractionOutputKind::Exif(Ok(None)) => {
// No exif media data found
@ -514,12 +520,85 @@ fn process_output(
#[inline]
async fn save(
kind: Kind,
exif_media_datas: &mut Vec<(ExifMetadata, object::id::Type)>,
exif_media_datas: &mut Vec<(ExifMetadata, object::id::Type, ObjectPubId)>,
ffmpeg_media_datas: &mut Vec<(FFmpegMetadata, object::id::Type)>,
db: &PrismaClient,
sync: &SyncManager,
) -> Result<u64, media_processor::Error> {
trace!("Saving media data on database");
match kind {
Kind::Exif => exif_media_data::save(mem::take(exif_media_datas), db).await,
Kind::Exif => exif_media_data::save(mem::take(exif_media_datas), db, sync).await,
Kind::FFmpeg => ffmpeg_media_data::save(mem::take(ffmpeg_media_datas), db).await,
}
.map_err(Into::into)
}
#[derive(Debug, Serialize, Deserialize)]
struct SaveState {
id: TaskId,
kind: Kind,
file_paths: Vec<file_path_for_media_processor::Data>,
location_id: location::id::Type,
location_path: Arc<PathBuf>,
stage: Stage,
output: Output,
}
impl SerializableTask<Error> for MediaDataExtractor {
type SerializeError = rmp_serde::encode::Error;
type DeserializeError = rmp_serde::decode::Error;
type DeserializeCtx = (Arc<PrismaClient>, Arc<SyncManager>);
async fn serialize(self) -> Result<Vec<u8>, Self::SerializeError> {
let Self {
id,
kind,
file_paths,
location_id,
location_path,
stage,
output,
..
} = self;
rmp_serde::to_vec_named(&SaveState {
id,
kind,
file_paths,
location_id,
location_path,
stage,
output,
})
}
async fn deserialize(
data: &[u8],
(db, sync): Self::DeserializeCtx,
) -> Result<Self, Self::DeserializeError> {
rmp_serde::from_slice(data).map(
|SaveState {
id,
kind,
file_paths,
location_id,
location_path,
stage,
output,
}| Self {
id,
kind,
file_paths,
location_id,
location_path,
stage,
output,
db,
sync,
},
)
}
}
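Editor's note: the `SerializableTask` impl above persists only the resumable fields as named MessagePack and rebuilds the task with fresh `db`/`sync` handles from the deserialize context. A minimal round-trip sketch of that encoding; `Snapshot` is a hypothetical stand-in for `SaveState`:

```rust
use serde::{Deserialize, Serialize};

// Hypothetical stand-in for the task's `SaveState`.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
struct Snapshot {
    id: u32,
    stage: String,
    pending: Vec<String>,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let state = Snapshot {
        id: 7,
        stage: "extract".into(),
        pending: vec!["a.jpg".into(), "b.png".into()],
    };

    // `to_vec_named` keeps field names in the payload, which makes previously
    // serialized tasks more tolerant of field reordering on resume.
    let bytes = rmp_serde::to_vec_named(&state)?;
    let restored: Snapshot = rmp_serde::from_slice(&bytes)?;

    assert_eq!(state, restored);
    Ok(())
}
```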

View file

@ -12,8 +12,7 @@ use crate::{
media_processor::{
self,
helpers::thumbnailer::{
can_generate_thumbnail_for_document, can_generate_thumbnail_for_image, get_shard_hex,
EPHEMERAL_DIR, TARGET_PX, TARGET_QUALITY, THUMBNAIL_GENERATION_TIMEOUT, WEBP_EXTENSION,
generate_thumbnail, GenerateThumbnailArgs, GenerationStatus, THUMBNAILER_TASK_TIMEOUT,
},
ThumbKey, ThumbnailKind,
},
@ -21,61 +20,31 @@ use crate::{
};
use sd_core_file_path_helper::IsolatedFilePathData;
use sd_core_prisma_helpers::file_path_for_media_processor;
use sd_core_prisma_helpers::{file_path_for_media_processor, CasId};
use sd_file_ext::extensions::{DocumentExtension, ImageExtension};
use sd_images::{format_image, scale_dimensions, ConvertibleExtension};
use sd_media_metadata::exif::Orientation;
use sd_prisma::prisma::{file_path, location};
use sd_task_system::{
ExecStatus, Interrupter, InterruptionKind, IntoAnyTaskOutput, SerializableTask, Task, TaskId,
};
use sd_utils::error::FileIOError;
use std::{
collections::HashMap,
fmt,
future::IntoFuture,
mem,
ops::Deref,
path::{Path, PathBuf},
pin::pin,
str::FromStr,
sync::Arc,
time::Duration,
};
use futures::{FutureExt, StreamExt};
use futures_concurrency::future::{FutureGroup, Race};
use image::{imageops, DynamicImage, GenericImageView};
use futures::{stream::FuturesUnordered, FutureExt, StreamExt};
use futures_concurrency::future::Race;
use serde::{Deserialize, Serialize};
use specta::Type;
use tokio::{
fs, io,
task::spawn_blocking,
time::{sleep, Instant},
};
use tracing::{error, info, trace};
use tokio::time::Instant;
use tracing::{error, instrument, trace, Level};
use uuid::Uuid;
use webp::Encoder;
#[derive(Debug, Serialize, Deserialize)]
pub struct GenerateThumbnailArgs {
pub extension: String,
pub cas_id: String,
pub path: PathBuf,
}
impl GenerateThumbnailArgs {
#[must_use]
pub const fn new(extension: String, cas_id: String, path: PathBuf) -> Self {
Self {
extension,
cas_id,
path,
}
}
}
pub type ThumbnailId = u32;
@ -84,20 +53,29 @@ pub trait NewThumbnailReporter: Send + Sync + fmt::Debug + 'static {
}
#[derive(Debug)]
pub struct Thumbnailer<Reporter: NewThumbnailReporter> {
pub struct Thumbnailer {
// Task control
id: TaskId,
reporter: Arc<Reporter>,
with_priority: bool,
// Received input args
thumbs_kind: ThumbnailKind,
thumbnails_directory_path: Arc<PathBuf>,
thumbnails_to_generate: HashMap<ThumbnailId, GenerateThumbnailArgs>,
already_processed_ids: Vec<ThumbnailId>,
thumbnails_to_generate: HashMap<ThumbnailId, GenerateThumbnailArgs<'static>>,
should_regenerate: bool,
with_priority: bool,
// Inner state
already_processed_ids: Vec<ThumbnailId>,
// Out collector
output: Output,
// Dependencies
reporter: Arc<dyn NewThumbnailReporter>,
}
#[async_trait::async_trait]
impl<Reporter: NewThumbnailReporter> Task<Error> for Thumbnailer<Reporter> {
impl Task<Error> for Thumbnailer {
fn id(&self) -> TaskId {
self.id
}
@ -107,9 +85,23 @@ impl<Reporter: NewThumbnailReporter> Task<Error> for Thumbnailer<Reporter> {
}
fn with_timeout(&self) -> Option<Duration> {
Some(Duration::from_secs(60 * 5)) // The entire task must not take more than 5 minutes
Some(THUMBNAILER_TASK_TIMEOUT) // The entire task must not take more than this constant
}
#[instrument(
skip_all,
fields(
task_id = %self.id,
thumbs_kind = ?self.thumbs_kind,
should_regenerate = self.should_regenerate,
thumbnails_to_generate_count = self.thumbnails_to_generate.len(),
already_processed_ids_count = self.already_processed_ids.len(),
with_priority = self.with_priority,
),
ret(level = Level::TRACE),
err,
)]
#[allow(clippy::blocks_in_conditions)] // Due to `err` on `instrument` macro above
async fn run(&mut self, interrupter: &Interrupter) -> Result<ExecStatus, Error> {
enum InterruptRace {
Interrupted(InterruptionKind),
@ -135,38 +127,27 @@ impl<Reporter: NewThumbnailReporter> Task<Error> for Thumbnailer<Reporter> {
let start = Instant::now();
let mut futures = pin!(thumbnails_to_generate
let futures = thumbnails_to_generate
.iter()
.map(|(id, generate_args)| {
let path = &generate_args.path;
generate_thumbnail(
thumbnails_directory_path,
generate_args,
thumbs_kind,
*should_regenerate,
)
.map(|res| InterruptRace::Processed((*id, res)))
})
.map(|fut| {
(
generate_thumbnail(
thumbnails_directory_path,
generate_args,
thumbs_kind,
*should_regenerate,
)
.map(|res| (*id, res)),
sleep(THUMBNAIL_GENERATION_TIMEOUT).map(|()| {
(
*id,
(
THUMBNAIL_GENERATION_TIMEOUT,
Err(NonCriticalError::ThumbnailGenerationTimeout(path.clone())),
),
)
}),
fut,
interrupter.into_future().map(InterruptRace::Interrupted),
)
.race()
.map(InterruptRace::Processed)
})
.map(|fut| (
fut,
interrupter.into_future().map(InterruptRace::Interrupted)
)
.race())
.collect::<FutureGroup<_>>());
.collect::<FuturesUnordered<_>>();
let mut futures = pin!(futures);
while let Some(race_output) = futures.next().await {
match race_output {
@ -190,25 +171,25 @@ impl<Reporter: NewThumbnailReporter> Task<Error> for Thumbnailer<Reporter> {
output.total_time += start.elapsed();
#[allow(clippy::cast_precision_loss)]
// SAFETY: we probably won't have 2^52 thumbnails being generated on a single task for this cast to have
// a precision loss issue
let total = (output.generated + output.skipped) as f64;
if output.generated > 1 {
#[allow(clippy::cast_precision_loss)]
// SAFETY: we probably won't have 2^52 thumbnails being generated on a single task for this cast to have
// a precision loss issue
let total = (output.generated + output.skipped) as f64;
let mean_generation_time_f64 = output.mean_time_acc / total;
let mean_generation_time = output.mean_time_acc / total;
let generation_time_std_dev = Duration::from_secs_f64(
(mean_generation_time.mul_add(-mean_generation_time, output.std_dev_acc / total))
.sqrt(),
);
info!(
"{{generated: {generated}, skipped: {skipped}}} thumbnails; \
mean generation time: {mean_generation_time:?} ± {generation_time_std_dev:?}",
generated = output.generated,
skipped = output.skipped,
mean_generation_time = Duration::from_secs_f64(mean_generation_time)
);
trace!(
generated = output.generated,
skipped = output.skipped,
"mean generation time: {mean_generation_time:?} ± {generation_time_std_dev:?};",
mean_generation_time = Duration::from_secs_f64(mean_generation_time_f64),
generation_time_std_dev = Duration::from_secs_f64(
(mean_generation_time_f64
.mul_add(-mean_generation_time_f64, output.std_dev_acc / total))
.sqrt(),
)
);
}
Ok(ExecStatus::Done(mem::take(output).into_output()))
}
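Editor's note: the logging block above derives the mean and standard deviation from two running accumulators kept while processing outputs, the sum of per-thumbnail times (`mean_time_acc`) and the sum of their squares (`std_dev_acc`), using Var(X) = E[X²] − E[X]². A small sketch of the same computation over hypothetical sample data:

```rust
fn main() {
    // Hypothetical per-thumbnail generation times, in seconds.
    let times = [0.12_f64, 0.09, 0.15, 0.11];

    // Running accumulators, as the task keeps them while processing outputs.
    let (sum, sum_sq) = times
        .iter()
        .fold((0.0_f64, 0.0_f64), |(s, sq), &t| (s + t, sq + t * t));

    let n = times.len() as f64;
    let mean = sum / n;
    // Population variance E[X^2] - E[X]^2, matching `mul_add(-mean, sum_sq / n)` above.
    let std_dev = mean.mul_add(-mean, sum_sq / n).sqrt();

    println!("mean: {mean:.4}s ± {std_dev:.4}s");
}
```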
@ -224,8 +205,8 @@ pub struct Output {
pub std_dev_acc: f64,
}
#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type)]
pub enum NonCriticalError {
#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type, Clone)]
pub enum NonCriticalThumbnailerError {
#[error("file path <id='{0}'> has no cas_id")]
MissingCasId(file_path::id::Type),
#[error("failed to extract isolated file path data from file path <id='{0}'>: {1}")]
@ -242,19 +223,19 @@ pub enum NonCriticalError {
CreateShardDirectory(String),
#[error("failed to save thumbnail <path='{}'>: {1}", .0.display())]
SaveThumbnail(PathBuf, String),
#[error("thumbnail generation timed out <path='{}'>", .0.display())]
ThumbnailGenerationTimeout(PathBuf),
#[error("task timed out: {0}")]
TaskTimeout(TaskId),
}
impl<Reporter: NewThumbnailReporter> Thumbnailer<Reporter> {
impl Thumbnailer {
fn new(
thumbs_kind: ThumbnailKind,
thumbnails_directory_path: Arc<PathBuf>,
thumbnails_to_generate: HashMap<ThumbnailId, GenerateThumbnailArgs>,
thumbnails_to_generate: HashMap<ThumbnailId, GenerateThumbnailArgs<'static>>,
errors: Vec<crate::NonCriticalError>,
should_regenerate: bool,
with_priority: bool,
reporter: Arc<Reporter>,
reporter: Arc<dyn NewThumbnailReporter>,
) -> Self {
Self {
id: TaskId::new_v4(),
@ -275,8 +256,8 @@ impl<Reporter: NewThumbnailReporter> Thumbnailer<Reporter> {
#[must_use]
pub fn new_ephemeral(
thumbnails_directory_path: Arc<PathBuf>,
thumbnails_to_generate: Vec<GenerateThumbnailArgs>,
reporter: Arc<Reporter>,
thumbnails_to_generate: Vec<GenerateThumbnailArgs<'static>>,
reporter: Arc<dyn NewThumbnailReporter>,
) -> Self {
Self::new(
ThumbnailKind::Ephemeral,
@ -308,7 +289,7 @@ impl<Reporter: NewThumbnailReporter> Thumbnailer<Reporter> {
library_id: Uuid,
should_regenerate: bool,
with_priority: bool,
reporter: Arc<Reporter>,
reporter: Arc<dyn NewThumbnailReporter>,
) -> Self {
let mut errors = Vec::new();
@ -318,13 +299,18 @@ impl<Reporter: NewThumbnailReporter> Thumbnailer<Reporter> {
file_paths
.iter()
.filter_map(|file_path| {
if let Some(cas_id) = file_path.cas_id.as_ref() {
if let Some(cas_id) = file_path
.cas_id
.as_ref()
.map(CasId::from)
.map(CasId::into_owned)
{
let file_path_id = file_path.id;
IsolatedFilePathData::try_from((location_id, file_path))
.map_err(|e| {
errors.push(
media_processor::NonCriticalError::from(
NonCriticalError::FailedToExtractIsolatedFilePathData(
media_processor::NonCriticalMediaProcessorError::from(
NonCriticalThumbnailerError::FailedToExtractIsolatedFilePathData(
file_path_id,
e.to_string(),
),
@ -336,8 +322,8 @@ impl<Reporter: NewThumbnailReporter> Thumbnailer<Reporter> {
.map(|iso_file_path| (file_path_id, cas_id, iso_file_path))
} else {
errors.push(
media_processor::NonCriticalError::from(
NonCriticalError::MissingCasId(file_path.id),
media_processor::NonCriticalMediaProcessorError::from(
NonCriticalThumbnailerError::MissingCasId(file_path.id),
)
.into(),
);
@ -354,7 +340,7 @@ impl<Reporter: NewThumbnailReporter> Thumbnailer<Reporter> {
file_path_id as u32,
GenerateThumbnailArgs::new(
iso_file_path.extension().to_string(),
cas_id.clone(),
cas_id,
full_path,
),
)
@ -369,23 +355,74 @@ impl<Reporter: NewThumbnailReporter> Thumbnailer<Reporter> {
}
}
#[instrument(skip_all, fields(thumb_id = id, %generated, %skipped, ?elapsed_time, ?res))]
fn process_thumbnail_generation_output(
(id, (elapsed_time, res)): ThumbnailGenerationOutput,
with_priority: bool,
reporter: &dyn NewThumbnailReporter,
already_processed_ids: &mut Vec<ThumbnailId>,
Output {
generated,
skipped,
errors,
mean_time_acc: mean_generation_time_accumulator,
std_dev_acc: std_dev_accumulator,
..
}: &mut Output,
) {
let elapsed_time = elapsed_time.as_secs_f64();
*mean_generation_time_accumulator += elapsed_time;
*std_dev_accumulator += elapsed_time * elapsed_time;
match res {
Ok((thumb_key, status)) => {
match status {
GenerationStatus::Generated => {
*generated += 1;
}
GenerationStatus::Skipped => {
*skipped += 1;
}
}
// This `if` is REALLY needed: due to the sheer throughput of the thumbnailer,
// I restricted event notifications to thumbnails in the currently opened
// directory. Sending events for the entire location turns into a humongous
// bottleneck in the frontend, since it doesn't even know what to do with
// thumbnails for inner directories.
// - fogodev
if with_priority {
reporter.new_thumbnail(thumb_key);
}
}
Err(e) => {
errors.push(media_processor::NonCriticalMediaProcessorError::from(e).into());
*skipped += 1;
}
}
already_processed_ids.push(id);
trace!("Thumbnail processed");
}
#[derive(Debug, Serialize, Deserialize)]
struct SaveState {
id: TaskId,
thumbs_kind: ThumbnailKind,
thumbnails_directory_path: Arc<PathBuf>,
thumbnails_to_generate: HashMap<ThumbnailId, GenerateThumbnailArgs>,
thumbnails_to_generate: HashMap<ThumbnailId, GenerateThumbnailArgs<'static>>,
should_regenerate: bool,
with_priority: bool,
output: Output,
}
impl<Reporter: NewThumbnailReporter> SerializableTask<Error> for Thumbnailer<Reporter> {
impl SerializableTask<Error> for Thumbnailer {
type SerializeError = rmp_serde::encode::Error;
type DeserializeError = rmp_serde::decode::Error;
type DeserializeCtx = Arc<Reporter>;
type DeserializeCtx = Arc<dyn NewThumbnailReporter>;
async fn serialize(self) -> Result<Vec<u8>, Self::SerializeError> {
let Self {
@ -443,235 +480,10 @@ impl<Reporter: NewThumbnailReporter> SerializableTask<Error> for Thumbnailer<Rep
}
}
enum GenerationStatus {
Generated,
Skipped,
}
type ThumbnailGenerationOutput = (
ThumbnailId,
(
Duration,
Result<(ThumbKey, GenerationStatus), NonCriticalError>,
Result<(ThumbKey, GenerationStatus), NonCriticalThumbnailerError>,
),
);
fn process_thumbnail_generation_output(
(id, (elapsed_time, res)): ThumbnailGenerationOutput,
with_priority: bool,
reporter: &impl NewThumbnailReporter,
already_processed_ids: &mut Vec<ThumbnailId>,
Output {
generated,
skipped,
errors,
mean_time_acc: mean_generation_time_accumulator,
std_dev_acc: std_dev_accumulator,
..
}: &mut Output,
) {
let elapsed_time = elapsed_time.as_secs_f64();
*mean_generation_time_accumulator += elapsed_time;
*std_dev_accumulator += elapsed_time * elapsed_time;
match res {
Ok((thumb_key, status)) => {
match status {
GenerationStatus::Generated => {
*generated += 1;
}
GenerationStatus::Skipped => {
*skipped += 1;
}
}
// This `if` is REALLY needed: due to the sheer throughput of the thumbnailer,
// I restricted event notifications to thumbnails in the currently opened
// directory. Sending events for the entire location turns into a humongous
// bottleneck in the frontend, since it doesn't even know what to do with
// thumbnails for inner directories.
// - fogodev
if with_priority {
reporter.new_thumbnail(thumb_key);
}
}
Err(e) => {
errors.push(media_processor::NonCriticalError::from(e).into());
*skipped += 1;
}
}
already_processed_ids.push(id);
}
async fn generate_thumbnail(
thumbnails_directory: &Path,
GenerateThumbnailArgs {
extension,
cas_id,
path,
}: &GenerateThumbnailArgs,
kind: &ThumbnailKind,
should_regenerate: bool,
) -> (
Duration,
Result<(ThumbKey, GenerationStatus), NonCriticalError>,
) {
trace!("Generating thumbnail for {}", path.display());
let start = Instant::now();
let mut output_path = match kind {
ThumbnailKind::Ephemeral => thumbnails_directory.join(EPHEMERAL_DIR),
ThumbnailKind::Indexed(library_id) => thumbnails_directory.join(library_id.to_string()),
};
output_path.push(get_shard_hex(cas_id));
output_path.push(cas_id);
output_path.set_extension(WEBP_EXTENSION);
if let Err(e) = fs::metadata(&*output_path).await {
if e.kind() != io::ErrorKind::NotFound {
error!(
"Failed to check if thumbnail exists, but we will try to generate it anyway: {e:#?}"
);
}
// Otherwise we're good, the thumbnail doesn't exist so we can generate it
} else if !should_regenerate {
trace!(
"Skipping thumbnail generation for {} because it already exists",
path.display()
);
return (
start.elapsed(),
Ok((ThumbKey::new(cas_id, kind), GenerationStatus::Skipped)),
);
}
if let Ok(extension) = ImageExtension::from_str(extension) {
if can_generate_thumbnail_for_image(extension) {
if let Err(e) = generate_image_thumbnail(&path, &output_path).await {
return (start.elapsed(), Err(e));
}
}
} else if let Ok(extension) = DocumentExtension::from_str(extension) {
if can_generate_thumbnail_for_document(extension) {
if let Err(e) = generate_image_thumbnail(&path, &output_path).await {
return (start.elapsed(), Err(e));
}
}
}
#[cfg(feature = "ffmpeg")]
{
use crate::media_processor::helpers::thumbnailer::can_generate_thumbnail_for_video;
use sd_file_ext::extensions::VideoExtension;
if let Ok(extension) = VideoExtension::from_str(extension) {
if can_generate_thumbnail_for_video(extension) {
if let Err(e) = generate_video_thumbnail(&path, &output_path).await {
return (start.elapsed(), Err(e));
}
}
}
}
trace!("Generated thumbnail for {}", path.display());
(
start.elapsed(),
Ok((ThumbKey::new(cas_id, kind), GenerationStatus::Generated)),
)
}
async fn generate_image_thumbnail(
file_path: impl AsRef<Path> + Send,
output_path: impl AsRef<Path> + Send,
) -> Result<(), NonCriticalError> {
let file_path = file_path.as_ref().to_path_buf();
let webp = spawn_blocking({
let file_path = file_path.clone();
move || -> Result<_, NonCriticalError> {
let mut img = format_image(&file_path)
.map_err(|e| NonCriticalError::FormatImage(file_path.clone(), e.to_string()))?;
let (w, h) = img.dimensions();
#[allow(clippy::cast_precision_loss)]
let (w_scaled, h_scaled) = scale_dimensions(w as f32, h as f32, TARGET_PX);
// Optionally, resize the existing photo and convert back into DynamicImage
if w != w_scaled && h != h_scaled {
img = DynamicImage::ImageRgba8(imageops::resize(
&img,
w_scaled,
h_scaled,
imageops::FilterType::Triangle,
));
}
// this corrects the rotation/flip of the image based on the *available* exif data
// not all images have exif data, so we don't error. we also don't rotate HEIF as that's against the spec
if let Some(orientation) = Orientation::from_path(&file_path) {
if ConvertibleExtension::try_from(file_path.as_ref())
.expect("we already checked if the image was convertible")
.should_rotate()
{
img = orientation.correct_thumbnail(img);
}
}
// Create the WebP encoder for the above image
let encoder = Encoder::from_image(&img)
.map_err(|reason| NonCriticalError::WebPEncoding(file_path, reason.to_string()))?;
// Type `WebPMemory` is `!Send`, which makes the `Future` in this function `!Send`,
// so we `deref` to get a `&[u8]` and then `to_owned` it into a `Vec<u8>`,
// which implies an unwanted clone...
Ok(encoder.encode(TARGET_QUALITY).deref().to_owned())
}
})
.await
.map_err(|e| {
NonCriticalError::PanicWhileGeneratingThumbnail(file_path.clone(), e.to_string())
})??;
let output_path = output_path.as_ref();
if let Some(shard_dir) = output_path.parent() {
fs::create_dir_all(shard_dir).await.map_err(|e| {
NonCriticalError::CreateShardDirectory(FileIOError::from((shard_dir, e)).to_string())
})?;
} else {
error!(
"Failed to get parent directory of '{}' for sharding parent directory",
output_path.display()
);
}
fs::write(output_path, &webp).await.map_err(|e| {
NonCriticalError::SaveThumbnail(file_path, FileIOError::from((output_path, e)).to_string())
})
}
#[cfg(feature = "ffmpeg")]
async fn generate_video_thumbnail(
file_path: impl AsRef<Path> + Send,
output_path: impl AsRef<Path> + Send,
) -> Result<(), NonCriticalError> {
use sd_ffmpeg::{to_thumbnail, ThumbnailSize};
let file_path = file_path.as_ref();
to_thumbnail(
file_path,
output_path,
ThumbnailSize::Scale(1024),
TARGET_QUALITY,
)
.await
.map_err(|e| {
NonCriticalError::VideoThumbnailGenerationFailed(file_path.to_path_buf(), e.to_string())
})
}

View file

@ -1,4 +1,3 @@
use rspc::ErrorCode;
use sd_core_file_path_helper::{
ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location,
FilePathError, IsolatedFilePathData,
@ -9,6 +8,7 @@ use sd_prisma::prisma::{location, PrismaClient};
use std::path::{Path, PathBuf};
use prisma_client_rust::QueryError;
use rspc::ErrorCode;
#[derive(thiserror::Error, Debug)]
pub enum Error {
@ -23,66 +23,91 @@ pub enum Error {
}
impl From<Error> for rspc::Error {
fn from(err: Error) -> Self {
match err {
Error::SubPathNotFound(_) => {
Self::with_cause(ErrorCode::NotFound, err.to_string(), err)
fn from(e: Error) -> Self {
match e {
Error::SubPathNotFound(_) => Self::with_cause(ErrorCode::NotFound, e.to_string(), e),
_ => Self::with_cause(ErrorCode::InternalServerError, e.to_string(), e),
}
}
}
pub async fn get_full_path_from_sub_path<E: From<Error>>(
location_id: location::id::Type,
sub_path: Option<impl AsRef<Path> + Send + Sync>,
location_path: impl AsRef<Path> + Send,
db: &PrismaClient,
) -> Result<PathBuf, E> {
async fn inner(
location_id: location::id::Type,
sub_path: Option<&Path>,
location_path: &Path,
db: &PrismaClient,
) -> Result<PathBuf, Error> {
match sub_path {
Some(sub_path) if sub_path != Path::new("") => {
let full_path = ensure_sub_path_is_in_location(location_path, sub_path).await?;
ensure_sub_path_is_directory(location_path, sub_path).await?;
ensure_file_path_exists(
sub_path,
&IsolatedFilePathData::new(location_id, location_path, &full_path, true)?,
db,
Error::SubPathNotFound,
)
.await?;
Ok(full_path)
}
_ => Self::with_cause(ErrorCode::InternalServerError, err.to_string(), err),
_ => Ok(location_path.to_path_buf()),
}
}
inner(
location_id,
sub_path.as_ref().map(AsRef::as_ref),
location_path.as_ref(),
db,
)
.await
.map_err(E::from)
}
pub async fn get_full_path_from_sub_path(
pub async fn maybe_get_iso_file_path_from_sub_path<E: From<Error>>(
location_id: location::id::Type,
sub_path: &Option<impl AsRef<Path> + Send + Sync>,
sub_path: Option<impl AsRef<Path> + Send + Sync>,
location_path: impl AsRef<Path> + Send,
db: &PrismaClient,
) -> Result<PathBuf, Error> {
let location_path = location_path.as_ref();
) -> Result<Option<IsolatedFilePathData<'static>>, E> {
async fn inner(
location_id: location::id::Type,
sub_path: Option<&Path>,
location_path: &Path,
db: &PrismaClient,
) -> Result<Option<IsolatedFilePathData<'static>>, Error> {
match sub_path {
Some(sub_path) if sub_path != Path::new("") => {
let full_path = ensure_sub_path_is_in_location(location_path, sub_path).await?;
ensure_sub_path_is_directory(location_path, sub_path).await?;
match sub_path {
Some(sub_path) if sub_path.as_ref() != Path::new("") => {
let sub_path = sub_path.as_ref();
let full_path = ensure_sub_path_is_in_location(location_path, sub_path).await?;
let sub_iso_file_path =
IsolatedFilePathData::new(location_id, location_path, &full_path, true)?;
ensure_sub_path_is_directory(location_path, sub_path).await?;
ensure_file_path_exists(
sub_path,
&IsolatedFilePathData::new(location_id, location_path, &full_path, true)?,
db,
Error::SubPathNotFound,
)
.await?;
Ok(full_path)
ensure_file_path_exists(sub_path, &sub_iso_file_path, db, Error::SubPathNotFound)
.await
.map(|()| Some(sub_iso_file_path))
}
_ => Ok(None),
}
_ => Ok(location_path.to_path_buf()),
}
}
pub async fn maybe_get_iso_file_path_from_sub_path(
location_id: location::id::Type,
sub_path: &Option<impl AsRef<Path> + Send + Sync>,
location_path: impl AsRef<Path> + Send,
db: &PrismaClient,
) -> Result<Option<IsolatedFilePathData<'static>>, Error> {
let location_path = location_path.as_ref();
match sub_path {
Some(sub_path) if sub_path.as_ref() != Path::new("") => {
let full_path = ensure_sub_path_is_in_location(location_path, sub_path).await?;
ensure_sub_path_is_directory(location_path, sub_path).await?;
let sub_iso_file_path =
IsolatedFilePathData::new(location_id, location_path, &full_path, true)?;
ensure_file_path_exists(sub_path, &sub_iso_file_path, db, Error::SubPathNotFound)
.await
.map(|()| Some(sub_iso_file_path))
}
_ => Ok(None),
}
inner(
location_id,
sub_path.as_ref().map(AsRef::as_ref),
location_path.as_ref(),
db,
)
.await
.map_err(E::from)
}
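Editor's note: both helpers above follow the same refactor, a public function generic over `AsRef<Path>` and the caller's error type with the body moved into a non-generic `inner` function, so only a thin shim is monomorphized per call site. A minimal sketch of that pattern with a hypothetical helper and error pair:

```rust
use std::path::{Path, PathBuf};

// Hypothetical error types, to show the `E: From<Error>` conversion at the edge.
#[derive(Debug)]
struct HelperError(String);

#[derive(Debug)]
struct CallerError(String);

impl From<HelperError> for CallerError {
    fn from(e: HelperError) -> Self {
        Self(e.0)
    }
}

pub fn normalize_sub_path<E: From<HelperError>>(
    root: impl AsRef<Path>,
    sub_path: Option<impl AsRef<Path>>,
) -> Result<PathBuf, E> {
    // Non-generic body: compiled once, however many (root, sub_path, E)
    // combinations the generic shim is instantiated with.
    fn inner(root: &Path, sub_path: Option<&Path>) -> Result<PathBuf, HelperError> {
        match sub_path {
            Some(sub) if sub != Path::new("") => {
                if sub.is_absolute() {
                    return Err(HelperError(format!("{} must be relative", sub.display())));
                }
                Ok(root.join(sub))
            }
            _ => Ok(root.to_path_buf()),
        }
    }

    inner(root.as_ref(), sub_path.as_ref().map(AsRef::as_ref)).map_err(E::from)
}

fn main() {
    let full: Result<PathBuf, CallerError> =
        normalize_sub_path("/library", Some("photos/2024"));
    println!("{full:?}");
}
```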

View file

@ -51,15 +51,15 @@ use rspc::ErrorCode;
use specta::Type;
use thiserror::Error;
use tokio::{fs, sync::RwLock};
use tracing::debug;
use tokio::fs;
use tracing::{debug, instrument, trace};
use uuid::Uuid;
pub mod seed;
mod serde_impl;
#[derive(Error, Debug)]
pub enum IndexerRuleError {
pub enum Error {
// User errors
#[error("invalid indexer rule kind integer: {0}")]
InvalidRuleKindInt(i32),
@ -83,16 +83,14 @@ pub enum IndexerRuleError {
MissingField(#[from] MissingFieldError),
}
impl From<IndexerRuleError> for rspc::Error {
fn from(err: IndexerRuleError) -> Self {
match err {
IndexerRuleError::InvalidRuleKindInt(_)
| IndexerRuleError::Glob(_)
| IndexerRuleError::NonUtf8Path(_) => {
Self::with_cause(ErrorCode::BadRequest, err.to_string(), err)
impl From<Error> for rspc::Error {
fn from(e: Error) -> Self {
match e {
Error::InvalidRuleKindInt(_) | Error::Glob(_) | Error::NonUtf8Path(_) => {
Self::with_cause(ErrorCode::BadRequest, e.to_string(), e)
}
_ => Self::with_cause(ErrorCode::InternalServerError, err.to_string(), err),
_ => Self::with_cause(ErrorCode::InternalServerError, e.to_string(), e),
}
}
}
@ -113,21 +111,17 @@ pub struct IndexerRuleCreateArgs {
}
impl IndexerRuleCreateArgs {
pub async fn create(
self,
db: &PrismaClient,
) -> Result<Option<indexer_rule::Data>, IndexerRuleError> {
#[instrument(skip_all, fields(name = %self.name, rules = ?self.rules), err)]
pub async fn create(self, db: &PrismaClient) -> Result<Option<indexer_rule::Data>, Error> {
use indexer_rule::{date_created, date_modified, name, rules_per_kind};
debug!(
"{} a new indexer rule (name = {}, params = {:?})",
"{} a new indexer rule",
if self.dry_run {
"Dry run: Would create"
} else {
"Trying to create"
},
self.name,
self.rules
);
let rules_data = rmp_serde::to_vec_named(
@ -167,7 +161,7 @@ impl IndexerRuleCreateArgs {
Ok(Some(
db.indexer_rule()
.create(
sd_utils::uuid_to_bytes(generate_pub_id()),
sd_utils::uuid_to_bytes(&generate_pub_id()),
vec![
name::set(Some(self.name)),
rules_per_kind::set(Some(rules_data)),
@ -224,7 +218,7 @@ impl RulePerKind {
fn new_files_by_globs_str_and_kind(
globs_str: impl IntoIterator<Item = impl AsRef<str>>,
kind_fn: impl Fn(Vec<Glob>, GlobSet) -> Self,
) -> Result<Self, IndexerRuleError> {
) -> Result<Self, Error> {
globs_str
.into_iter()
.map(|s| s.as_ref().parse::<Glob>())
@ -245,13 +239,13 @@ impl RulePerKind {
pub fn new_accept_files_by_globs_str(
globs_str: impl IntoIterator<Item = impl AsRef<str>>,
) -> Result<Self, IndexerRuleError> {
) -> Result<Self, Error> {
Self::new_files_by_globs_str_and_kind(globs_str, Self::AcceptFilesByGlob)
}
pub fn new_reject_files_by_globs_str(
globs_str: impl IntoIterator<Item = impl AsRef<str>>,
) -> Result<Self, IndexerRuleError> {
) -> Result<Self, Error> {
Self::new_files_by_globs_str_and_kind(globs_str, Self::RejectFilesByGlob)
}
}
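Editor's note: `new_files_by_globs_str_and_kind` parses each glob string and wraps the parsed globs plus their compiled set in the chosen accept/reject variant. A small sketch of the underlying matching, assuming the `Glob`/`GlobSet` types here come from the `globset` crate; the patterns and paths are illustrative:

```rust
use globset::{Glob, GlobSet, GlobSetBuilder};
use std::path::Path;

fn build_glob_set(patterns: &[&str]) -> Result<GlobSet, globset::Error> {
    let mut builder = GlobSetBuilder::new();
    for pattern in patterns {
        builder.add(Glob::new(pattern)?);
    }
    builder.build()
}

// Mirrors the accept/reject pair: accept when any glob matches,
// and a reject rule passes only when none of its globs match.
fn accept_by_glob(source: &Path, accept: &GlobSet) -> bool {
    accept.is_match(source)
}

fn reject_by_glob(source: &Path, reject: &GlobSet) -> bool {
    !accept_by_glob(source, reject)
}

fn main() -> Result<(), globset::Error> {
    let images = build_glob_set(&["*.png", "*.jpg", "*.jpeg"])?;
    let no_hidden = build_glob_set(&["**/.*"])?;

    assert!(accept_by_glob(Path::new("photos/cat.png"), &images));
    assert!(!accept_by_glob(Path::new("notes.txt"), &images));
    assert!(!reject_by_glob(Path::new("photos/.DS_Store"), &no_hidden));
    Ok(())
}
```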
@ -267,51 +261,19 @@ impl MetadataForIndexerRules for Metadata {
}
impl RulePerKind {
#[deprecated = "Use `[apply_with_metadata]` instead"]
async fn apply(
&self,
source: impl AsRef<Path> + Send,
) -> Result<(RuleKind, bool), IndexerRuleError> {
match self {
Self::AcceptIfChildrenDirectoriesArePresent(children) => {
accept_dir_for_its_children(source, children)
.await
.map(|accepted| (RuleKind::AcceptIfChildrenDirectoriesArePresent, accepted))
}
Self::RejectIfChildrenDirectoriesArePresent(children) => {
reject_dir_for_its_children(source, children)
.await
.map(|rejected| (RuleKind::RejectIfChildrenDirectoriesArePresent, rejected))
}
Self::AcceptFilesByGlob(_globs, accept_glob_set) => Ok((
RuleKind::AcceptFilesByGlob,
accept_by_glob(source, accept_glob_set),
)),
Self::RejectFilesByGlob(_globs, reject_glob_set) => Ok((
RuleKind::RejectFilesByGlob,
reject_by_glob(source, reject_glob_set),
)),
Self::IgnoredByGit(git_repo, patterns) => Ok((
RuleKind::IgnoredByGit,
accept_by_gitpattern(source.as_ref(), git_repo, patterns),
)),
}
}
async fn apply_with_metadata(
&self,
source: impl AsRef<Path> + Send,
metadata: &impl MetadataForIndexerRules,
) -> Result<(RuleKind, bool), IndexerRuleError> {
) -> Result<(RuleKind, bool), Error> {
match self {
Self::AcceptIfChildrenDirectoriesArePresent(children) => {
accept_dir_for_its_children_with_metadata(source, metadata, children)
accept_dir_for_its_children(source, metadata, children)
.await
.map(|accepted| (RuleKind::AcceptIfChildrenDirectoriesArePresent, accepted))
}
Self::RejectIfChildrenDirectoriesArePresent(children) => {
reject_dir_for_its_children_with_metadata(source, metadata, children)
reject_dir_for_its_children(source, metadata, children)
.await
.map(|rejected| (RuleKind::RejectIfChildrenDirectoriesArePresent, rejected))
}
@ -326,24 +288,32 @@ impl RulePerKind {
)),
Self::IgnoredByGit(base_dir, patterns) => Ok((
RuleKind::IgnoredByGit,
accept_by_gitpattern(source.as_ref(), base_dir, patterns),
accept_by_git_pattern(source, base_dir, patterns),
)),
}
}
}
fn accept_by_gitpattern(source: &Path, base_dir: &Path, search: &Search) -> bool {
let relative = source
.strip_prefix(base_dir)
.expect("`base_dir` should be our git repo, and `source` should be inside of it");
fn accept_by_git_pattern(
source: impl AsRef<Path>,
base_dir: impl AsRef<Path>,
search: &Search,
) -> bool {
fn inner(source: &Path, base_dir: &Path, search: &Search) -> bool {
let relative = source
.strip_prefix(base_dir)
.expect("`base_dir` should be our git repo, and `source` should be inside of it");
let Some(src) = relative.to_str().map(|s| s.as_bytes().into()) else {
return false;
};
let Some(src) = relative.to_str().map(|s| s.as_bytes().into()) else {
return false;
};
search
.pattern_matching_relative_path(src, Some(source.is_dir()), Case::Fold)
.map_or(true, |rule| rule.pattern.is_negative())
search
.pattern_matching_relative_path(src, Some(source.is_dir()), Case::Fold)
.map_or(true, |rule| rule.pattern.is_negative())
}
inner(source.as_ref(), base_dir.as_ref(), search)
}
#[derive(Debug, Serialize, Deserialize, Clone)]
@ -357,32 +327,19 @@ pub struct IndexerRule {
}
impl IndexerRule {
#[deprecated = "Use `[apply_with_metadata]` instead"]
pub async fn apply(
&self,
source: impl AsRef<Path> + Send,
) -> Result<Vec<(RuleKind, bool)>, IndexerRuleError> {
self.rules
.iter()
.map(|rule| rule.apply(source.as_ref()))
.collect::<Vec<_>>()
.try_join()
.await
}
pub async fn apply_with_metadata(
&self,
source: impl AsRef<Path> + Send,
metadata: &impl MetadataForIndexerRules,
) -> Result<Vec<(RuleKind, bool)>, IndexerRuleError> {
) -> Result<Vec<(RuleKind, bool)>, Error> {
async fn inner(
rules: &[RulePerKind],
source: &Path,
metadata: &impl MetadataForIndexerRules,
) -> Result<Vec<(RuleKind, bool)>, IndexerRuleError> {
) -> Result<Vec<(RuleKind, bool)>, Error> {
rules
.iter()
.map(|rule| rule.apply_with_metadata(source, metadata))
.map(|rule| rule.apply(source, metadata))
.collect::<Vec<_>>()
.try_join()
.await
@ -390,64 +347,79 @@ impl IndexerRule {
inner(&self.rules, source.as_ref(), metadata).await
}
#[deprecated = "Use `[IndexerRuler::apply_all]` instead"]
pub async fn apply_all(
rules: &[Self],
source: impl AsRef<Path> + Send,
) -> Result<HashMap<RuleKind, Vec<bool>>, IndexerRuleError> {
rules
.iter()
.map(|rule| rule.apply(source.as_ref()))
.collect::<Vec<_>>()
.try_join()
.await
.map(|results| {
results.into_iter().flatten().fold(
HashMap::<_, Vec<_>>::with_capacity(RuleKind::variant_count()),
|mut map, (kind, result)| {
map.entry(kind).or_default().push(result);
map
},
)
})
}
}
#[derive(Debug, Clone, Default)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RulerDecision {
Accept,
Reject,
}
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct IndexerRuler {
rules: Arc<RwLock<Vec<IndexerRule>>>,
base: Arc<Vec<IndexerRule>>,
extra: Vec<IndexerRule>,
}
impl Clone for IndexerRuler {
fn clone(&self) -> Self {
Self {
base: Arc::clone(&self.base),
// Each instance of IndexerRuler MUST have its own extra rules; no clones allowed!
extra: Vec::new(),
}
}
}
impl IndexerRuler {
#[must_use]
pub fn new(rules: Vec<IndexerRule>) -> Self {
Self {
rules: Arc::new(RwLock::new(rules)),
base: Arc::new(rules),
extra: Vec::new(),
}
}
pub async fn serialize(&self) -> Result<Vec<u8>, encode::Error> {
rmp_serde::to_vec_named(&*self.rules.read().await)
}
pub async fn evaluate_path(
&self,
source: impl AsRef<Path> + Send,
metadata: &impl MetadataForIndexerRules,
) -> Result<RulerDecision, Error> {
async fn inner(
this: &IndexerRuler,
source: &Path,
metadata: &impl MetadataForIndexerRules,
) -> Result<RulerDecision, Error> {
Ok(
if IndexerRuler::reject_path(
source,
metadata.is_dir(),
&this.apply_all(source, metadata).await?,
) {
RulerDecision::Reject
} else {
RulerDecision::Accept
},
)
}
pub fn deserialize(data: &[u8]) -> Result<Self, decode::Error> {
rmp_serde::from_slice(data).map(Self::new)
inner(self, source.as_ref(), metadata).await
}
pub async fn apply_all(
&self,
source: impl AsRef<Path> + Send,
metadata: &impl MetadataForIndexerRules,
) -> Result<HashMap<RuleKind, Vec<bool>>, IndexerRuleError> {
) -> Result<HashMap<RuleKind, Vec<bool>>, Error> {
async fn inner(
rules: &[IndexerRule],
base: &[IndexerRule],
extra: &[IndexerRule],
source: &Path,
metadata: &impl MetadataForIndexerRules,
) -> Result<HashMap<RuleKind, Vec<bool>>, IndexerRuleError> {
rules
.iter()
.map(|rule| rule.apply_with_metadata(source, metadata))
) -> Result<HashMap<RuleKind, Vec<bool>>, Error> {
base.iter()
.chain(extra.iter())
.map(|rule| rule.apply(source, metadata))
.collect::<Vec<_>>()
.try_join()
.await
@ -462,24 +434,99 @@ impl IndexerRuler {
})
}
inner(&self.rules.read().await, source.as_ref(), metadata).await
inner(&self.base, &self.extra, source.as_ref(), metadata).await
}
/// Extend the indexer rules with the contents from an iterator of rules
pub async fn extend(&self, iter: impl IntoIterator<Item = IndexerRule> + Send) {
let mut indexer = self.rules.write().await;
indexer.extend(iter);
pub fn extend(&mut self, iter: impl IntoIterator<Item = IndexerRule> + Send) {
self.extra.extend(iter);
}
pub async fn has_system(&self, rule: &SystemIndexerRule) -> bool {
let rules = self.rules.read().await;
#[must_use]
pub fn has_system(&self, rule: &SystemIndexerRule) -> bool {
self.base
.iter()
.chain(self.extra.iter())
.any(|inner_rule| rule == inner_rule)
}
rules.iter().any(|inner_rule| rule == inner_rule)
#[instrument(skip_all, fields(current_path = %current_path.display()))]
fn reject_path(
current_path: &Path,
is_dir: bool,
acceptance_per_rule_kind: &HashMap<RuleKind, Vec<bool>>,
) -> bool {
Self::rejected_by_reject_glob(acceptance_per_rule_kind)
|| Self::rejected_by_git_ignore(acceptance_per_rule_kind)
|| (is_dir && Self::rejected_by_children_directories(acceptance_per_rule_kind))
|| Self::rejected_by_accept_glob(acceptance_per_rule_kind)
}
pub fn rejected_by_accept_glob(
acceptance_per_rule_kind: &HashMap<RuleKind, Vec<bool>>,
) -> bool {
let res = acceptance_per_rule_kind
.get(&RuleKind::AcceptFilesByGlob)
.map_or(false, |accept_rules| {
accept_rules.iter().all(|accept| !accept)
});
if res {
trace!("Reject because it didn't passed in any `RuleKind::AcceptFilesByGlob` rules");
}
res
}
pub fn rejected_by_children_directories(
acceptance_per_rule_kind: &HashMap<RuleKind, Vec<bool>>,
) -> bool {
let res = acceptance_per_rule_kind
.get(&RuleKind::RejectIfChildrenDirectoriesArePresent)
.map_or(false, |reject_results| {
reject_results.iter().any(|reject| !reject)
});
if res {
trace!("Rejected by rule `RuleKind::RejectIfChildrenDirectoriesArePresent`");
}
res
}
pub fn rejected_by_reject_glob(
acceptance_per_rule_kind: &HashMap<RuleKind, Vec<bool>>,
) -> bool {
let res = acceptance_per_rule_kind
.get(&RuleKind::RejectFilesByGlob)
.map_or(false, |reject_results| {
reject_results.iter().any(|reject| !reject)
});
if res {
trace!("Rejected by `RuleKind::RejectFilesByGlob`");
}
res
}
pub fn rejected_by_git_ignore(acceptance_per_rule_kind: &HashMap<RuleKind, Vec<bool>>) -> bool {
let res = acceptance_per_rule_kind
.get(&RuleKind::IgnoredByGit)
.map_or(false, |reject_results| {
reject_results.iter().any(|reject| !reject)
});
if res {
trace!("Rejected by `RuleKind::IgnoredByGit`");
}
res
}
}
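Editor's note: `reject_path` collapses the per-kind results into one decision: reject when any reject glob or git-ignore rule fires, when a directory trips the children check, or when accept globs exist but none matched. A small sketch of that aggregation over a hypothetical result map with the same shape as `acceptance_per_rule_kind`:

```rust
use std::collections::HashMap;

#[derive(Debug, Hash, PartialEq, Eq)]
enum RuleKind {
    AcceptFilesByGlob,
    RejectFilesByGlob,
    IgnoredByGit,
}

// Each kind maps to one bool per evaluated rule; `true` means the rule passed.
fn should_reject(results: &HashMap<RuleKind, Vec<bool>>) -> bool {
    let rejected_by_reject_glob = results
        .get(&RuleKind::RejectFilesByGlob)
        .map_or(false, |r| r.iter().any(|passed| !passed));

    let rejected_by_git = results
        .get(&RuleKind::IgnoredByGit)
        .map_or(false, |r| r.iter().any(|passed| !passed));

    // Accept globs only reject when they exist and *none* of them matched.
    let rejected_by_accept_glob = results
        .get(&RuleKind::AcceptFilesByGlob)
        .map_or(false, |r| r.iter().all(|passed| !passed));

    rejected_by_reject_glob || rejected_by_git || rejected_by_accept_glob
}

fn main() {
    let mut results = HashMap::new();
    results.insert(RuleKind::AcceptFilesByGlob, vec![true]); // matched an accept glob
    results.insert(RuleKind::RejectFilesByGlob, vec![true]); // passed the reject glob
    results.insert(RuleKind::IgnoredByGit, vec![true]); // not git-ignored

    assert!(!should_reject(&results));

    results.insert(RuleKind::IgnoredByGit, vec![false]); // git-ignored now
    assert!(should_reject(&results));
}
```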
impl TryFrom<&indexer_rule::Data> for IndexerRule {
type Error = IndexerRuleError;
type Error = Error;
fn try_from(data: &indexer_rule::Data) -> Result<Self, Self::Error> {
Ok(Self {
@ -497,7 +544,7 @@ impl TryFrom<&indexer_rule::Data> for IndexerRule {
}
impl TryFrom<indexer_rule::Data> for IndexerRule {
type Error = IndexerRuleError;
type Error = Error;
fn try_from(data: indexer_rule::Data) -> Result<Self, Self::Error> {
Self::try_from(&data)
@ -512,140 +559,56 @@ fn reject_by_glob(source: impl AsRef<Path>, reject_glob_set: &GlobSet) -> bool {
!accept_by_glob(source.as_ref(), reject_glob_set)
}
#[deprecated = "Use `[accept_dir_for_its_children_with_metadata]` instead"]
async fn accept_dir_for_its_children(
source: impl AsRef<Path> + Send,
children: &HashSet<String>,
) -> Result<bool, IndexerRuleError> {
let source = source.as_ref();
// FIXME(fogodev): Just check for io::ErrorKind::NotADirectory error instead (feature = "io_error_more", issue = "86442")
if !fs::metadata(source)
.await
.map_err(|e| IndexerRuleError::AcceptByItsChildrenFileIO(FileIOError::from((source, e))))?
.is_dir()
{
return Ok(false);
}
let mut read_dir = fs::read_dir(source)
.await // TODO: Check NotADirectory error here when available
.map_err(|e| IndexerRuleError::AcceptByItsChildrenFileIO(FileIOError::from((source, e))))?;
while let Some(entry) = read_dir
.next_entry()
.await
.map_err(|e| IndexerRuleError::AcceptByItsChildrenFileIO(FileIOError::from((source, e))))?
{
let entry_name = entry
.file_name()
.to_str()
.ok_or_else(|| NonUtf8PathError(entry.path().into()))?
.to_string();
if entry
.metadata()
.await
.map_err(|e| {
IndexerRuleError::AcceptByItsChildrenFileIO(FileIOError::from((source, e)))
})?
.is_dir() && children.contains(&entry_name)
{
return Ok(true);
}
}
Ok(false)
}
async fn accept_dir_for_its_children_with_metadata(
source: impl AsRef<Path> + Send,
metadata: &impl MetadataForIndexerRules,
children: &HashSet<String>,
) -> Result<bool, IndexerRuleError> {
let source = source.as_ref();
// FIXME(fogodev): Just check for io::ErrorKind::NotADirectory error instead (feature = "io_error_more", issue = "86442")
if !metadata.is_dir() {
return Ok(false);
}
let mut read_dir = fs::read_dir(source)
.await // TODO: Check NotADirectory error here when available
.map_err(|e| IndexerRuleError::AcceptByItsChildrenFileIO(FileIOError::from((source, e))))?;
while let Some(entry) = read_dir
.next_entry()
.await
.map_err(|e| IndexerRuleError::AcceptByItsChildrenFileIO(FileIOError::from((source, e))))?
{
let entry_name = entry
.file_name()
.to_str()
.ok_or_else(|| NonUtf8PathError(entry.path().into()))?
.to_string();
if entry
.metadata()
.await
.map_err(|e| {
IndexerRuleError::AcceptByItsChildrenFileIO(FileIOError::from((source, e)))
})?
.is_dir() && children.contains(&entry_name)
{
return Ok(true);
}
}
Ok(false)
}
#[deprecated = "Use `[reject_dir_for_its_children_with_metadata]` instead"]
async fn reject_dir_for_its_children(
source: impl AsRef<Path> + Send,
children: &HashSet<String>,
) -> Result<bool, IndexerRuleError> {
let source = source.as_ref();
// FIXME(fogodev): Just check for io::ErrorKind::NotADirectory error instead (feature = "io_error_more", issue = "86442")
if !fs::metadata(source)
.await
.map_err(|e| IndexerRuleError::AcceptByItsChildrenFileIO(FileIOError::from((source, e))))?
.is_dir()
{
return Ok(true);
}
let mut read_dir = fs::read_dir(source)
.await // TODO: Check NotADirectory error here when available
.map_err(|e| IndexerRuleError::RejectByItsChildrenFileIO(FileIOError::from((source, e))))?;
while let Some(entry) = read_dir
.next_entry()
.await
.map_err(|e| IndexerRuleError::RejectByItsChildrenFileIO(FileIOError::from((source, e))))?
{
if entry
.metadata()
.await
.map_err(|e| {
IndexerRuleError::RejectByItsChildrenFileIO(FileIOError::from((source, e)))
})?
.is_dir() && children.contains(
entry
.file_name()
.to_str()
.ok_or_else(|| NonUtf8PathError(entry.path().into()))?,
) {
) -> Result<bool, Error> {
async fn inner(
source: &Path,
metadata: &impl MetadataForIndexerRules,
children: &HashSet<String>,
) -> Result<bool, Error> {
// FIXME(fogodev): Just check for io::ErrorKind::NotADirectory error instead (feature = "io_error_more", issue = "86442")
if !metadata.is_dir() {
return Ok(false);
}
let mut read_dir = fs::read_dir(source)
.await // TODO: Check NotADirectory error here when available
.map_err(|e| Error::AcceptByItsChildrenFileIO(FileIOError::from((source, e))))?;
while let Some(entry) = read_dir
.next_entry()
.await
.map_err(|e| Error::AcceptByItsChildrenFileIO(FileIOError::from((source, e))))?
{
let entry_name = entry
.file_name()
.to_str()
.ok_or_else(|| NonUtf8PathError(entry.path().into()))?
.to_string();
if entry
.metadata()
.await
.map_err(|e| Error::AcceptByItsChildrenFileIO(FileIOError::from((source, e))))?
.is_dir() && children.contains(&entry_name)
{
return Ok(true);
}
}
Ok(false)
}
Ok(true)
inner(source.as_ref(), metadata, children).await
}
async fn reject_dir_for_its_children_with_metadata(
async fn reject_dir_for_its_children(
source: impl AsRef<Path> + Send,
metadata: &impl MetadataForIndexerRules,
children: &HashSet<String>,
) -> Result<bool, IndexerRuleError> {
) -> Result<bool, Error> {
let source = source.as_ref();
// FIXME(fogodev): Just check for io::ErrorKind::NotADirectory error instead (feature = "io_error_more", issue = "86442")
@ -655,18 +618,16 @@ async fn reject_dir_for_its_children_with_metadata(
let mut read_dir = fs::read_dir(source)
.await // TODO: Check NotADirectory error here when available
.map_err(|e| IndexerRuleError::RejectByItsChildrenFileIO(FileIOError::from((source, e))))?;
.map_err(|e| Error::RejectByItsChildrenFileIO(FileIOError::from((source, e))))?;
while let Some(entry) = read_dir
.next_entry()
.await
.map_err(|e| IndexerRuleError::RejectByItsChildrenFileIO(FileIOError::from((source, e))))?
.map_err(|e| Error::RejectByItsChildrenFileIO(FileIOError::from((source, e))))?
{
if entry
.metadata()
.await
.map_err(|e| {
IndexerRuleError::RejectByItsChildrenFileIO(FileIOError::from((source, e)))
})?
.map_err(|e| Error::RejectByItsChildrenFileIO(FileIOError::from((source, e))))?
.is_dir() && children.contains(
entry
.file_name()
@ -710,9 +671,37 @@ mod tests {
}
}
async fn check_rule(indexer_rule: &IndexerRule, path: impl AsRef<Path> + Send) -> bool {
fn check_rule(indexer_rule: &IndexerRule, path: impl AsRef<Path>) -> bool {
let path = path.as_ref();
indexer_rule
.apply(path)
.rules
.iter()
.map(|rule| match rule {
RulePerKind::AcceptFilesByGlob(_globs, accept_glob_set) => (
RuleKind::AcceptFilesByGlob,
accept_by_glob(path, accept_glob_set),
),
RulePerKind::RejectFilesByGlob(_globs, reject_glob_set) => (
RuleKind::RejectFilesByGlob,
reject_by_glob(path, reject_glob_set),
),
RulePerKind::IgnoredByGit(git_repo, patterns) => (
RuleKind::IgnoredByGit,
accept_by_git_pattern(path, git_repo, patterns),
),
_ => unimplemented!("can't use simple `apply` for this rule: {:?}", rule),
})
.all(|(_kind, res)| res)
}
async fn check_rule_with_metadata(
indexer_rule: &IndexerRule,
path: impl AsRef<Path> + Send,
metadata: &impl MetadataForIndexerRules,
) -> bool {
indexer_rule
.apply(path.as_ref(), metadata)
.await
.unwrap()
.into_iter()
@ -739,12 +728,12 @@ mod tests {
)],
);
assert!(!check_rule(&rule, hidden).await);
assert!(check_rule(&rule, normal).await);
assert!(!check_rule(&rule, hidden_inner_dir).await);
assert!(!check_rule(&rule, hidden_inner_file).await);
assert!(check_rule(&rule, normal_inner_dir).await);
assert!(check_rule(&rule, normal_inner_file).await);
assert!(!check_rule(&rule, hidden));
assert!(check_rule(&rule, normal));
assert!(!check_rule(&rule, hidden_inner_dir));
assert!(!check_rule(&rule, hidden_inner_file));
assert!(check_rule(&rule, normal_inner_dir));
assert!(check_rule(&rule, normal_inner_file));
}
#[tokio::test]
@ -765,9 +754,9 @@ mod tests {
)],
);
assert!(check_rule(&rule, project_file).await);
assert!(!check_rule(&rule, project_build_dir).await);
assert!(!check_rule(&rule, project_build_dir_inner).await);
assert!(check_rule(&rule, project_file));
assert!(!check_rule(&rule, project_build_dir));
assert!(!check_rule(&rule, project_build_dir_inner));
}
#[tokio::test]
@ -795,16 +784,16 @@ mod tests {
)],
);
assert!(!check_rule(&rule, text).await);
assert!(check_rule(&rule, png).await);
assert!(check_rule(&rule, jpg).await);
assert!(check_rule(&rule, jpeg).await);
assert!(!check_rule(&rule, inner_text).await);
assert!(check_rule(&rule, inner_png).await);
assert!(check_rule(&rule, inner_jpg).await);
assert!(check_rule(&rule, inner_jpeg).await);
assert!(!check_rule(&rule, many_inner_dirs_text).await);
assert!(check_rule(&rule, many_inner_dirs_png).await);
assert!(!check_rule(&rule, text));
assert!(check_rule(&rule, png));
assert!(check_rule(&rule, jpg));
assert!(check_rule(&rule, jpeg));
assert!(!check_rule(&rule, inner_text));
assert!(check_rule(&rule, inner_png));
assert!(check_rule(&rule, inner_jpg));
assert!(check_rule(&rule, inner_jpeg));
assert!(!check_rule(&rule, many_inner_dirs_text));
assert!(check_rule(&rule, many_inner_dirs_png));
}
#[tokio::test]
@ -833,9 +822,22 @@ mod tests {
)],
);
assert!(check_rule(&rule, project1).await);
assert!(check_rule(&rule, project2).await);
assert!(!check_rule(&rule, not_project).await);
assert!(
!check_rule_with_metadata(&rule, &project1, &fs::metadata(&project1).await.unwrap())
.await
);
assert!(
!check_rule_with_metadata(&rule, &project2, &fs::metadata(&project2).await.unwrap())
.await
);
assert!(
check_rule_with_metadata(
&rule,
&not_project,
&fs::metadata(&not_project).await.unwrap()
)
.await
);
}
#[tokio::test]
@ -864,9 +866,22 @@ mod tests {
)],
);
assert!(!check_rule(&rule, project1).await);
assert!(!check_rule(&rule, project2).await);
assert!(check_rule(&rule, not_project).await);
assert!(
!check_rule_with_metadata(&rule, &project1, &fs::metadata(&project1).await.unwrap())
.await
);
assert!(
!check_rule_with_metadata(&rule, &project2, &fs::metadata(&project2).await.unwrap())
.await
);
assert!(
check_rule_with_metadata(
&rule,
&not_project,
&fs::metadata(&not_project).await.unwrap()
)
.await
);
}
impl PartialEq for RulePerKind {

View file

@ -1,25 +1,24 @@
use std::path::{Path, PathBuf};
use futures_concurrency::future::Join;
use gix_ignore::{glob::search::pattern::List, search::Ignore, Search};
use sd_prisma::prisma::{indexer_rule, PrismaClient};
use std::path::{Path, PathBuf};
use chrono::Utc;
use thiserror::Error;
use futures_concurrency::future::Join;
use gix_ignore::{glob::search::pattern::List, search::Ignore, Search};
use once_cell::sync::Lazy;
use tokio::fs;
use uuid::Uuid;
use super::{IndexerRule, IndexerRuleError, RulePerKind};
use once_cell::sync::Lazy;
use super::{Error, IndexerRule, RulePerKind};
#[derive(Error, Debug)]
#[derive(thiserror::Error, Debug)]
pub enum SeederError {
#[error("Failed to run indexer rules seeder: {0}")]
IndexerRules(#[from] IndexerRuleError),
IndexerRules(#[from] Error),
#[error("An error occurred with the database while applying migrations: {0}")]
DatabaseError(#[from] prisma_client_rust::QueryError),
#[error("Failed to parse indexer rules based on external system")]
InhirentedExternalRules,
InheritedExternalRules,
}
#[derive(Debug)]
@ -29,7 +28,7 @@ pub struct GitIgnoreRules {
impl GitIgnoreRules {
pub async fn get_rules_if_in_git_repo(
library_root: &Path,
location_root: &Path,
current: &Path,
) -> Option<Result<Self, SeederError>> {
let mut git_repo = None;
@ -38,7 +37,7 @@ impl GitIgnoreRules {
for ancestor in current
.ancestors()
.take_while(|&path| path.starts_with(library_root))
.take_while(|&path| path.starts_with(location_root))
{
let git_ignore = ancestor.join(".gitignore");
@ -54,13 +53,16 @@ impl GitIgnoreRules {
}
let git_repo = git_repo?;
Some(Self::parse_gitrepo(git_repo, ignores).await)
Some(Self::parse_git_repo(git_repo, ignores).await)
}
async fn parse_gitrepo(git_repo: &Path, gitignores: Vec<PathBuf>) -> Result<Self, SeederError> {
async fn parse_git_repo(
git_repo: &Path,
git_ignores: Vec<PathBuf>,
) -> Result<Self, SeederError> {
let mut search = Search::default();
let gitignores = gitignores
let git_ignores = git_ignores
.into_iter()
.map(Self::parse_git_ignore)
.collect::<Vec<_>>()
@ -68,7 +70,7 @@ impl GitIgnoreRules {
.await;
search
.patterns
.extend(gitignores.into_iter().filter_map(Result::ok));
.extend(git_ignores.into_iter().filter_map(Result::ok));
let git_exclude_rules = Self::parse_git_exclude(git_repo.join(".git")).await;
if let Ok(rules) = git_exclude_rules {
@ -86,11 +88,11 @@ impl GitIgnoreRules {
if let Ok(Some(patterns)) = List::from_file(gitignore, None, true, &mut buf) {
Ok(patterns)
} else {
Err(SeederError::InhirentedExternalRules)
Err(SeederError::InheritedExternalRules)
}
})
.await
.map_err(|_| SeederError::InhirentedExternalRules)?
.map_err(|_| SeederError::InheritedExternalRules)?
}
async fn parse_git_exclude(dot_git: PathBuf) -> Result<Vec<List<Ignore>>, SeederError> {
@ -98,10 +100,10 @@ impl GitIgnoreRules {
let mut buf = Vec::new();
Search::from_git_dir(dot_git.as_ref(), None, &mut buf)
.map(|search| search.patterns)
.map_err(|_| SeederError::InhirentedExternalRules)
.map_err(|_| SeederError::InheritedExternalRules)
})
.await
.map_err(|_| SeederError::InhirentedExternalRules)?
.map_err(|_| SeederError::InheritedExternalRules)?
}
async fn is_git_repo(path: &Path) -> bool {
@ -179,8 +181,8 @@ pub async fn new_or_existing_library(db: &PrismaClient) -> Result<(), SeederErro
.into_iter()
.enumerate()
{
let pub_id = sd_utils::uuid_to_bytes(Uuid::from_u128(i as u128));
let rules = rmp_serde::to_vec_named(&rule.rules).map_err(IndexerRuleError::from)?;
let pub_id = sd_utils::uuid_to_bytes(&Uuid::from_u128(i as u128));
let rules = rmp_serde::to_vec_named(&rule.rules).map_err(Error::from)?;
let data = vec![
name::set(Some(rule.name.to_string())),

View file

@ -9,7 +9,10 @@ edition = { workspace = true }
[dependencies]
# Spacedrive Sub-crates
sd-prisma = { path = "../../../crates/prisma" }
sd-utils = { path = "../../../crates/utils" }
# Workspace dependencies
prisma-client-rust = { workspace = true }
serde = { workspace = true }
serde = { workspace = true, features = ["derive"] }
specta = { workspace = true }
uuid = { workspace = true, features = ["v4", "serde"] }

View file

@ -29,8 +29,16 @@
#![allow(clippy::missing_errors_doc, clippy::module_name_repetitions)]
use sd_prisma::prisma::{file_path, job, label, location, object};
use sd_utils::{from_bytes_to_uuid, uuid_to_bytes};
use std::{borrow::Cow, fmt};
use serde::{Deserialize, Serialize};
use specta::Type;
use uuid::Uuid;
// File Path selectables!
file_path::select!(file_path_id { id });
file_path::select!(file_path_pub_id { pub_id });
file_path::select!(file_path_pub_and_cas_ids { id pub_id cas_id });
file_path::select!(file_path_just_pub_id_materialized_path {
@ -62,7 +70,10 @@ file_path::select!(file_path_for_media_processor {
name
extension
cas_id
object_id
object: select {
id
pub_id
}
});
file_path::select!(file_path_to_isolate {
location_id
@ -137,6 +148,11 @@ file_path::select!(file_path_to_full_path {
path
}
});
file_path::select!(file_path_to_create_object {
id
pub_id
date_created
});
// File Path includes!
file_path::include!(file_path_with_object { object });
@ -157,6 +173,7 @@ file_path::include!(file_path_for_frontend {
});
// Object selectables!
object::select!(object_ids { id pub_id });
object::select!(object_for_file_identifier {
pub_id
file_paths: select { pub_id cas_id extension is_dir materialized_path name }
@ -222,6 +239,14 @@ job::select!(job_without_data {
date_estimated_completion
});
// Location selectables!
location::select!(location_ids_and_path {
id
pub_id
instance_id
path
});
// Location includes!
location::include!(location_with_indexer_rules {
indexer_rules: select { indexer_rule }
@ -284,3 +309,220 @@ label::include!((take: i64) => label_with_objects {
}
}
});
#[derive(Debug, Serialize, Deserialize, Hash, PartialEq, Eq, Type)]
#[serde(transparent)]
pub struct CasId<'cas_id>(Cow<'cas_id, str>);
impl Clone for CasId<'_> {
fn clone(&self) -> CasId<'static> {
CasId(Cow::Owned(self.0.clone().into_owned()))
}
}
impl<'cas_id> CasId<'cas_id> {
#[must_use]
pub fn as_str(&self) -> &str {
self.0.as_ref()
}
#[must_use]
pub fn to_owned(&self) -> CasId<'static> {
CasId(Cow::Owned(self.0.clone().into_owned()))
}
#[must_use]
pub fn into_owned(self) -> CasId<'static> {
CasId(Cow::Owned(self.0.into_owned()))
}
}
impl From<&CasId<'_>> for file_path::cas_id::Type {
fn from(CasId(cas_id): &CasId<'_>) -> Self {
Some(cas_id.clone().into_owned())
}
}
impl<'cas_id> From<&'cas_id str> for CasId<'cas_id> {
fn from(cas_id: &'cas_id str) -> Self {
Self(Cow::Borrowed(cas_id))
}
}
impl<'cas_id> From<&'cas_id String> for CasId<'cas_id> {
fn from(cas_id: &'cas_id String) -> Self {
Self(Cow::Borrowed(cas_id))
}
}
impl From<String> for CasId<'static> {
fn from(cas_id: String) -> Self {
Self(cas_id.into())
}
}
impl From<CasId<'_>> for String {
fn from(CasId(cas_id): CasId<'_>) -> Self {
cas_id.into_owned()
}
}
impl From<&CasId<'_>> for String {
fn from(CasId(cas_id): &CasId<'_>) -> Self {
cas_id.clone().into_owned()
}
}
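Editor's note: `CasId` wraps a `Cow<str>` so a cas_id borrowed straight from a database row can be used without allocating, and is only promoted to an owned `'static` value when it has to outlive the row (as the thumbnailer does above via `CasId::into_owned`). A small usage sketch against a simplified stand-in with the same shape; the cas_id string is hypothetical:

```rust
use std::borrow::Cow;

// Simplified stand-in for the crate's `CasId<'cas_id>` newtype.
struct CasId<'a>(Cow<'a, str>);

impl<'a> From<&'a str> for CasId<'a> {
    fn from(s: &'a str) -> Self {
        Self(Cow::Borrowed(s))
    }
}

impl CasId<'_> {
    fn into_owned(self) -> CasId<'static> {
        CasId(Cow::Owned(self.0.into_owned()))
    }

    fn as_str(&self) -> &str {
        &self.0
    }
}

fn main() {
    // Borrowed from a (hypothetical) database row: no allocation yet.
    let row_cas_id = String::from("b3f1c2d4e5");
    let borrowed = CasId::from(row_cas_id.as_str());
    assert!(matches!(borrowed.0, Cow::Borrowed(_)));

    // Promote to an owned value when it must outlive the row, e.g. when stored
    // in a task's `HashMap<ThumbnailId, GenerateThumbnailArgs<'static>>`.
    let owned: CasId<'static> = borrowed.into_owned();
    assert_eq!(owned.as_str(), "b3f1c2d4e5");
}
```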
#[derive(Debug, Serialize, Deserialize, Hash, PartialEq, Eq, Clone)]
#[serde(transparent)]
#[repr(transparent)]
pub struct FilePathPubId(PubId);
#[derive(Debug, Serialize, Deserialize, Hash, PartialEq, Eq, Clone)]
#[serde(transparent)]
#[repr(transparent)]
pub struct ObjectPubId(PubId);
#[derive(Debug, Serialize, Deserialize, Hash, PartialEq, Eq, Clone)]
enum PubId {
Uuid(Uuid),
Vec(Vec<u8>),
}
impl PubId {
fn new() -> Self {
Self::Uuid(Uuid::new_v4())
}
fn to_db(&self) -> Vec<u8> {
match self {
Self::Uuid(uuid) => uuid_to_bytes(uuid),
Self::Vec(bytes) => bytes.clone(),
}
}
}
impl Default for PubId {
fn default() -> Self {
Self::new()
}
}
impl From<Uuid> for PubId {
fn from(uuid: Uuid) -> Self {
Self::Uuid(uuid)
}
}
impl From<Vec<u8>> for PubId {
fn from(bytes: Vec<u8>) -> Self {
Self::Vec(bytes)
}
}
impl From<&Vec<u8>> for PubId {
fn from(bytes: &Vec<u8>) -> Self {
Self::Vec(bytes.clone())
}
}
impl From<&[u8]> for PubId {
fn from(bytes: &[u8]) -> Self {
Self::Vec(bytes.to_vec())
}
}
impl From<PubId> for Vec<u8> {
fn from(pub_id: PubId) -> Self {
match pub_id {
PubId::Uuid(uuid) => uuid_to_bytes(&uuid),
PubId::Vec(bytes) => bytes,
}
}
}
impl From<PubId> for Uuid {
fn from(pub_id: PubId) -> Self {
match pub_id {
PubId::Uuid(uuid) => uuid,
PubId::Vec(bytes) => from_bytes_to_uuid(&bytes),
}
}
}
impl fmt::Display for PubId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Uuid(uuid) => write!(f, "{uuid}"),
Self::Vec(bytes) => write!(f, "{}", from_bytes_to_uuid(bytes)),
}
}
}
macro_rules! delegate_pub_id {
($($type_name:ty),+ $(,)?) => {
$(
impl From<::uuid::Uuid> for $type_name {
fn from(uuid: ::uuid::Uuid) -> Self {
Self(uuid.into())
}
}
impl From<Vec<u8>> for $type_name {
fn from(bytes: Vec<u8>) -> Self {
Self(bytes.into())
}
}
impl From<&Vec<u8>> for $type_name {
fn from(bytes: &Vec<u8>) -> Self {
Self(bytes.into())
}
}
impl From<&[u8]> for $type_name {
fn from(bytes: &[u8]) -> Self {
Self(bytes.into())
}
}
impl From<$type_name> for Vec<u8> {
fn from(pub_id: $type_name) -> Self {
pub_id.0.into()
}
}
impl From<$type_name> for ::uuid::Uuid {
fn from(pub_id: $type_name) -> Self {
pub_id.0.into()
}
}
impl ::std::fmt::Display for $type_name {
fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
write!(f, "{}", self.0)
}
}
impl $type_name {
#[must_use]
pub fn new() -> Self {
Self(PubId::new())
}
#[must_use]
pub fn to_db(&self) -> Vec<u8> {
self.0.to_db()
}
}
impl Default for $type_name {
fn default() -> Self {
Self::new()
}
}
)+
};
}
delegate_pub_id!(FilePathPubId, ObjectPubId);
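Editor's note: the `delegate_pub_id!` macro gives `FilePathPubId` and `ObjectPubId` the same surface: mint a fresh v4 UUID, rebuild one from the raw bytes a query returned, and convert back to `Vec<u8>` for the database or `Uuid` for display. A short sketch against a simplified stand-in; the real code converts through `sd_utils::uuid_to_bytes`, which this sketch approximates with `Uuid::as_bytes`:

```rust
use uuid::Uuid;

// Simplified stand-in for the crate's `PubId` / `ObjectPubId` pair.
#[derive(Debug, Clone)]
enum PubId {
    Uuid(Uuid),
    Vec(Vec<u8>),
}

impl PubId {
    fn new() -> Self {
        Self::Uuid(Uuid::new_v4())
    }

    // Whichever variant we hold, the database always stores the 16 raw bytes.
    fn to_db(&self) -> Vec<u8> {
        match self {
            Self::Uuid(uuid) => uuid.as_bytes().to_vec(),
            Self::Vec(bytes) => bytes.clone(),
        }
    }
}

fn main() {
    // New object: generated locally, serialized for the insert query.
    let fresh = PubId::new();
    assert_eq!(fresh.to_db().len(), 16);

    // Existing object: rebuilt from the bytes a query returned.
    let from_row = PubId::Vec(fresh.to_db());
    assert_eq!(from_row.to_db(), fresh.to_db());
}
```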

View file

@ -114,10 +114,10 @@ impl Actor {
}
State::Ingesting(event) => {
debug!(
"ingesting {} operations: {} to {}",
event.messages.len(),
event.messages.first().unwrap().3.timestamp.as_u64(),
event.messages.last().unwrap().3.timestamp.as_u64(),
messages_count = event.messages.len(),
first_message = event.messages.first().unwrap().3.timestamp.as_u64(),
last_message = event.messages.last().unwrap().3.timestamp.as_u64(),
"Ingesting operations;",
);
for (instance, data) in event.messages.0 {

View file

@ -175,7 +175,7 @@ impl Manager {
.crdt_operation()
.find_many(vec![
crdt_operation::instance::is(vec![instance::pub_id::equals(uuid_to_bytes(
instance_uuid,
&instance_uuid,
))]),
crdt_operation::timestamp::gt(timestamp.as_u64() as i64),
])
@ -204,7 +204,7 @@ impl Manager {
.map(|(instance_id, timestamp)| {
prisma_client_rust::and![
$op::instance::is(vec![instance::pub_id::equals(uuid_to_bytes(
*instance_id
instance_id
))]),
$op::timestamp::gt(timestamp.as_u64() as i64)
]
@ -216,7 +216,7 @@ impl Manager {
.clocks
.iter()
.map(|(instance_id, _)| {
uuid_to_bytes(*instance_id)
uuid_to_bytes(instance_id)
})
.collect()
)
@ -263,7 +263,7 @@ impl Manager {
.map(|(instance_id, timestamp)| {
prisma_client_rust::and![
$op::instance::is(vec![instance::pub_id::equals(uuid_to_bytes(
*instance_id
instance_id
))]),
$op::timestamp::gt(timestamp.as_u64() as i64)
]
@ -275,7 +275,7 @@ impl Manager {
.clocks
.iter()
.map(|(instance_id, _)| {
uuid_to_bytes(*instance_id)
uuid_to_bytes(instance_id)
})
.collect()
)

View file

@ -30,11 +30,11 @@ async fn write_test_location(
(
instance.sync.shared_create(
prisma_sync::location::SyncId {
pub_id: uuid_to_bytes(id),
pub_id: uuid_to_bytes(&id),
},
sync_ops,
),
instance.db.location().create(uuid_to_bytes(id), db_ops),
instance.db.location().create(uuid_to_bytes(&id), db_ops),
)
})
.await?)

View file

@ -36,7 +36,7 @@ impl Instance {
db.instance()
.create(
uuid_to_bytes(id),
uuid_to_bytes(&id),
vec![],
vec![],
Utc::now().into(),
@ -73,7 +73,7 @@ impl Instance {
left.db
.instance()
.create(
uuid_to_bytes(right.id),
uuid_to_bytes(&right.id),
vec![],
vec![],
Utc::now().into(),

View file

@ -150,15 +150,19 @@ async fn start_backup(node: Arc<Node>, library: Arc<Library>) -> Uuid {
match do_backup(bkp_id, &node, &library).await {
Ok(path) => {
info!(
"Backup '{bkp_id}' for library '{}' created at '{path:?}'!",
library.id
backup_id = %bkp_id,
library_id = %library.id,
path = %path.display(),
"Backup created!;",
);
invalidate_query!(library, "backups.getAll");
}
Err(e) => {
error!(
"Error with backup '{bkp_id}' for library '{}': {e:?}",
library.id
backup_id = %bkp_id,
library_id = %library.id,
?e,
"Error with backup for library;",
);
// TODO: Alert user something went wrong
@ -282,10 +286,10 @@ async fn do_backup(id: Uuid, node: &Node, library: &Library) -> Result<PathBuf,
async fn start_restore(node: Arc<Node>, path: PathBuf) {
match restore_backup(&node, &path).await {
Ok(Header { id, library_id, .. }) => {
info!("Restored to '{id}' for library '{library_id}'!",);
info!(%id, %library_id, "Restored backup for library!");
}
Err(e) => {
error!("Error restoring backup '{}': {e:#?}", path.display());
error!(path = %path.display(), ?e, "Error restoring backup;");
// TODO: Alert user something went wrong
}

View file

@ -155,9 +155,9 @@ mod library {
&library.db,
&library.sync,
&node.libraries,
instance.uuid,
&instance.uuid,
instance.identity,
instance.node_id,
&instance.node_id,
RemoteIdentity::from_str(&instance.node_remote_identity)
.expect("malformed remote identity in the DB"),
instance.metadata,
@ -304,8 +304,8 @@ mod locations {
.body(ByteStream::from_body_0_4(Full::from("Hello, world!")))
.send()
.await
.map_err(|err| {
tracing::error!("S3 error: {err:?}");
.map_err(|e| {
tracing::error!(?e, "S3 error;");
rspc::Error::new(
rspc::ErrorCode::InternalServerError,
"Failed to upload to S3".to_string(),

View file

@ -7,11 +7,13 @@ use crate::{
library::Library,
object::{
fs::{error::FileSystemJobsError, find_available_filename_for_duplicate},
media::exif_metadata_extractor::{can_extract_exif_data_for_image, extract_exif_data},
// media::exif_metadata_extractor::{can_extract_exif_data_for_image, extract_exif_data},
},
};
use sd_core_file_path_helper::IsolatedFilePathData;
use sd_core_heavy_lifting::media_processor::exif_media_data;
use sd_file_ext::{
extensions::{Extension, ImageExtension},
kind::ObjectKind,
@ -64,18 +66,18 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
};
let image_extension = ImageExtension::from_str(extension).map_err(|e| {
error!("Failed to parse image extension: {e:#?}");
error!(?e, "Failed to parse image extension;");
rspc::Error::new(
ErrorCode::BadRequest,
"Invalid image extension".to_string(),
)
})?;
if !can_extract_exif_data_for_image(&image_extension) {
if !exif_media_data::can_extract(image_extension) {
return Ok(None);
}
let exif_data = extract_exif_data(full_path)
let exif_data = exif_media_data::extract(full_path)
.await
.map_err(|e| {
rspc::Error::with_cause(
@ -91,7 +93,7 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
Some(v) if v == ObjectKind::Audio || v == ObjectKind::Video => {
let ffmpeg_data = MediaData::FFmpeg(
FFmpegMetadata::from_path(full_path).await.map_err(|e| {
error!("{e:#?}");
error!(?e, "Failed to extract ffmpeg metadata;");
rspc::Error::with_cause(
ErrorCode::InternalServerError,
e.to_string(),
@ -206,14 +208,15 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
))
})?;
Ok(())
Ok::<_, rspc::Error>(())
}
Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(()),
Err(e) => Err(FileIOError::from((
path,
e,
"Failed to get file metadata for deletion",
))),
))
.into()),
}
})
.collect::<Vec<_>>()
@ -384,9 +387,10 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
fs::rename(&old_path, &new_path).await.map_err(|e| {
error!(
"Failed to rename file from: '{}' to: '{}'; Error: {e:#?}",
old_path.display(),
new_path.display()
old_path = %old_path.display(),
new_path = %new_path.display(),
?e,
"Failed to rename file;",
);
let e = FileIOError::from((old_path, e, "Failed to rename file"));
rspc::Error::with_cause(ErrorCode::Conflict, e.to_string(), e)
@ -493,7 +497,7 @@ impl EphemeralFileSystemOps {
let target = target_dir.join(name);
Some((source, target))
} else {
warn!("Skipping file with no name: '{}'", source.display());
warn!(source = %source.display(), "Skipping file with no name;");
None
}
})
@ -615,7 +619,7 @@ impl EphemeralFileSystemOps {
let target = target_dir.join(name);
Some((source, target))
} else {
warn!("Skipping file with no name: '{}'", source.display());
warn!(source = %source.display(), "Skipping file with no name;");
None
}
})

View file

@ -9,12 +9,13 @@ use crate::{
old_copy::OldFileCopierJobInit, old_cut::OldFileCutterJobInit,
old_delete::OldFileDeleterJobInit, old_erase::OldFileEraserJobInit,
},
media::{exif_media_data_from_prisma_data, ffmpeg_data_from_prisma_data},
// media::{exif_media_data_from_prisma_data, ffmpeg_data_from_prisma_data},
},
old_job::Job,
old_job::OldJob,
};
use sd_core_file_path_helper::{FilePathError, IsolatedFilePathData};
use sd_core_heavy_lifting::media_processor::{exif_media_data, ffmpeg_media_data};
use sd_core_prisma_helpers::{
file_path_to_isolate, file_path_to_isolate_with_id, object_with_file_paths,
object_with_media_data,
@ -127,13 +128,13 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
.and_then(|obj| {
Some(match obj.kind {
Some(v) if v == ObjectKind::Image as i32 => MediaData::Exif(
exif_media_data_from_prisma_data(obj.exif_data?),
exif_media_data::from_prisma_data(obj.exif_data?),
),
Some(v)
if v == ObjectKind::Audio as i32
|| v == ObjectKind::Video as i32 =>
{
MediaData::FFmpeg(ffmpeg_data_from_prisma_data(
MediaData::FFmpeg(ffmpeg_media_data::from_prisma_data(
obj.ffmpeg_data?,
))
}
@ -476,8 +477,8 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
Ok(()) => Ok(()),
Err(e) if e.kind() == io::ErrorKind::NotFound => {
warn!(
"File not found in the file system, will remove from database: {}",
full_path.display()
path = %full_path.display(),
"File not found in the file system, will remove from database;",
);
library
.db
@ -495,7 +496,7 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
}
}
}
_ => Job::new(args)
_ => OldJob::new(args)
.spawn(&node, &library)
.await
.map_err(Into::into),
@ -560,7 +561,7 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
Ok(())
}
_ => Job::new(args)
_ => OldJob::new(args)
.spawn(&node, &library)
.await
.map_err(Into::into),
@ -642,10 +643,11 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
})
.await
.map_err(|e| {
error!("{e:#?}");
rspc::Error::new(
error!(?e, "Failed to convert image;");
rspc::Error::with_cause(
ErrorCode::InternalServerError,
"Had an internal problem converting image".to_string(),
e,
)
})??;
@ -706,7 +708,7 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
.procedure("eraseFiles", {
R.with2(library())
.mutation(|(node, library), args: OldFileEraserJobInit| async move {
Job::new(args)
OldJob::new(args)
.spawn(&node, &library)
.await
.map_err(Into::into)
@ -715,7 +717,7 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
.procedure("copyFiles", {
R.with2(library())
.mutation(|(node, library), args: OldFileCopierJobInit| async move {
Job::new(args)
OldJob::new(args)
.spawn(&node, &library)
.await
.map_err(Into::into)
@ -724,7 +726,7 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
.procedure("cutFiles", {
R.with2(library())
.mutation(|(node, library), args: OldFileCutterJobInit| async move {
Job::new(args)
OldJob::new(args)
.spawn(&node, &library)
.await
.map_err(Into::into)
@ -878,10 +880,11 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
} else {
fs::rename(&from, &to).await.map_err(|e| {
error!(
"Failed to rename file from: '{}' to: '{}'; Error: {e:#?}",
from.display(),
to.display()
);
from = %from.display(),
to = %to.display(),
?e,
"Failed to rename file;",
);
rspc::Error::with_cause(
ErrorCode::Conflict,
"Failed to rename file".to_string(),

View file

@ -1,21 +1,22 @@
use crate::{
context::NodeContext,
invalidate_query,
location::{find_location, LocationError},
object::{
media::OldMediaProcessorJobInit,
old_file_identifier::old_file_identifier_job::OldFileIdentifierJobInit,
validation::old_validator_job::OldObjectValidatorJobInit,
},
old_job::{Job, JobReport, JobStatus, OldJobs},
object::validation::old_validator_job::OldObjectValidatorJobInit,
old_job::{JobStatus, OldJob, OldJobReport},
};
use sd_core_prisma_helpers::job_without_data;
use sd_core_heavy_lifting::{
file_identifier::FileIdentifier, job_system::report, media_processor::job::MediaProcessor,
JobId, JobSystemError, Report,
};
use sd_prisma::prisma::{job, location, SortOrder};
use std::{
collections::{hash_map::Entry, BTreeMap, HashMap, VecDeque},
path::PathBuf,
sync::Arc,
time::Instant,
};
@ -30,6 +31,8 @@ use uuid::Uuid;
use super::{utils::library, CoreEvent, Ctx, R};
const TEN_MINUTES: Duration = Duration::from_secs(60 * 10);
pub(crate) fn mount() -> AlphaRouter<Ctx> {
R.router()
.procedure("progress", {
@ -41,7 +44,7 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
.subscription(|(node, _), _: ()| async move {
let mut event_bus_rx = node.event_bus.0.subscribe();
// debounce per-job
let mut intervals = BTreeMap::<Uuid, Instant>::new();
let mut intervals = BTreeMap::<JobId, Instant>::new();
async_stream::stream! {
loop {
@ -62,6 +65,9 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
yield progress_event;
*instant = Instant::now();
// remove stale jobs that didn't receive a progress update for more than 10 minutes
intervals.retain(|_, instant| instant.elapsed() < TEN_MINUTES);
}
}
})
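The subscription above keeps one Instant per JobId to debounce progress events, and now also evicts entries that have been silent for ten minutes. A self-contained sketch of that bookkeeping, with the event plumbing elided and JobId assumed to be a Uuid alias:

use std::collections::BTreeMap;
use std::time::{Duration, Instant};
use uuid::Uuid;

type JobId = Uuid; // assumption for this sketch
const TEN_MINUTES: Duration = Duration::from_secs(60 * 10);

fn should_emit(intervals: &mut BTreeMap<JobId, Instant>, job_id: JobId, debounce: Duration) -> bool {
    // Emit if we have never seen this job or the debounce window has passed.
    let emit = intervals
        .get(&job_id)
        .map_or(true, |last| last.elapsed() >= debounce);
    if emit {
        intervals.insert(job_id, Instant::now());
    }
    // Drop jobs that stopped reporting progress more than ten minutes ago.
    intervals.retain(|_, last| last.elapsed() < TEN_MINUTES);
    emit
}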
@ -73,44 +79,53 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
// this is to ensure the client will always get the correct initial state
// - jobs are sorted in to groups by their action
// - TODO: refactor grouping system to a many-to-many table
#[derive(Debug, Clone, Serialize, Deserialize, Type)]
#[derive(Debug, Clone, Serialize, Type)]
pub struct JobGroup {
id: Uuid,
id: JobId,
running_job_id: Option<JobId>,
action: Option<String>,
status: JobStatus,
status: report::Status,
created_at: DateTime<Utc>,
jobs: VecDeque<JobReport>,
jobs: VecDeque<Report>,
}
R.with2(library())
.query(|(node, library), _: ()| async move {
let mut groups: HashMap<String, JobGroup> = HashMap::new();
let job_reports: Vec<JobReport> = library
let job_reports: Vec<Report> = library
.db
.job()
.find_many(vec![])
.order_by(job::date_created::order(SortOrder::Desc))
.take(100)
.select(job_without_data::select())
.exec()
.await?
.into_iter()
.flat_map(JobReport::try_from)
.flat_map(|job| {
if let Ok(report) = Report::try_from(job.clone()) {
Some(report)
} else {
// TODO(fogodev): this is a temporary fix for the old job system
OldJobReport::try_from(job).map(Into::into).ok()
}
})
.collect();
let active_reports_by_id = node.old_jobs.get_active_reports_with_id().await;
let mut active_reports_by_id = node.job_system.get_active_reports().await;
active_reports_by_id.extend(
node.old_jobs
.get_active_reports_with_id()
.await
.into_iter()
.map(|(id, old_report)| (id, old_report.into())),
);
for job in job_reports {
// action name and group key are computed from the job data
let (action_name, group_key) = job.get_meta();
let (action_name, group_key) = job.get_action_name_and_group_key();
trace!(
"job {:#?}, action_name {}, group_key {:?}",
job,
action_name,
group_key
);
trace!(?job, %action_name, ?group_key);
// if the job is running, use the in-memory report
let report = active_reports_by_id.get(&job.id).unwrap_or(&job);
@ -122,7 +137,10 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
Entry::Vacant(entry) => {
entry.insert(JobGroup {
id: job.parent_id.unwrap_or(job.id),
action: Some(action_name.clone()),
running_job_id: (job.status == report::Status::Running
|| job.status == report::Status::Paused)
.then_some(job.id),
action: Some(action_name),
status: job.status,
jobs: [report.clone()].into_iter().collect(),
created_at: job.created_at.unwrap_or(Utc::now()),
@ -132,8 +150,10 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
Entry::Occupied(mut entry) => {
let group = entry.get_mut();
// protect paused status from being overwritten
if report.status != JobStatus::Paused {
if report.status == report::Status::Running
|| report.status == report::Status::Paused
{
group.running_job_id = Some(report.id);
group.status = report.status;
}
@ -146,6 +166,7 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
job.id.to_string(),
JobGroup {
id: job.id,
running_job_id: Some(job.id),
action: None,
status: job.status,
jobs: [report.clone()].into_iter().collect(),
@ -164,7 +185,14 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
.procedure("isActive", {
R.with2(library())
.query(|(node, library), _: ()| async move {
Ok(node.old_jobs.has_active_workers(library.id).await)
let library_id = library.id;
Ok(node
.job_system
.has_active_jobs(NodeContext {
node: Arc::clone(&node),
library,
})
.await || node.old_jobs.has_active_workers(library_id).await)
})
})
.procedure("clear", {
@ -204,30 +232,56 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
// pause job
.procedure("pause", {
R.with2(library())
.mutation(|(node, library), id: Uuid| async move {
let ret = OldJobs::pause(&node.old_jobs, id).await.map_err(Into::into);
.mutation(|(node, library), job_id: JobId| async move {
if let Err(e) = node.job_system.pause(job_id).await {
if matches!(e, JobSystemError::NotFound(_)) {
// If the job is not found, it can be a job from the old job system
node.old_jobs.pause(job_id).await?;
} else {
return Err(e.into());
}
}
invalidate_query!(library, "jobs.isActive");
invalidate_query!(library, "jobs.reports");
ret
Ok(())
})
})
.procedure("resume", {
R.with2(library())
.mutation(|(node, library), id: Uuid| async move {
let ret = OldJobs::resume(&node.old_jobs, id)
.await
.map_err(Into::into);
.mutation(|(node, library), job_id: JobId| async move {
if let Err(e) = node.job_system.resume(job_id).await {
if matches!(e, JobSystemError::NotFound(_)) {
// If the job is not found, it can be a job from the old job system
node.old_jobs.resume(job_id).await?;
} else {
return Err(e.into());
}
}
invalidate_query!(library, "jobs.isActive");
invalidate_query!(library, "jobs.reports");
ret
Ok(())
})
})
.procedure("cancel", {
R.with2(library())
.mutation(|(node, library), id: Uuid| async move {
let ret = OldJobs::cancel(&node.old_jobs, id)
.await
.map_err(Into::into);
.mutation(|(node, library), job_id: JobId| async move {
if let Err(e) = node.job_system.cancel(job_id).await {
if matches!(e, JobSystemError::NotFound(_)) {
// If the job is not found, it can be a job from the old job system
node.old_jobs.cancel(job_id).await?;
} else {
return Err(e.into());
}
}
invalidate_query!(library, "jobs.isActive");
invalidate_query!(library, "jobs.reports");
ret
Ok(())
})
})
.procedure("generateThumbsForLocation", {
@ -250,50 +304,50 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
return Err(LocationError::IdNotFound(id).into());
};
Job::new(OldMediaProcessorJobInit {
location,
sub_path: Some(path),
regenerate_thumbnails: regenerate,
regenerate_labels: false,
})
.spawn(&node, &library)
.await
.map_err(Into::into)
},
)
})
.procedure("generateLabelsForLocation", {
#[derive(Type, Deserialize)]
pub struct GenerateLabelsForLocationArgs {
pub id: location::id::Type,
pub path: PathBuf,
#[serde(default)]
pub regenerate: bool,
}
R.with2(library()).mutation(
|(node, library),
GenerateLabelsForLocationArgs {
id,
path,
regenerate,
}: GenerateLabelsForLocationArgs| async move {
let Some(location) = find_location(&library, id).exec().await? else {
return Err(LocationError::IdNotFound(id).into());
};
Job::new(OldMediaProcessorJobInit {
location,
sub_path: Some(path),
regenerate_thumbnails: false,
regenerate_labels: regenerate,
})
.spawn(&node, &library)
.await
.map_err(Into::into)
node.job_system
.dispatch(
MediaProcessor::new(location, Some(path), regenerate)?,
id,
NodeContext {
node: Arc::clone(&node),
library,
},
)
.await
.map_err(Into::into)
},
)
})
// .procedure("generateLabelsForLocation", {
// #[derive(Type, Deserialize)]
// pub struct GenerateLabelsForLocationArgs {
// pub id: location::id::Type,
// pub path: PathBuf,
// #[serde(default)]
// pub regenerate: bool,
// }
// R.with2(library()).mutation(
// |(node, library),
// GenerateLabelsForLocationArgs {
// id,
// path,
// regenerate,
// }: GenerateLabelsForLocationArgs| async move {
// let Some(location) = find_location(&library, id).exec().await? else {
// return Err(LocationError::IdNotFound(id).into());
// };
// OldJob::new(OldMediaProcessorJobInit {
// location,
// sub_path: Some(path),
// regenerate_thumbnails: false,
// regenerate_labels: regenerate,
// })
// .spawn(&node, &library)
// .await
// .map_err(Into::into)
// },
// )
// })
.procedure("objectValidator", {
#[derive(Type, Deserialize)]
pub struct ObjectValidatorArgs {
@ -307,7 +361,7 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
return Err(LocationError::IdNotFound(args.id).into());
};
Job::new(OldObjectValidatorJobInit {
OldJob::new(OldObjectValidatorJobInit {
location,
sub_path: Some(args.path),
})
@ -324,18 +378,22 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
}
R.with2(library()).mutation(
|(node, library), args: IdentifyUniqueFilesArgs| async move {
let Some(location) = find_location(&library, args.id).exec().await? else {
return Err(LocationError::IdNotFound(args.id).into());
|(node, library), IdentifyUniqueFilesArgs { id, path }: IdentifyUniqueFilesArgs| async move {
let Some(location) = find_location(&library, id).exec().await? else {
return Err(LocationError::IdNotFound(id).into());
};
Job::new(OldFileIdentifierJobInit {
location,
sub_path: Some(args.path),
})
.spawn(&node, &library)
.await
.map_err(Into::into)
node.job_system
.dispatch(
FileIdentifier::new(location, Some(path))?,
id,
NodeContext {
node: Arc::clone(&node),
library,
},
)
.await
.map_err(Into::into)
},
)
})
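The pause, resume and cancel procedures earlier in this file all use the same bridge between the two job systems: try the new JobSystem first and only fall back to the old manager when the id is unknown to it. A condensed sketch of that pattern, extracted from the code above (the error conversions are assumed to exist exactly as they do at those call sites):

use crate::Node;
use sd_core_heavy_lifting::{JobId, JobSystemError};

// Hedged sketch of the bridging pattern used by pause/resume/cancel above.
async fn pause_any_job(node: &Node, job_id: JobId) -> Result<(), rspc::Error> {
    if let Err(e) = node.job_system.pause(job_id).await {
        if matches!(e, JobSystemError::NotFound(_)) {
            // Unknown to the new job system, so it must belong to the old one.
            node.old_jobs.pause(job_id).await?;
        } else {
            return Err(e.into());
        }
    }
    Ok(())
}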

View file

@ -1,8 +1,7 @@
use crate::{
invalidate_query, library::Library, object::media::old_thumbnail::get_indexed_thumb_key,
};
use crate::{invalidate_query, library::Library};
use sd_core_prisma_helpers::label_with_objects;
use sd_core_heavy_lifting::media_processor::ThumbKey;
use sd_core_prisma_helpers::{label_with_objects, CasId};
use sd_prisma::{
prisma::{label, label_on_object, object, SortOrder},
@ -49,7 +48,9 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
file_path_data
.cas_id
.as_ref()
.map(|cas_id| get_indexed_thumb_key(cas_id, library.id))
.map(CasId::from)
.map(CasId::into_owned)
.map(|cas_id| ThumbKey::new_indexed(cas_id, library.id))
}) // Filter out None values and transform each element to Vec<Vec<String>>
.collect::<Vec<_>>(), // Collect into Vec<Vec<Vec<String>>>
})

View file

@ -8,6 +8,7 @@ use crate::{
use futures::StreamExt;
use prisma_client_rust::raw;
use sd_core_heavy_lifting::JobId;
use sd_file_ext::kind::ObjectKind;
use sd_p2p::RemoteIdentity;
use sd_prisma::prisma::{indexer_rule, object, statistics};
@ -106,7 +107,7 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
match STATISTICS_UPDATERS.lock().await.entry(library.id) {
Entry::Occupied(entry) => {
if entry.get().send(Instant::now()).await.is_err() {
error!("Failed to send statistics update request");
error!("Failed to send statistics update request;");
}
}
Entry::Vacant(entry) => {
@ -181,13 +182,13 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
}: DefaultLocations,
node: Arc<Node>,
library: Arc<Library>,
) -> Result<(), rspc::Error> {
) -> Result<Option<JobId>, rspc::Error> {
// If all of them are false, we skip
if [!desktop, !documents, !downloads, !pictures, !music, !videos]
.into_iter()
.all(identity)
{
return Ok(());
return Ok(None);
}
let Some(default_locations_paths) = UserDirs::new() else {
@ -242,7 +243,7 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
.await
.map_err(rspc::Error::from)?
else {
return Ok(());
return Ok(None);
};
let scan_state = ScanState::try_from(location.scan_state)?;
@ -271,7 +272,7 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
})
.fold(&mut maybe_error, |maybe_error, res| {
if let Err(e) = res {
error!("Failed to create default location: {e:#?}");
error!(?e, "Failed to create default location;");
*maybe_error = Some(e);
}
maybe_error
@ -283,7 +284,7 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
debug!("Created default locations");
Ok(())
Ok(None)
}
R.mutation(
@ -296,7 +297,7 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
let library = node.libraries.create(name, None, &node).await?;
debug!("Created library {}", library.id);
debug!(%library.id, "Created library;");
if let Some(locations) = default_locations {
create_default_locations_on_library_creation(
@ -381,16 +382,19 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
for _ in 0..5 {
match library.db._execute_raw(raw!("VACUUM;")).exec().await {
Ok(_) => break,
Err(err) => {
Err(e) => {
warn!(
"Failed to vacuum DB for library '{}', retrying...: {err:#?}",
library.id
%library.id,
?e,
"Failed to vacuum DB for library, retrying...;",
);
tokio::time::sleep(Duration::from_millis(500)).await;
}
}
}
info!("Successfully vacuumed DB for library '{}'", library.id);
info!(%library.id, "Successfully vacuumed DB;");
Ok(())
}),
)
@ -421,7 +425,7 @@ async fn update_statistics_loop(
Message::Tick => {
if last_received_at.elapsed() < FIVE_MINUTES {
if let Err(e) = update_library_statistics(&node, &library).await {
error!("Failed to update library statistics: {e:#?}");
error!(?e, "Failed to update library statistics;");
} else {
invalidate_query!(&library, "library.statistics");
}

View file

@ -1,16 +1,15 @@
use crate::{
invalidate_query,
location::{
delete_location, find_location, indexer::OldIndexerJobInit, light_scan_location,
non_indexed::NonIndexedPathItem, relink_location, scan_location, scan_location_sub_path,
LocationCreateArgs, LocationError, LocationUpdateArgs, ScanState,
delete_location, find_location, light_scan_location, non_indexed::NonIndexedPathItem,
relink_location, scan_location, scan_location_sub_path, LocationCreateArgs, LocationError,
LocationUpdateArgs, ScanState,
},
object::old_file_identifier::old_file_identifier_job::OldFileIdentifierJobInit,
old_job::StatefulJob,
p2p::PeerMetadata,
util::AbortOnDrop,
};
use sd_core_heavy_lifting::{media_processor::ThumbKey, JobName};
use sd_core_indexer_rules::IndexerRuleCreateArgs;
use sd_core_prisma_helpers::{
file_path_for_frontend, label_with_objects, location_with_indexer_rules, object_with_file_paths,
@ -29,28 +28,24 @@ use tracing::{debug, error};
use super::{utils::library, Ctx, R};
// it includes the shard hex formatted as ([["f02", "cab34a76fbf3469f"]])
// Will be None if no thumbnail exists
pub type ThumbnailKey = Vec<String>;
#[derive(Serialize, Type, Debug)]
#[serde(tag = "type")]
pub enum ExplorerItem {
Path {
// provide the frontend with the thumbnail key explicitly
thumbnail: Option<ThumbnailKey>,
thumbnail: Option<ThumbKey>,
// this tells the frontend if a thumbnail actually exists or not
has_created_thumbnail: bool,
// we can't actually modify data from PCR types, that's why computed properties are used on ExplorerItem
item: Box<file_path_for_frontend::Data>,
},
Object {
thumbnail: Option<ThumbnailKey>,
thumbnail: Option<ThumbKey>,
has_created_thumbnail: bool,
item: object_with_file_paths::Data,
},
NonIndexedPath {
thumbnail: Option<ThumbnailKey>,
thumbnail: Option<ThumbKey>,
has_created_thumbnail: bool,
item: NonIndexedPathItem,
},
@ -61,7 +56,7 @@ pub enum ExplorerItem {
item: PeerMetadata,
},
Label {
thumbnails: Vec<ThumbnailKey>,
thumbnails: Vec<ThumbKey>,
item: label_with_objects::Data,
},
}
@ -347,7 +342,7 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
.exec()
.await?;
debug!("Disconnected {count} file paths from objects");
debug!(%count, "Disconnected file paths from objects;");
// library.orphan_remover.invoke().await;
}
@ -409,13 +404,15 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
sub_path,
}: LightScanArgs| async move {
if node
.old_jobs
.has_job_running(|job_identity| {
job_identity.target_location == location_id
&& (job_identity.name == <OldIndexerJobInit as StatefulJob>::NAME
|| job_identity.name
== <OldFileIdentifierJobInit as StatefulJob>::NAME)
})
.job_system
.check_running_jobs(
vec![
JobName::Indexer,
JobName::FileIdentifier,
JobName::MediaProcessor,
],
location_id,
)
.await
{
return Err(rspc::Error::new(
@ -433,7 +430,7 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
let handle = tokio::spawn(async move {
if let Err(e) = light_scan_location(node, library, location, sub_path).await
{
error!("light scan error: {e:#?}");
error!(?e, "Light scan error;");
}
});

View file

@ -8,6 +8,7 @@ use crate::{
Node,
};
use sd_core_heavy_lifting::media_processor::ThumbKey;
use sd_p2p::RemoteIdentity;
use sd_prisma::prisma::file_path;
@ -54,7 +55,7 @@ pub type Router = rspc::Router<Ctx>;
#[derive(Debug, Clone, Serialize, Type)]
pub enum CoreEvent {
NewThumbnail {
thumb_key: Vec<String>,
thumb_key: ThumbKey,
},
NewIdentifiedObjects {
file_path_ids: Vec<file_path::id::Type>,
@ -175,7 +176,7 @@ pub(crate) fn mount() -> Arc<Router> {
.await
.map(|_| true)
}
.map_err(|err| rspc::Error::new(ErrorCode::InternalServerError, err.to_string()))?;
.map_err(|e| rspc::Error::new(ErrorCode::InternalServerError, e.to_string()))?;
match feature {
BackendFeature::CloudSync => {

View file

@ -82,8 +82,9 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
new_model = sd_ai::old_image_labeler::YoloV8::model(Some(&version))
.map_err(|e| {
error!(
"Failed to crate image_detection model: '{}'; Error: {e:#?}",
&version,
%version,
?e,
"Failed to crate image_detection model;",
);
})
.ok();
@ -94,8 +95,8 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
}
})
.await
.map_err(|err| {
error!("Failed to write config: {}", err);
.map_err(|e| {
error!(?e, "Failed to write config;");
rspc::Error::new(
ErrorCode::InternalServerError,
"error updating config".into(),
@ -186,21 +187,14 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
pub background_processing_percentage: u8, // 0-100
}
R.mutation(
|node,
UpdateThumbnailerPreferences {
background_processing_percentage,
}: UpdateThumbnailerPreferences| async move {
|node, UpdateThumbnailerPreferences { .. }: UpdateThumbnailerPreferences| async move {
node.config
.update_preferences(|preferences| {
preferences
.thumbnailer
.set_background_processing_percentage(
background_processing_percentage,
);
.update_preferences(|_| {
// TODO(fogodev): introduce configurable workers count to task system
})
.await
.map_err(|e| {
error!("failed to update thumbnailer preferences: {e:#?}");
error!(?e, "Failed to update thumbnailer preferences;");
rspc::Error::with_cause(
ErrorCode::InternalServerError,
"Failed to update thumbnailer preferences".to_string(),

View file

@ -56,12 +56,12 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
.find_many(vec![])
.exec()
.await
.map_err(|err| {
.map_err(|e| {
rspc::Error::new(
ErrorCode::InternalServerError,
format!(
"Failed to get notifications for library '{}': {}",
library.id, err
library.id, e
),
)
})?
@ -69,12 +69,12 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
.map(|n| {
Ok(Notification {
id: NotificationId::Library(library.id, n.id as u32),
data: rmp_serde::from_slice(&n.data).map_err(|err| {
data: rmp_serde::from_slice(&n.data).map_err(|e| {
rspc::Error::new(
ErrorCode::InternalServerError,
format!(
"Failed to get notifications for library '{}': {}",
library.id, err
library.id, e
),
)
})?,
@ -108,8 +108,8 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
.delete_many(vec![notification::id::equals(id as i32)])
.exec()
.await
.map_err(|err| {
rspc::Error::new(ErrorCode::InternalServerError, err.to_string())
.map_err(|e| {
rspc::Error::new(ErrorCode::InternalServerError, e.to_string())
})?;
}
NotificationId::Node(id) => {
@ -119,8 +119,8 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
.retain(|n| n.id != NotificationId::Node(id));
})
.await
.map_err(|err| {
rspc::Error::new(ErrorCode::InternalServerError, err.to_string())
.map_err(|e| {
rspc::Error::new(ErrorCode::InternalServerError, e.to_string())
})?;
}
}
@ -135,9 +135,7 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
cfg.notifications = vec![];
})
.await
.map_err(|err| {
rspc::Error::new(ErrorCode::InternalServerError, err.to_string())
})?;
.map_err(|e| rspc::Error::new(ErrorCode::InternalServerError, e.to_string()))?;
join_all(
node.libraries

View file

@ -89,20 +89,20 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
))?
.new_stream()
.await
.map_err(|err| {
.map_err(|e| {
rspc::Error::new(
ErrorCode::InternalServerError,
format!("error in peer.new_stream: {:?}", err),
format!("error in peer.new_stream: {:?}", e),
)
})?;
stream
.write_all(&Header::Ping.to_bytes())
.await
.map_err(|err| {
.map_err(|e| {
rspc::Error::new(
ErrorCode::InternalServerError,
format!("error sending ping header: {:?}", err),
format!("error sending ping header: {:?}", e),
)
})?;

View file

@ -2,12 +2,12 @@ use crate::{
api::{locations::ExplorerItem, utils::library},
library::Library,
location::{non_indexed, LocationError},
object::media::old_thumbnail::get_indexed_thumb_key,
util::{unsafe_streamed_query, BatchedStream},
};
use prisma_client_rust::Operator;
use sd_core_prisma_helpers::{file_path_for_frontend, object_with_file_paths};
use sd_core_heavy_lifting::media_processor::ThumbKey;
use sd_core_prisma_helpers::{file_path_for_frontend, object_with_file_paths, CasId};
use sd_prisma::prisma::{self, PrismaClient};
use std::path::PathBuf;
@ -217,21 +217,23 @@ pub fn mount() -> AlphaRouter<Ctx> {
let mut items = Vec::with_capacity(file_paths.len());
for file_path in file_paths {
let has_created_thumbnail = if let Some(cas_id) = &file_path.cas_id {
library
.thumbnail_exists(&node, cas_id)
.await
.map_err(LocationError::from)?
} else {
false
};
let has_created_thumbnail =
if let Some(cas_id) = file_path.cas_id.as_ref().map(CasId::from) {
library
.thumbnail_exists(&node, &cas_id)
.await
.map_err(LocationError::from)?
} else {
false
};
items.push(ExplorerItem::Path {
thumbnail: file_path
.cas_id
.as_ref()
// .filter(|_| thumbnail_exists_locally)
.map(|i| get_indexed_thumb_key(i, library.id)),
.map(CasId::from)
.map(CasId::into_owned)
.map(|cas_id| ThumbKey::new_indexed(cas_id, library.id)),
has_created_thumbnail,
item: Box::new(file_path),
})
@ -332,9 +334,11 @@ pub fn mount() -> AlphaRouter<Ctx> {
.file_paths
.iter()
.map(|fp| fp.cas_id.as_ref())
.find_map(|c| c);
.find_map(|c| c)
.map(CasId::from)
.map(|cas_id| cas_id.to_owned());
let has_created_thumbnail = if let Some(cas_id) = cas_id {
let has_created_thumbnail = if let Some(cas_id) = &cas_id {
library.thumbnail_exists(&node, cas_id).await.map_err(|e| {
rspc::Error::with_cause(
ErrorCode::InternalServerError,
@ -348,8 +352,7 @@ pub fn mount() -> AlphaRouter<Ctx> {
items.push(ExplorerItem::Object {
thumbnail: cas_id
// .filter(|_| thumbnail_exists_locally)
.map(|cas_id| get_indexed_thumb_key(cas_id, library.id)),
.map(|cas_id| ThumbKey::new_indexed(cas_id, library.id)),
item: object,
has_created_thumbnail,
});
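Both this search router and the labels router earlier in the diff now derive thumbnail keys from borrowed CasIds instead of raw strings. A hedged sketch of that mapping chain on its own; the Option<String> input shape and the Uuid library id follow the call sites above, everything else is an assumption:

use sd_core_heavy_lifting::media_processor::ThumbKey;
use sd_core_prisma_helpers::CasId;
use uuid::Uuid;

fn thumb_key_for(cas_id: Option<&String>, library_id: Uuid) -> Option<ThumbKey> {
    cas_id
        .map(CasId::from)       // borrow the raw string as a CasId<'_>
        .map(CasId::into_owned) // detach it from the row's lifetime
        .map(|cas_id| ThumbKey::new_indexed(cas_id, library_id))
}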

View file

@ -82,7 +82,7 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
// https://docs.rs/serde/latest/serde/de/struct.IgnoredAny.html
if let Err(e) = serde_json::from_str::<IgnoredAny>(&s) {
error!("failed to parse filters: {e:#?}");
error!(?e, "Failed to parse filters;");
None
} else {
Some(s)

View file

@ -221,7 +221,7 @@ pub(crate) fn mount() -> AlphaRouter<Ctx> {
.iter()
.filter(|fp| fp.is_dir.unwrap_or_default() && fp.object.is_none())
.map(|fp| {
let id = uuid_to_bytes(Uuid::new_v4());
let id = uuid_to_bytes(&Uuid::new_v4());
sync_params.extend(sync.shared_create(
prisma_sync::object::SyncId { pub_id: id.clone() },

View file

@ -132,6 +132,19 @@ impl InvalidRequests {
#[macro_export]
// #[allow(clippy::crate_in_macro_def)]
macro_rules! invalidate_query {
($ctx:expr, $query:ident) => {{
let ctx: &$crate::library::Library = &$ctx; // Assert the context is the correct type
let query: &'static str = $query;
::tracing::trace!(target: "sd_core::invalidate-query", "invalidate_query!(\"{}\") at {}", query, concat!(file!(), ":", line!()));
// The error are ignored here because they aren't mission critical. If they fail the UI might be outdated for a bit.
ctx.emit($crate::api::CoreEvent::InvalidateOperation(
$crate::api::utils::InvalidateOperationEvent::dangerously_create(query, serde_json::Value::Null, None)
))
}};
($ctx:expr, $key:literal) => {{
let ctx: &$crate::library::Library = &$ctx; // Assert the context is the correct type
@ -324,8 +337,12 @@ pub(crate) fn mount_invalidate() -> AlphaRouter<Ctx> {
) => {
let key = match to_key(&(key, arg)) {
Ok(key) => key,
Err(err) => {
warn!("Error deriving key for invalidate operation '{:?}': {:?}", first_event, err);
Err(e) => {
warn!(
?first_event,
?e,
"Error deriving key for invalidate operation;"
);
continue;
}
};
@ -345,7 +362,10 @@ pub(crate) fn mount_invalidate() -> AlphaRouter<Ctx> {
}
event = event_bus_rx.recv() => {
let Ok(event) = event else {
warn!("Shutting down invalidation manager thread due to the core event bus being dropped!");
warn!(
"Shutting down invalidation manager thread \
due to the core event bus being dropped!"
);
break;
};
@ -359,8 +379,12 @@ pub(crate) fn mount_invalidate() -> AlphaRouter<Ctx> {
Ok(key) => {
buf.insert(key, op);
},
Err(err) => {
warn!("Error deriving key for invalidate operation '{:?}': {:?}", op, err);
Err(e) => {
warn!(
?op,
?e,
"Error deriving key for invalidate operation;",
);
},
}
},
@ -383,7 +407,10 @@ pub(crate) fn mount_invalidate() -> AlphaRouter<Ctx> {
Ok(_) => {}
// All receivers are shutdown means that all clients are disconnected.
Err(_) => {
debug!("Shutting down invalidation manager! This is normal if all clients disconnects.");
debug!(
"Shutting down invalidation manager! \
This is normal if all clients disconnects."
);
manager_thread_active.swap(false, Ordering::Relaxed);
break;
}
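The new invalidate_query! arm added near the top of this file accepts an identifier bound to a &'static str, rather than only a string literal; the NodeContext::invalidate_query implementation in the new core/src/context.rs depends on it. A minimal usage sketch under that assumption:

// Hypothetical caller: the query name is chosen at runtime but is 'static,
// which is exactly the case the new macro arm exists for.
fn invalidate(library: &crate::library::Library, query: &'static str) {
    crate::invalidate_query!(library, query);
}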

View file

@ -42,7 +42,6 @@ pub async fn run_actor(
break;
}
Request::Messages { timestamps, .. } => timestamps,
_ => continue,
};
let (ops_ids, ops): (Vec<_>, Vec<_>) = err_break!(
@ -60,10 +59,10 @@ pub async fn run_actor(
}
debug!(
"Sending {} messages ({:?} to {:?}) to ingester",
ops.len(),
ops.first().map(|operation| operation.timestamp.as_u64()),
ops.last().map(|operation| operation.timestamp.as_u64()),
messages_count = ops.len(),
first_message = ?ops.first().map(|operation| operation.timestamp.as_u64()),
last_message = ?ops.last().map(|operation| operation.timestamp.as_u64()),
"Sending messages to ingester",
);
let (wait_tx, wait_rx) = tokio::sync::oneshot::channel::<()>();

View file

@ -97,7 +97,7 @@ macro_rules! err_break {
match $e {
Ok(d) => d,
Err(e) => {
tracing::error!("{e}");
tracing::error!(?e);
break;
}
}

View file

@ -56,7 +56,7 @@ pub async fn run_actor(
.map(|id| {
db.cloud_crdt_operation()
.find_first(vec![cloud_crdt_operation::instance::is(vec![
instance::pub_id::equals(uuid_to_bytes(*id)),
instance::pub_id::equals(uuid_to_bytes(id)),
])])
.order_by(cloud_crdt_operation::timestamp::order(
SortOrder::Desc,
@ -76,8 +76,10 @@ pub async fn run_actor(
let cloud_timestamp = d.map(|d| d.timestamp).unwrap_or_default() as u64;
debug!(
"Instance {id}, Sync Timestamp {}, Cloud Timestamp {cloud_timestamp}",
sync_timestamp.as_u64()
instance_id = %id,
sync_timestamp = sync_timestamp.as_u64(),
%cloud_timestamp,
"Comparing sync timestamps",
);
let max_timestamp = Ord::max(cloud_timestamp, sync_timestamp.as_u64());
@ -118,7 +120,10 @@ pub async fn run_actor(
.await
);
info!("Received {} collections", collections.len());
info!(
collections_count = collections.len(),
"Received collections;",
);
if collections.is_empty() {
break;
@ -165,9 +170,9 @@ pub async fn run_actor(
&db,
&sync,
&libraries,
collection.instance_uuid,
&collection.instance_uuid,
instance.identity,
instance.node_id,
&instance.node_id,
RemoteIdentity::from_str(&instance.node_remote_identity)
.expect("malformed remote identity in the DB"),
node.p2p.peer_metadata(),
@ -185,14 +190,10 @@ pub async fn run_actor(
let operations = compressed_operations.into_ops();
debug!(
"Processing collection. Instance {}, Start {:?}, End {:?}",
&collection.instance_uuid,
operations
.first()
.map(|operation| operation.timestamp.as_u64()),
operations
.last()
.map(|operation| operation.timestamp.as_u64()),
instance_id = %collection.instance_uuid,
start = ?operations.first().map(|operation| operation.timestamp.as_u64()),
end = ?operations.last().map(|operation| operation.timestamp.as_u64()),
"Processing collection",
);
err_break!(write_cloud_ops_to_db(operations, &db).await);
@ -247,9 +248,9 @@ pub async fn upsert_instance(
db: &PrismaClient,
sync: &sd_core_sync::Manager,
libraries: &Libraries,
uuid: Uuid,
uuid: &Uuid,
identity: RemoteIdentity,
node_id: Uuid,
node_id: &Uuid,
node_remote_identity: RemoteIdentity,
metadata: HashMap<String, String>,
) -> prisma_client_rust::Result<()> {
@ -276,7 +277,7 @@ pub async fn upsert_instance(
.exec()
.await?;
sync.timestamps.write().await.entry(uuid).or_default();
sync.timestamps.write().await.entry(*uuid).or_default();
// Called again so the new instances are picked up
libraries.update_instances_by_id(library_id).await;

View file

@ -52,8 +52,8 @@ pub async fn run_actor(
use sd_cloud_api::library::message_collections::do_add;
debug!(
"Preparing to send {} instances' operations to cloud",
req_adds.len()
total_operations = req_adds.len(),
"Preparing to send instance's operations to cloud;"
);
// gets new operations for each instance to send to cloud
@ -84,10 +84,7 @@ pub async fn run_actor(
use base64::prelude::*;
debug!(
"Instance {}: {} to {}",
req_add.instance_uuid, start_time, end_time
);
debug!(instance_id = %req_add.instance_uuid, %start_time, %end_time);
instances.push(do_add::Input {
uuid: req_add.instance_uuid,

229
core/src/context.rs Normal file
View file

@ -0,0 +1,229 @@
use crate::{api::CoreEvent, invalidate_query, library::Library, old_job::JobProgressEvent, Node};
use sd_core_heavy_lifting::{
job_system::report::{Report, Status},
OuterContext, ProgressUpdate, UpdateEvent,
};
use std::{
ops::{Deref, DerefMut},
sync::{
atomic::{AtomicU8, Ordering},
Arc,
},
};
use chrono::{DateTime, Utc};
use tokio::{spawn, sync::RwLock};
use tracing::{error, trace};
use uuid::Uuid;
#[derive(Clone)]
pub struct NodeContext {
pub node: Arc<Node>,
pub library: Arc<Library>,
}
pub trait NodeContextExt: sealed::Sealed {
fn library(&self) -> &Arc<Library>;
}
mod sealed {
pub trait Sealed {}
}
impl sealed::Sealed for NodeContext {}
impl NodeContextExt for NodeContext {
fn library(&self) -> &Arc<Library> {
&self.library
}
}
impl OuterContext for NodeContext {
fn id(&self) -> Uuid {
self.library.id
}
fn db(&self) -> &Arc<sd_prisma::prisma::PrismaClient> {
&self.library.db
}
fn sync(&self) -> &Arc<sd_core_sync::Manager> {
&self.library.sync
}
fn invalidate_query(&self, query: &'static str) {
invalidate_query!(self.library, query)
}
fn query_invalidator(&self) -> impl Fn(&'static str) + Send + Sync {
|query| {
invalidate_query!(self.library, query);
}
}
fn report_update(&self, update: UpdateEvent) {
// FIX-ME: Remove this conversion once we have a proper atomic updates system
let event = match update {
UpdateEvent::NewThumbnail { thumb_key } => CoreEvent::NewThumbnail { thumb_key },
UpdateEvent::NewIdentifiedObjects { file_path_ids } => {
CoreEvent::NewIdentifiedObjects { file_path_ids }
}
};
self.node.emit(event);
}
fn get_data_directory(&self) -> &std::path::Path {
&self.node.data_dir
}
}
#[derive(Clone)]
pub struct JobContext<OuterCtx: OuterContext + NodeContextExt> {
outer_ctx: OuterCtx,
report: Arc<RwLock<Report>>,
start_time: DateTime<Utc>,
report_update_counter: Arc<AtomicU8>,
}
impl<OuterCtx: OuterContext + NodeContextExt> OuterContext for JobContext<OuterCtx> {
fn id(&self) -> Uuid {
self.outer_ctx.id()
}
fn db(&self) -> &Arc<sd_prisma::prisma::PrismaClient> {
self.outer_ctx.db()
}
fn sync(&self) -> &Arc<sd_core_sync::Manager> {
self.outer_ctx.sync()
}
fn invalidate_query(&self, query: &'static str) {
self.outer_ctx.invalidate_query(query);
}
fn query_invalidator(&self) -> impl Fn(&'static str) + Send + Sync {
self.outer_ctx.query_invalidator()
}
fn report_update(&self, update: UpdateEvent) {
self.outer_ctx.report_update(update);
}
fn get_data_directory(&self) -> &std::path::Path {
self.outer_ctx.get_data_directory()
}
}
impl<OuterCtx: OuterContext + NodeContextExt> sd_core_heavy_lifting::JobContext<OuterCtx>
for JobContext<OuterCtx>
{
fn new(report: Report, outer_ctx: OuterCtx) -> Self {
Self {
report: Arc::new(RwLock::new(report)),
outer_ctx,
start_time: Utc::now(),
report_update_counter: Arc::new(AtomicU8::new(0)),
}
}
async fn progress(&self, updates: impl IntoIterator<Item = ProgressUpdate> + Send) {
let mut report = self.report.write().await;
// protect against updates if job is not running
if report.status != Status::Running {
return;
};
let mut changed_phase = false;
for update in updates {
match update {
ProgressUpdate::TaskCount(task_count) => {
report.task_count = task_count as i32;
}
ProgressUpdate::CompletedTaskCount(completed_task_count) => {
report.completed_task_count = completed_task_count as i32;
}
ProgressUpdate::Message(message) => {
trace!(job_id = %report.id, %message, "job message;");
report.message = message;
}
ProgressUpdate::Phase(phase) => {
trace!(
job_id = %report.id,
"changing phase: {} -> {phase};",
report.phase
);
report.phase = phase;
changed_phase = true;
}
}
}
// Calculate elapsed time
let elapsed = Utc::now() - self.start_time;
// Calculate remaining time
let task_count = report.task_count as usize;
let completed_task_count = report.completed_task_count as usize;
let remaining_task_count = task_count.saturating_sub(completed_task_count);
// Adding 1 to avoid division by zero
let remaining_time_per_task = elapsed / (completed_task_count + 1) as i32;
let remaining_time = remaining_time_per_task * remaining_task_count as i32;
// Update the report with estimated remaining time
report.estimated_completion = Utc::now()
.checked_add_signed(remaining_time)
.unwrap_or(Utc::now());
let library = self.outer_ctx.library();
let counter = self.report_update_counter.fetch_add(1, Ordering::AcqRel);
if counter == 50 || counter == 0 || changed_phase {
self.report_update_counter.store(1, Ordering::Release);
spawn({
let db = Arc::clone(&library.db);
let mut report = report.clone();
async move {
if let Err(e) = report.update(&db).await {
error!(
?e,
"Failed to update job report on debounced job progress event;"
);
}
}
});
}
// emit a CoreEvent
library.emit(CoreEvent::JobProgress(JobProgressEvent {
id: report.id,
library_id: library.id,
task_count: report.task_count,
completed_task_count: report.completed_task_count,
estimated_completion: report.estimated_completion,
phase: report.phase.clone(),
message: report.message.clone(),
}));
}
async fn report(&self) -> impl Deref<Target = Report> {
Arc::clone(&self.report).read_owned().await
}
async fn report_mut(&self) -> impl DerefMut<Target = Report> {
Arc::clone(&self.report).write_owned().await
}
fn get_outer_ctx(&self) -> OuterCtx {
self.outer_ctx.clone()
}
}
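The progress implementation above estimates a completion time from the average elapsed time per completed task. A small self-contained sketch of that arithmetic using the same chrono types (the numbers in the comment are illustrative):

use chrono::{DateTime, Duration, Utc};

fn estimate_completion(
    task_count: usize,
    completed_task_count: usize,
    elapsed: Duration,
) -> DateTime<Utc> {
    let remaining_task_count = task_count.saturating_sub(completed_task_count);
    // +1 avoids a division by zero before the first task completes.
    let remaining_time_per_task = elapsed / (completed_task_count + 1) as i32;
    let remaining_time = remaining_time_per_task * remaining_task_count as i32;
    // e.g. 100 tasks, 24 done after 120s -> 4.8s per task -> ~365s remaining.
    Utc::now().checked_add_signed(remaining_time).unwrap_or_else(Utc::now)
}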

View file

@ -1,16 +1,13 @@
use crate::{
api::{utils::InvalidateOperationEvent, CoreEvent},
library::Library,
object::media::old_thumbnail::WEBP_EXTENSION,
p2p::operations::{self, request_file},
util::InfallibleResponse,
Node,
};
use async_stream::stream;
use bytes::Bytes;
use mpsc_to_async_write::MpscToAsyncWrite;
use sd_core_file_path_helper::IsolatedFilePathData;
use sd_core_heavy_lifting::media_processor::WEBP_EXTENSION;
use sd_core_prisma_helpers::file_path_to_handle_custom_uri;
use sd_file_ext::text::is_text;
@ -30,6 +27,7 @@ use std::{
sync::Arc,
};
use async_stream::stream;
use axum::{
body::{self, Body, BoxBody, Full, StreamBody},
extract::{self, State},
@ -39,6 +37,7 @@ use axum::{
routing::get,
Router,
};
use bytes::Bytes;
use http_body::combinators::UnsyncBoxBody;
use hyper::{header, upgrade::OnUpgrade};
use mini_moka::sync::Cache;
@ -56,6 +55,8 @@ mod mpsc_to_async_write;
mod serve_file;
mod utils;
use mpsc_to_async_write::MpscToAsyncWrite;
type CacheKey = (Uuid, file_path::id::Type);
#[derive(Debug, Clone)]
@ -74,8 +75,8 @@ pub enum ServeFrom {
Local,
/// Serve from a specific instance
Remote {
library_identity: RemoteIdentity,
node_identity: RemoteIdentity,
library_identity: Box<RemoteIdentity>,
node_identity: Box<RemoteIdentity>,
library: Arc<Library>,
},
}
@ -102,8 +103,8 @@ async fn request_to_remote_node(
let mut response = match operations::remote_rspc(p2p.clone(), identity, request).await {
Ok(v) => v,
Err(err) => {
warn!("Error doing remote rspc query with '{identity}': {err:?}");
Err(e) => {
warn!(%identity, ?e, "Error doing remote rspc query;");
return StatusCode::BAD_GATEWAY.into_response();
}
};
@ -120,21 +121,21 @@ async fn request_to_remote_node(
};
tokio::spawn(async move {
let Ok(mut request_upgraded) = request_upgraded.await.map_err(|err| {
warn!("Error upgrading websocket request: {err}");
let Ok(mut request_upgraded) = request_upgraded.await.map_err(|e| {
warn!(?e, "Error upgrading websocket request;");
}) else {
return;
};
let Ok(mut response_upgraded) = response_upgraded.await.map_err(|err| {
warn!("Error upgrading websocket response: {err}");
let Ok(mut response_upgraded) = response_upgraded.await.map_err(|e| {
warn!(?e, "Error upgrading websocket response;");
}) else {
return;
};
copy_bidirectional(&mut request_upgraded, &mut response_upgraded)
.await
.map_err(|err| {
warn!("Error upgrading websocket response: {err}");
.map_err(|e| {
warn!(?e, "Error upgrading websocket response;");
})
.ok();
});
@ -204,8 +205,8 @@ async fn get_or_init_lru_entry(
ServeFrom::Local
} else {
ServeFrom::Remote {
library_identity,
node_identity,
library_identity: Box::new(library_identity),
node_identity: Box::new(node_identity),
library: library.clone(),
}
},
@ -237,9 +238,9 @@ pub fn base_router() -> Router<LocalState> {
.then_some(())
.ok_or_else(|| not_found(()))?;
let file = File::open(&path).await.map_err(|err| {
let file = File::open(&path).await.map_err(|e| {
InfallibleResponse::builder()
.status(if err.kind() == io::ErrorKind::NotFound {
.status(if e.kind() == io::ErrorKind::NotFound {
StatusCode::NOT_FOUND
} else {
StatusCode::INTERNAL_SERVER_ERROR
@ -270,7 +271,7 @@ pub fn base_router() -> Router<LocalState> {
serve_from,
..
},
..
_library,
) = get_or_init_lru_entry(&state, path).await?;
match serve_from {
@ -282,24 +283,23 @@ pub fn base_router() -> Router<LocalState> {
.then_some(())
.ok_or_else(|| not_found(()))?;
let mut file =
File::open(&file_path_full_path).await.map_err(|err| {
InfallibleResponse::builder()
.status(if err.kind() == io::ErrorKind::NotFound {
StatusCode::NOT_FOUND
} else {
StatusCode::INTERNAL_SERVER_ERROR
})
.body(body::boxed(Full::from("")))
})?;
let mut file = File::open(&file_path_full_path).await.map_err(|e| {
InfallibleResponse::builder()
.status(if e.kind() == io::ErrorKind::NotFound {
StatusCode::NOT_FOUND
} else {
StatusCode::INTERNAL_SERVER_ERROR
})
.body(body::boxed(Full::from("")))
})?;
let resp = InfallibleResponse::builder().header(
"Content-Type",
HeaderValue::from_str(
&infer_the_mime_type(&extension, &mut file, &metadata).await?,
)
.map_err(|err| {
error!("Error converting mime-type into header value: {}", err);
.map_err(|e| {
error!(?e, "Error converting mime-type into header value;");
internal_server_error(())
})?,
);
@ -316,15 +316,20 @@ pub fn base_router() -> Router<LocalState> {
let (tx, mut rx) = tokio::sync::mpsc::channel::<io::Result<Bytes>>(150);
request_file(
state.node.p2p.p2p.clone(),
node_identity,
*node_identity,
&library.identity,
file_path_pub_id,
Range::Full,
MpscToAsyncWrite::new(PollSender::new(tx)),
)
.await
.map_err(|err| {
error!("Error requesting file {file_path_pub_id:?} from node {:?}: {err:?}", library.identity.to_remote_identity());
.map_err(|e| {
error!(
%file_path_pub_id,
node_identity = ?library.identity.to_remote_identity(),
?e,
"Error requesting file from other node;",
);
internal_server_error(())
})?;
@ -352,9 +357,9 @@ pub fn base_router() -> Router<LocalState> {
.then_some(())
.ok_or_else(|| not_found(()))?;
let mut file = File::open(&path).await.map_err(|err| {
let mut file = File::open(&path).await.map_err(|e| {
InfallibleResponse::builder()
.status(if err.kind() == io::ErrorKind::NotFound {
.status(if e.kind() == io::ErrorKind::NotFound {
StatusCode::NOT_FOUND
} else {
StatusCode::INTERNAL_SERVER_ERROR
@ -368,8 +373,8 @@ pub fn base_router() -> Router<LocalState> {
None => "text/plain".to_string(),
Some(ext) => infer_the_mime_type(ext, &mut file, &metadata).await?,
})
.map_err(|err| {
error!("Error converting mime-type into header value: {}", err);
.map_err(|e| {
error!(?e, "Error converting mime-type into header value;");
internal_server_error(())
})?,
);
@ -423,8 +428,8 @@ pub fn router(node: Arc<Node>) -> Router<()> {
mut request: Request<Body>| async move {
let identity = match RemoteIdentity::from_str(&identity) {
Ok(identity) => identity,
Err(err) => {
warn!("Error parsing identity '{}': {}", identity, err);
Err(e) => {
warn!(%identity, ?e, "Error parsing identity;");
return (StatusCode::BAD_REQUEST, HeaderMap::new(), vec![])
.into_response();
}

View file

@ -11,8 +11,8 @@ use http_body::Full;
use tracing::debug;
#[track_caller]
pub(crate) fn bad_request(err: impl Debug) -> http::Response<BoxBody> {
debug!("400: Bad Request at {}: {err:?}", Location::caller());
pub(crate) fn bad_request(e: impl Debug) -> http::Response<BoxBody> {
debug!(caller = %Location::caller(), ?e, "400: Bad Request;");
InfallibleResponse::builder()
.status(StatusCode::BAD_REQUEST)
@ -20,8 +20,8 @@ pub(crate) fn bad_request(err: impl Debug) -> http::Response<BoxBody> {
}
#[track_caller]
pub(crate) fn not_found(err: impl Debug) -> http::Response<BoxBody> {
debug!("404: Not Found at {}: {err:?}", Location::caller());
pub(crate) fn not_found(e: impl Debug) -> http::Response<BoxBody> {
debug!(caller = %Location::caller(), ?e, "404: Not Found;");
InfallibleResponse::builder()
.status(StatusCode::NOT_FOUND)
@ -29,11 +29,8 @@ pub(crate) fn not_found(err: impl Debug) -> http::Response<BoxBody> {
}
#[track_caller]
pub(crate) fn internal_server_error(err: impl Debug) -> http::Response<BoxBody> {
debug!(
"500: Internal Server Error at {}: {err:?}",
Location::caller()
);
pub(crate) fn internal_server_error(e: impl Debug) -> http::Response<BoxBody> {
debug!(caller = %Location::caller(), ?e, "500: Internal Server Error;");
InfallibleResponse::builder()
.status(StatusCode::INTERNAL_SERVER_ERROR)
@ -41,8 +38,8 @@ pub(crate) fn internal_server_error(err: impl Debug) -> http::Response<BoxBody>
}
#[track_caller]
pub(crate) fn not_implemented(err: impl Debug) -> http::Response<BoxBody> {
debug!("501: Not Implemented at {}: {err:?}", Location::caller());
pub(crate) fn not_implemented(e: impl Debug) -> http::Response<BoxBody> {
debug!(caller = %Location::caller(), ?e, "501: Not Implemented;");
InfallibleResponse::builder()
.status(StatusCode::NOT_IMPLEMENTED)

View file

@ -4,18 +4,16 @@
use crate::{
api::{CoreEvent, Router},
location::LocationManagerError,
object::media::old_thumbnail::old_actor::OldThumbnailer,
};
use sd_core_heavy_lifting::{media_processor::ThumbnailKind, JobSystem};
use sd_core_prisma_helpers::CasId;
#[cfg(feature = "ai")]
use sd_ai::old_image_labeler::{DownloadModelError, OldImageLabeler, YoloV8};
use sd_utils::error::FileIOError;
use api::notifications::{Notification, NotificationData, NotificationId};
use chrono::{DateTime, Utc};
use node::config;
use notifications::Notifications;
use reqwest::{RequestBuilder, Response};
use sd_task_system::TaskSystem;
use sd_utils::error::FileIOError;
use std::{
fmt,
@ -23,6 +21,9 @@ use std::{
sync::{atomic::AtomicBool, Arc},
};
use chrono::{DateTime, Utc};
use futures_concurrency::future::Join;
use reqwest::{RequestBuilder, Response};
use thiserror::Error;
use tokio::{fs, io, sync::broadcast};
use tracing::{error, info, warn};
@ -34,6 +35,9 @@ use tracing_subscriber::{filter::FromEnvError, prelude::*, EnvFilter};
pub mod api;
mod cloud;
mod context;
#[cfg(feature = "crypto")]
pub(crate) mod crypto;
pub mod custom_uri;
mod env;
pub mod library;
@ -50,7 +54,10 @@ pub(crate) mod volume;
pub use env::Env;
use object::media::old_thumbnail::get_ephemeral_thumbnail_path;
use api::notifications::{Notification, NotificationData, NotificationId};
use context::{JobContext, NodeContext};
use node::config;
use notifications::Notifications;
pub(crate) use sd_core_sync as sync;
@ -65,10 +72,11 @@ pub struct Node {
pub p2p: Arc<p2p::P2PManager>,
pub event_bus: (broadcast::Sender<CoreEvent>, broadcast::Receiver<CoreEvent>),
pub notifications: Notifications,
pub thumbnailer: OldThumbnailer,
pub cloud_sync_flag: Arc<AtomicBool>,
pub env: Arc<env::Env>,
pub http: reqwest::Client,
pub task_system: TaskSystem<sd_core_heavy_lifting::Error>,
pub job_system: JobSystem<NodeContext, JobContext<NodeContext>>,
#[cfg(feature = "ai")]
pub old_image_labeller: Option<OldImageLabeler>,
}
@ -88,7 +96,7 @@ impl Node {
) -> Result<(Arc<Node>, Arc<Router>), NodeError> {
let data_dir = data_dir.as_ref();
info!("Starting core with data directory '{}'", data_dir.display());
info!(data_directory = %data_dir.display(), "Starting core;");
let env = Arc::new(env);
@ -117,22 +125,19 @@ impl Node {
let (old_jobs, jobs_actor) = old_job::OldJobs::new();
let libraries = library::Libraries::new(data_dir.join("libraries")).await?;
let task_system = TaskSystem::new();
let (p2p, start_p2p) = p2p::P2PManager::new(config.clone(), libraries.clone())
.await
.map_err(NodeError::P2PManager)?;
let node = Arc::new(Node {
data_dir: data_dir.to_path_buf(),
job_system: JobSystem::new(task_system.get_dispatcher(), data_dir),
task_system,
old_jobs,
locations,
notifications: notifications::Notifications::new(),
p2p,
thumbnailer: OldThumbnailer::new(
data_dir,
libraries.clone(),
event_bus.0.clone(),
config.preferences_watcher(),
)
.await,
config,
event_bus,
libraries,
@ -146,7 +151,10 @@ impl Node {
)
.await
.map_err(|e| {
error!("Failed to initialize image labeller. AI features will be disabled: {e:#?}");
error!(
?e,
"Failed to initialize image labeller. AI features will be disabled;"
);
})
.ok(),
});
@ -168,6 +176,27 @@ impl Node {
locations_actor.start(node.clone());
node.libraries.init(&node).await?;
jobs_actor.start(node.clone());
node.job_system
.init(
&node
.libraries
.get_all()
.await
.into_iter()
.map(|library| {
(
library.id,
NodeContext {
library,
node: Arc::clone(&node),
},
)
})
.collect(),
)
.await?;
start_p2p(
node.clone(),
axum::Router::new()
@ -188,7 +217,7 @@ impl Node {
.into_make_service(),
);
info!("Spacedrive online.");
info!("Spacedrive online!");
Ok((node, router))
}
@ -212,7 +241,14 @@ impl Node {
std::env::set_var(
"RUST_LOG",
format!("info,sd_core={level},sd_p2p=debug,sd_core::location::manager=info,sd_ai={level}"),
format!(
"info,\
sd_core={level},\
sd_p2p={level},\
sd_core_heavy_lifting={level},\
sd_task_system={level},\
sd_ai={level}"
),
);
}
@ -259,9 +295,18 @@ impl Node {
pub async fn shutdown(&self) {
info!("Spacedrive shutting down...");
self.thumbnailer.shutdown().await;
self.old_jobs.shutdown().await;
self.p2p.shutdown().await;
// Let's shutdown the task system first, as the job system will receive tasks to save
self.task_system.shutdown().await;
(
self.old_jobs.shutdown(),
self.p2p.shutdown(),
self.job_system.shutdown(),
)
.join()
.await;
#[cfg(feature = "ai")]
if let Some(image_labeller) = &self.old_image_labeller {
image_labeller.shutdown().await;
@ -271,12 +316,16 @@ impl Node {
pub(crate) fn emit(&self, event: CoreEvent) {
if let Err(e) = self.event_bus.0.send(event) {
warn!("Error sending event to event bus: {e:?}");
warn!(?e, "Error sending event to event bus;");
}
}
pub async fn ephemeral_thumbnail_exists(&self, cas_id: &str) -> Result<bool, FileIOError> {
let thumb_path = get_ephemeral_thumbnail_path(self, cas_id);
pub async fn ephemeral_thumbnail_exists(
&self,
cas_id: &CasId<'_>,
) -> Result<bool, FileIOError> {
let thumb_path =
ThumbnailKind::Ephemeral.compute_path(self.config.data_directory(), cas_id);
match fs::metadata(&thumb_path).await {
Ok(_) => Ok(true),
@ -301,8 +350,8 @@ impl Node {
Ok(_) => {
self.notifications._internal_send(notification);
}
Err(err) => {
error!("Error saving notification to config: {:?}", err);
Err(e) => {
error!(?e, "Error saving notification to config;");
}
}
}
@ -375,6 +424,9 @@ pub enum NodeError {
InitConfig(#[from] util::debug_initializer::InitConfigError),
#[error("logger error: {0}")]
Logger(#[from] FromEnvError),
#[error(transparent)]
JobSystem(#[from] sd_core_heavy_lifting::JobSystemError),
#[cfg(feature = "ai")]
#[error("ai error: {0}")]
AI(#[from] sd_ai::Error),
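The shutdown sequence above first drains the task system and then awaits the old job manager, the p2p layer and the new job system concurrently through futures_concurrency. A minimal standalone sketch of that join combinator with placeholder futures:

use futures_concurrency::future::Join;

async fn shutdown_subsystems() {
    // `.join()` on a tuple polls all futures concurrently and resolves once
    // every element has completed, like the shutdown block above.
    (
        async { /* old_jobs.shutdown().await */ },
        async { /* p2p.shutdown().await */ },
        async { /* job_system.shutdown().await */ },
    )
        .join()
        .await;
}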

View file

@ -130,7 +130,7 @@ impl LibraryConfig {
db.indexer_rule().update_many(
vec![indexer_rule::name::equals(Some(name))],
vec![indexer_rule::pub_id::set(sd_utils::uuid_to_bytes(
Uuid::from_u128(i as u128),
&Uuid::from_u128(i as u128),
))],
)
})
@ -221,7 +221,7 @@ impl LibraryConfig {
maybe_missing(path.size_in_bytes, "file_path.size_in_bytes")
.map_or_else(
|e| {
error!("{e:#?}");
error!(?e);
None
},
Some,
@ -232,9 +232,11 @@ impl LibraryConfig {
Some(size.to_be_bytes().to_vec())
} else {
error!(
"File path <id='{}'> had invalid size: '{}'",
path.id, size_in_bytes
file_path_id = %path.id,
size = %size_in_bytes,
"File path had invalid size;",
);
None
};
@ -463,7 +465,8 @@ impl LibraryConfig {
}
_ => {
error!("Library config version is not handled: {:?}", current);
error!(current_version = ?current, "Library config version is not handled;");
return Err(VersionManagerError::UnexpectedMigration {
current_version: current.int_value(),
next_version: next.int_value(),

View file

@ -1,9 +1,8 @@
use crate::{
api::CoreEvent, cloud, object::media::old_thumbnail::get_indexed_thumbnail_path, sync, Node,
};
use crate::{api::CoreEvent, cloud, sync, Node};
use sd_core_file_path_helper::IsolatedFilePathData;
use sd_core_prisma_helpers::file_path_to_full_path;
use sd_core_heavy_lifting::media_processor::ThumbnailKind;
use sd_core_prisma_helpers::{file_path_to_full_path, CasId};
use sd_p2p::Identity;
use sd_prisma::prisma::{file_path, location, PrismaClient};
@ -121,12 +120,17 @@ impl Library {
// TODO: Remove this once we replace the old invalidation system
pub(crate) fn emit(&self, event: CoreEvent) {
if let Err(e) = self.event_bus_tx.send(event) {
warn!("Error sending event to event bus: {e:?}");
warn!(?e, "Error sending event to event bus;");
}
}
pub async fn thumbnail_exists(&self, node: &Node, cas_id: &str) -> Result<bool, FileIOError> {
let thumb_path = get_indexed_thumbnail_path(node, cas_id, self.id);
pub async fn thumbnail_exists(
&self,
node: &Node,
cas_id: &CasId<'_>,
) -> Result<bool, FileIOError> {
let thumb_path =
ThumbnailKind::Indexed(self.id).compute_path(node.config.data_directory(), cas_id);
match fs::metadata(&thumb_path).await {
Ok(_) => Ok(true),
@ -182,7 +186,7 @@ impl Library {
pub fn do_cloud_sync(&self) {
if let Err(e) = self.do_cloud_sync.send(()) {
warn!("Error sending cloud resync message: {e:?}");
warn!(?e, "Error sending cloud resync message;");
}
}
}

View file

@ -36,7 +36,7 @@ use tokio::{
sync::{broadcast, RwLock},
time::sleep,
};
use tracing::{debug, error, info, warn};
use tracing::{debug, error, info, instrument, warn};
use uuid::Uuid;
use super::{Library, LibraryConfig, LibraryName};
@ -113,9 +113,9 @@ impl Libraries {
.and_then(|v| v.to_str().map(Uuid::from_str))
else {
warn!(
"Attempted to load library from path '{}' \
but it has an invalid filename. Skipping...",
config_path.display()
config_path = %config_path.display(),
"Attempted to load library from path \
but it has an invalid filename. Skipping...;",
);
continue;
};
@ -124,7 +124,11 @@ impl Libraries {
match fs::metadata(&db_path).await {
Ok(_) => {}
Err(e) if e.kind() == io::ErrorKind::NotFound => {
warn!("Found library '{}' but no matching database file was found. Skipping...", config_path.display());
warn!(
config_path = %config_path.display(),
"Found library but no matching database file was found. Skipping...;",
);
continue;
}
Err(e) => return Err(FileIOError::from((db_path, e)).into()),
@ -158,6 +162,7 @@ impl Libraries {
.await
}
#[instrument(skip(self, instance, node), err)]
#[allow(clippy::too_many_arguments)]
pub(crate) async fn create_with_uuid(
self: &Arc<Self>,
@ -189,9 +194,8 @@ impl Libraries {
.await?;
debug!(
"Created library '{}' config at '{}'",
id,
config_path.display()
config_path = %config_path.display(),
"Created library;",
);
let node_cfg = node.config.get().await;
@ -225,12 +229,12 @@ impl Libraries {
)
.await?;
debug!("Loaded library '{id:?}'");
debug!("Loaded library");
if should_seed {
tag::seed::new_library(&library).await?;
sd_core_indexer_rules::seed::new_or_existing_library(&library.db).await?;
debug!("Seeded library '{id:?}'");
debug!("Seeded library");
}
invalidate_query!(library, "library.list");
@ -325,7 +329,7 @@ impl Libraries {
.exec()
.await
.map(|locations| locations.into_iter().filter_map(|location| location.path))
.map_err(|e| error!("Failed to fetch locations for library deletion: {e:#?}"))
.map_err(|e| error!(?e, "Failed to fetch locations for library deletion;"))
{
location_paths
.map(|location_path| async move {
@ -343,7 +347,7 @@ impl Libraries {
.into_iter()
.for_each(|res| {
if let Err(e) = res {
error!("Failed to remove library from location metadata: {e:#?}");
error!(?e, "Failed to remove library from location metadata;");
}
});
}
@ -371,7 +375,7 @@ impl Libraries {
.remove(id)
.expect("we have exclusive access and checked it exists!");
info!("Removed Library <id='{}'>", library.id);
info!(%library.id, "Removed Library;");
invalidate_query!(library, "library.list");
@ -420,6 +424,16 @@ impl Libraries {
self.libraries.read().await.get(library_id).is_some()
}
#[instrument(
skip_all,
fields(
library_id = %id,
db_path = %db_path.as_ref().display(),
config_path = %config_path.as_ref().display(),
%should_seed,
),
err,
)]
/// load the library from a given path.
pub async fn load(
self: &Arc<Self>,
@ -479,8 +493,9 @@ impl Libraries {
|| curr_metadata != Some(node.p2p.peer_metadata())
{
info!(
"Detected that the library '{}' has changed node from '{}' to '{}'. Reconciling node data...",
id, instance_node_id, node_config.id
old_node_id = %instance_node_id,
new_node_id = %node_config.id,
"Detected that the library has changed nodes. Reconciling node data...",
);
// ensure
@ -593,12 +608,12 @@ impl Libraries {
.await?
{
if let Err(e) = node.locations.add(location.id, library.clone()).await {
error!("Failed to watch location on startup: {e}");
error!(?e, "Failed to watch location on startup;");
};
}
if let Err(e) = node.old_jobs.clone().cold_resume(node, &library).await {
error!("Failed to resume jobs for library. {:#?}", e);
error!(?e, "Failed to resume jobs for library;");
}
tokio::spawn({
@ -639,20 +654,20 @@ impl Libraries {
if should_update {
warn!("Library instance on cloud is outdated. Updating...");
if let Err(err) =
sd_cloud_api::library::update_instance(
node.cloud_api_config().await,
library.id,
this_instance.uuid,
Some(node_config.id),
Some(node_config.identity.to_remote_identity()),
Some(node.p2p.peer_metadata()),
)
.await
if let Err(e) = sd_cloud_api::library::update_instance(
node.cloud_api_config().await,
library.id,
this_instance.uuid,
Some(node_config.id),
Some(node_config.identity.to_remote_identity()),
Some(node.p2p.peer_metadata()),
)
.await
{
error!(
"Failed to updating instance '{}' on cloud: {:#?}",
this_instance.uuid, err
instance_uuid = %this_instance.uuid,
?e,
"Failed to updating instance on cloud;",
);
}
}
@ -661,29 +676,26 @@ impl Libraries {
if lib.name != *library.config().await.name {
warn!("Library name on cloud is outdated. Updating...");
if let Err(err) = sd_cloud_api::library::update(
if let Err(e) = sd_cloud_api::library::update(
node.cloud_api_config().await,
library.id,
Some(lib.name),
)
.await
{
error!(
"Failed to update library name on cloud: {:#?}",
err
);
error!(?e, "Failed to update library name on cloud;");
}
}
for instance in lib.instances {
if let Err(err) = cloud::sync::receive::upsert_instance(
if let Err(e) = cloud::sync::receive::upsert_instance(
library.id,
&library.db,
&library.sync,
&node.libraries,
instance.uuid,
&instance.uuid,
instance.identity,
instance.node_id,
&instance.node_id,
RemoteIdentity::from_str(
&instance.node_remote_identity,
)
@ -692,10 +704,7 @@ impl Libraries {
)
.await
{
error!(
"Failed to create instance from cloud: {:#?}",
err
);
error!(?e, "Failed to create instance on cloud;");
}
}
}

View file

@ -37,8 +37,8 @@ pub async fn update_library_statistics(
.find_many(vec![])
.exec()
.await
.unwrap_or_else(|err| {
error!("Failed to get locations: {:#?}", err);
.unwrap_or_else(|e| {
error!(?e, "Failed to get locations;");
vec![]
})
.into_iter()
@ -79,7 +79,7 @@ pub async fn update_library_statistics(
.exec()
.await?;
info!("Updated library statistics: {:?}", stats);
info!(?stats, "Updated library statistics;");
Ok(stats)
}

View file

@ -81,35 +81,33 @@ pub enum LocationError {
}
impl From<LocationError> for rspc::Error {
fn from(err: LocationError) -> Self {
fn from(e: LocationError) -> Self {
use LocationError::*;
match err {
match e {
// Not found errors
PathNotFound(_)
| UuidNotFound(_)
| IdNotFound(_)
| FilePath(FilePathError::IdNotFound(_) | FilePathError::NotFound(_)) => {
Self::with_cause(ErrorCode::NotFound, err.to_string(), err)
Self::with_cause(ErrorCode::NotFound, e.to_string(), e)
}
// User's fault errors
NotDirectory(_) | NestedLocation(_) | LocationAlreadyExists(_) => {
Self::with_cause(ErrorCode::BadRequest, err.to_string(), err)
Self::with_cause(ErrorCode::BadRequest, e.to_string(), e)
}
// Custom error message is used to differentiate these errors in the frontend
// TODO: A better solution would be for rspc to support sending custom data alongside errors
NeedRelink { .. } => {
Self::with_cause(ErrorCode::Conflict, "NEED_RELINK".to_owned(), err)
}
NeedRelink { .. } => Self::with_cause(ErrorCode::Conflict, "NEED_RELINK".to_owned(), e),
AddLibraryToMetadata(_) => {
Self::with_cause(ErrorCode::Conflict, "ADD_LIBRARY".to_owned(), err)
Self::with_cause(ErrorCode::Conflict, "ADD_LIBRARY".to_owned(), e)
}
// Internal errors
MissingField(missing_error) => missing_error.into(),
_ => Self::with_cause(ErrorCode::InternalServerError, err.to_string(), err),
_ => Self::with_cause(ErrorCode::InternalServerError, e.to_string(), e),
}
}
}

View file

@ -1,546 +0,0 @@
use crate::library::Library;
use sd_core_file_path_helper::{FilePathError, IsolatedFilePathData, IsolatedFilePathDataParts};
use sd_core_indexer_rules::IndexerRuleError;
use sd_core_prisma_helpers::file_path_pub_and_cas_ids;
use sd_prisma::{
prisma::{file_path, location, PrismaClient},
prisma_sync,
};
use sd_sync::*;
use sd_utils::{db::inode_to_db, error::FileIOError, from_bytes_to_uuid, msgpack};
use std::{collections::HashMap, path::Path};
use chrono::Utc;
use futures_concurrency::future::TryJoin;
use itertools::Itertools;
use prisma_client_rust::operator::or;
use rspc::ErrorCode;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use tracing::{trace, warn};
use super::location_with_indexer_rules;
pub mod old_indexer_job;
mod old_shallow;
mod old_walk;
use old_walk::WalkedEntry;
pub use old_indexer_job::OldIndexerJobInit;
pub use old_shallow::*;
#[derive(Serialize, Deserialize, Debug)]
pub struct OldIndexerJobSaveStep {
chunk_idx: usize,
walked: Vec<WalkedEntry>,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct OldIndexerJobUpdateStep {
chunk_idx: usize,
to_update: Vec<WalkedEntry>,
}
/// Error type for the indexer module
#[derive(Error, Debug)]
pub enum IndexerError {
// Not Found errors
#[error("indexer rule not found: <id='{0}'>")]
IndexerRuleNotFound(i32),
#[error("received sub path not in database: <path='{}'>", .0.display())]
SubPathNotFound(Box<Path>),
// Internal Errors
#[error("Database Error: {}", .0.to_string())]
Database(#[from] prisma_client_rust::QueryError),
#[error(transparent)]
FileIO(#[from] FileIOError),
#[error(transparent)]
FilePath(#[from] FilePathError),
// Mixed errors
#[error(transparent)]
IndexerRules(#[from] IndexerRuleError),
}
impl From<IndexerError> for rspc::Error {
fn from(err: IndexerError) -> Self {
match err {
IndexerError::IndexerRuleNotFound(_) | IndexerError::SubPathNotFound(_) => {
rspc::Error::with_cause(ErrorCode::NotFound, err.to_string(), err)
}
IndexerError::IndexerRules(rule_err) => rule_err.into(),
_ => rspc::Error::with_cause(ErrorCode::InternalServerError, err.to_string(), err),
}
}
}
async fn execute_indexer_save_step(
location: &location_with_indexer_rules::Data,
OldIndexerJobSaveStep { walked, .. }: &OldIndexerJobSaveStep,
library: &Library,
) -> Result<i64, IndexerError> {
let Library { sync, db, .. } = library;
let (sync_stuff, paths): (Vec<_>, Vec<_>) = walked
.iter()
.map(|entry| {
let IsolatedFilePathDataParts {
materialized_path,
is_dir,
name,
extension,
..
} = &entry.iso_file_path.to_parts();
use file_path::*;
let pub_id = sd_utils::uuid_to_bytes(entry.pub_id);
let (sync_params, db_params): (Vec<_>, Vec<_>) = [
(
(
location::NAME,
msgpack!(prisma_sync::location::SyncId {
pub_id: location.pub_id.clone()
}),
),
location_id::set(Some(location.id)),
),
sync_db_entry!(materialized_path.to_string(), materialized_path),
sync_db_entry!(name.to_string(), name),
sync_db_entry!(*is_dir, is_dir),
sync_db_entry!(extension.to_string(), extension),
sync_db_entry!(
entry.metadata.size_in_bytes.to_be_bytes().to_vec(),
size_in_bytes_bytes
),
sync_db_entry!(inode_to_db(entry.metadata.inode), inode),
{
let v = entry.metadata.created_at.into();
sync_db_entry!(v, date_created)
},
{
let v = entry.metadata.modified_at.into();
sync_db_entry!(v, date_modified)
},
{
let v = Utc::now().into();
sync_db_entry!(v, date_indexed)
},
sync_db_entry!(entry.metadata.hidden, hidden),
]
.into_iter()
.unzip();
(
sync.shared_create(
prisma_sync::file_path::SyncId {
pub_id: sd_utils::uuid_to_bytes(entry.pub_id),
},
sync_params,
),
file_path::create_unchecked(pub_id, db_params),
)
})
.unzip();
let count = sync
.write_ops(
db,
(
sync_stuff.into_iter().flatten().collect(),
db.file_path().create_many(paths).skip_duplicates(),
),
)
.await?;
trace!("Inserted {count} records");
Ok(count)
}
async fn execute_indexer_update_step(
update_step: &OldIndexerJobUpdateStep,
Library { sync, db, .. }: &Library,
) -> Result<i64, IndexerError> {
let (sync_stuff, paths_to_update): (Vec<_>, Vec<_>) = update_step
.to_update
.iter()
.map(|entry| async move {
let IsolatedFilePathDataParts { is_dir, .. } = &entry.iso_file_path.to_parts();
let pub_id = sd_utils::uuid_to_bytes(entry.pub_id);
let should_unlink_object = if let Some(object_id) = entry.maybe_object_id {
db.file_path()
.count(vec![file_path::object_id::equals(Some(object_id))])
.exec()
.await? > 1
} else {
false
};
use file_path::*;
let (sync_params, db_params): (Vec<_>, Vec<_>) = [
// As this file was updated while Spacedrive was offline, we mark the object_id and cas_id as null
// So this file_path will be updated at file identifier job
should_unlink_object
.then_some(((object_id::NAME, msgpack!(nil)), object::disconnect())),
Some(((cas_id::NAME, msgpack!(nil)), cas_id::set(None))),
Some(sync_db_entry!(*is_dir, is_dir)),
Some(sync_db_entry!(
entry.metadata.size_in_bytes.to_be_bytes().to_vec(),
size_in_bytes_bytes
)),
Some(sync_db_entry!(inode_to_db(entry.metadata.inode), inode)),
Some({
let v = entry.metadata.created_at.into();
sync_db_entry!(v, date_created)
}),
Some({
let v = entry.metadata.modified_at.into();
sync_db_entry!(v, date_modified)
}),
Some(sync_db_entry!(entry.metadata.hidden, hidden)),
]
.into_iter()
.flatten()
.unzip();
Ok::<_, IndexerError>((
sync_params
.into_iter()
.map(|(field, value)| {
sync.shared_update(
prisma_sync::file_path::SyncId {
pub_id: pub_id.clone(),
},
field,
value,
)
})
.collect::<Vec<_>>(),
db.file_path()
.update(file_path::pub_id::equals(pub_id), db_params)
.select(file_path::select!({ id })),
))
})
.collect::<Vec<_>>()
.try_join()
.await?
.into_iter()
.unzip();
let updated = sync
.write_ops(
db,
(sync_stuff.into_iter().flatten().collect(), paths_to_update),
)
.await?;
trace!("Updated {updated:?} records");
Ok(updated.len() as i64)
}
fn iso_file_path_factory(
location_id: location::id::Type,
location_path: &Path,
) -> impl Fn(&Path, bool) -> Result<IsolatedFilePathData<'static>, IndexerError> + '_ {
move |path, is_dir| {
IsolatedFilePathData::new(location_id, location_path, path, is_dir).map_err(Into::into)
}
}
async fn remove_non_existing_file_paths(
to_remove: impl IntoIterator<Item = file_path_pub_and_cas_ids::Data>,
db: &PrismaClient,
sync: &sd_core_sync::Manager,
) -> Result<u64, IndexerError> {
let (sync_params, db_params): (Vec<_>, Vec<_>) = to_remove
.into_iter()
.map(|d| {
(
sync.shared_delete(prisma_sync::file_path::SyncId { pub_id: d.pub_id }),
d.id,
)
})
.unzip();
sync.write_ops(
db,
(
sync_params,
db.file_path()
.delete_many(vec![file_path::id::in_vec(db_params)]),
),
)
.await?;
Ok(0)
}
// TODO: Change this macro to a fn when we're able to return
// `impl Fn(Vec<file_path::WhereParam>) -> impl Future<Output = Result<Vec<file_path_walker::Data>, IndexerError>>`
// Maybe when TAITs arrive
#[macro_export]
macro_rules! file_paths_db_fetcher_fn {
($db:expr) => {{
|found_paths| async {
// Each found path is a AND with 4 terms, and SQLite has a expression tree limit of 1000 terms
// so we will use chunks of 200 just to be safe
// FIXME: Can't pass this chunks variable direct to _batch because of lifetime issues
let chunks = found_paths
.into_iter()
.chunks(200)
.into_iter()
.map(|founds| {
$db.file_path()
.find_many(vec![::prisma_client_rust::operator::or(
founds.collect::<Vec<_>>(),
)])
.select(::sd_core_prisma_helpers::file_path_walker::select())
})
.collect::<Vec<_>>();
$db._batch(chunks)
.await
.map(|fetched| fetched.into_iter().flatten().collect::<Vec<_>>())
.map_err(Into::into)
}
}};
}
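The comment inside this (now removed) macro explains the chunk size: each found path expands to an AND of several terms and SQLite limits an expression tree to roughly 1000 terms, so the parameters are OR-ed together in chunks of 200. A standalone sketch of that chunking shape with itertools, using plain Strings in place of the Prisma WhereParam type:

// Sketch of the chunk-then-query pattern; Strings stand in for Prisma
// `WhereParam` values, and 200 items per chunk keeps each OR comfortably
// under SQLite's expression tree limit.
use itertools::Itertools;

fn chunk_params(params: Vec<String>) -> Vec<Vec<String>> {
    let chunks = params.into_iter().chunks(200);
    chunks
        .into_iter()
        .map(|chunk| chunk.collect::<Vec<_>>())
        .collect()
}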
// TODO: Change this macro to a fn when we're able to return
// `impl Fn(&Path, Vec<file_path::WhereParam>) -> impl Future<Output = Result<Vec<file_path_just_pub_id::Data>, IndexerError>>`
// Maybe when TAITs arrive
// FIXME: (fogodev) I was receiving this error here https://github.com/rust-lang/rust/issues/74497
#[macro_export]
macro_rules! to_remove_db_fetcher_fn {
($location_id:expr, $db:expr) => {{
|parent_iso_file_path, unique_location_id_materialized_path_name_extension_params| async {
let location_id: ::sd_prisma::prisma::location::id::Type = $location_id;
let db: &::sd_prisma::prisma::PrismaClient = $db;
let parent_iso_file_path: ::sd_core_file_path_helper::IsolatedFilePathData<
'static,
> = parent_iso_file_path;
let unique_location_id_materialized_path_name_extension_params: ::std::vec::Vec<
::sd_prisma::prisma::file_path::WhereParam,
> = unique_location_id_materialized_path_name_extension_params;
// FIXME: Can't pass this chunks variable direct to _batch because of lifetime issues
let chunks = unique_location_id_materialized_path_name_extension_params
.into_iter()
.chunks(200)
.into_iter()
.map(|unique_params| {
db.file_path()
.find_many(vec![::prisma_client_rust::operator::or(
unique_params.collect(),
)])
.select(::sd_prisma::prisma::file_path::select!({ id }))
})
.collect::<::std::vec::Vec<_>>();
let founds_ids = db._batch(chunks).await.map(|founds_chunk| {
founds_chunk
.into_iter()
.map(|file_paths| file_paths.into_iter().map(|file_path| file_path.id))
.flatten()
.collect::<::std::collections::HashSet<_>>()
})?;
// NOTE: This batch size can be increased if we wish to trade memory for more performance
const BATCH_SIZE: i64 = 1000;
let mut to_remove = vec![];
let mut cursor = 1;
loop {
let found = $db.file_path()
.find_many(vec![
::sd_prisma::prisma::file_path::location_id::equals(Some(location_id)),
::sd_prisma::prisma::file_path::materialized_path::equals(Some(
parent_iso_file_path
.materialized_path_for_children()
.expect("the received isolated file path must be from a directory"),
)),
])
.order_by(::sd_prisma::prisma::file_path::id::order(::sd_prisma::prisma::SortOrder::Asc))
.take(BATCH_SIZE)
.cursor(::sd_prisma::prisma::file_path::id::equals(cursor))
.select(::sd_prisma::prisma::file_path::select!({ id pub_id cas_id }))
.exec()
.await?;
let should_stop = (found.len() as i64) < BATCH_SIZE;
if let Some(last) = found.last() {
cursor = last.id;
} else {
break;
}
to_remove.extend(
found
.into_iter()
.filter(|file_path| !founds_ids.contains(&file_path.id))
.map(|file_path| ::sd_core_prisma_helpers::file_path_pub_and_cas_ids::Data {
id: file_path.id,
pub_id: file_path.pub_id,
cas_id: file_path.cas_id,
}),
);
if should_stop {
break;
}
}
Ok(to_remove)
}
}};
}
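The removed macro above pages through file_path rows with a cursor: fetch up to BATCH_SIZE rows starting at the cursor, advance the cursor to the last id seen, and stop once a page comes back short. A reduced sketch of that loop shape, with a hypothetical fetch_page closure standing in for the Prisma query:

// Reduced sketch of the cursor-pagination loop; `fetch_page(cursor, limit)`
// is a stand-in for the real database query.
const BATCH_SIZE: i64 = 1000;

struct Row {
    id: i64,
}

fn collect_all(fetch_page: impl Fn(i64, i64) -> Vec<Row>) -> Vec<Row> {
    let mut out = Vec::new();
    let mut cursor = 1;

    loop {
        let page = fetch_page(cursor, BATCH_SIZE);
        let short_page = (page.len() as i64) < BATCH_SIZE;

        if let Some(last) = page.last() {
            cursor = last.id;
        } else {
            break;
        }

        out.extend(page);

        if short_page {
            break;
        }
    }

    out
}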
pub async fn reverse_update_directories_sizes(
base_path: impl AsRef<Path>,
location_id: location::id::Type,
location_path: impl AsRef<Path>,
library: &Library,
) -> Result<(), FilePathError> {
let base_path = base_path.as_ref();
let location_path = location_path.as_ref();
let Library { sync, db, .. } = library;
let ancestors = base_path
.ancestors()
.take_while(|&ancestor| ancestor != location_path)
.map(|ancestor| IsolatedFilePathData::new(location_id, location_path, ancestor, true))
.collect::<Result<Vec<_>, _>>()?;
let chunked_queries = ancestors
.iter()
.chunks(200)
.into_iter()
.map(|ancestors_iso_file_paths_chunk| {
db.file_path()
.find_many(vec![or(ancestors_iso_file_paths_chunk
.into_iter()
.map(file_path::WhereParam::from)
.collect::<Vec<_>>())])
.select(file_path::select!({ pub_id materialized_path name }))
})
.collect::<Vec<_>>();
let mut pub_id_by_ancestor_materialized_path = db
._batch(chunked_queries)
.await?
.into_iter()
.flatten()
.filter_map(
|file_path| match (file_path.materialized_path, file_path.name) {
(Some(materialized_path), Some(name)) => {
Some((format!("{materialized_path}{name}/"), (file_path.pub_id, 0)))
}
_ => {
warn!(
"Found a file_path missing its materialized_path or name: <pub_id='{:#?}'>",
from_bytes_to_uuid(&file_path.pub_id)
);
None
}
},
)
.collect::<HashMap<_, _>>();
db.file_path()
.find_many(vec![
file_path::location_id::equals(Some(location_id)),
file_path::materialized_path::in_vec(
ancestors
.iter()
.map(|ancestor_iso_file_path| {
ancestor_iso_file_path
.materialized_path_for_children()
.expect("each ancestor is a directory")
})
.collect(),
),
])
.select(file_path::select!({ materialized_path size_in_bytes_bytes }))
.exec()
.await?
.into_iter()
.for_each(|file_path| {
if let Some(materialized_path) = file_path.materialized_path {
if let Some((_, size)) =
pub_id_by_ancestor_materialized_path.get_mut(&materialized_path)
{
*size += file_path
.size_in_bytes_bytes
.map(|size_in_bytes_bytes| {
u64::from_be_bytes([
size_in_bytes_bytes[0],
size_in_bytes_bytes[1],
size_in_bytes_bytes[2],
size_in_bytes_bytes[3],
size_in_bytes_bytes[4],
size_in_bytes_bytes[5],
size_in_bytes_bytes[6],
size_in_bytes_bytes[7],
])
})
.unwrap_or_else(|| {
warn!("Got a directory missing its size in bytes");
0
});
}
} else {
warn!("Corrupt database possessing a file_path entry without materialized_path");
}
});
let to_sync_and_update = ancestors
.into_iter()
.filter_map(|ancestor_iso_file_path| {
if let Some((pub_id, size)) = pub_id_by_ancestor_materialized_path.remove(
&ancestor_iso_file_path
.materialized_path_for_children()
.expect("each ancestor is a directory"),
) {
let size_bytes = size.to_be_bytes().to_vec();
Some((
sync.shared_update(
prisma_sync::file_path::SyncId {
pub_id: pub_id.clone(),
},
file_path::size_in_bytes_bytes::NAME,
msgpack!(size_bytes.clone()),
),
db.file_path().update(
file_path::pub_id::equals(pub_id),
vec![file_path::size_in_bytes_bytes::set(Some(size_bytes))],
),
))
} else {
warn!("Got a missing ancestor for a file_path in the database, maybe we have a corruption");
None
}
})
.unzip::<_, _, Vec<_>, Vec<_>>();
sync.write_ops(db, to_sync_and_update).await?;
Ok(())
}
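Directory sizes in this legacy code are stored as 8-byte big-endian blobs in size_in_bytes_bytes, which is why the accumulation above reassembles a u64 byte by byte. A compact round-trip sketch of that encoding (the fallback message is illustrative, not taken from the codebase):

// Round-trip sketch for the big-endian size encoding behind
// `size_in_bytes_bytes`; malformed blobs fall back to zero with a warning.
fn encode_size(size: u64) -> Vec<u8> {
    size.to_be_bytes().to_vec()
}

fn decode_size(blob: &[u8]) -> u64 {
    blob.try_into()
        .map(u64::from_be_bytes)
        .unwrap_or_else(|_| {
            tracing::warn!("Malformed size_in_bytes_bytes blob; defaulting to 0");
            0
        })
}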

View file

@ -1,660 +0,0 @@
use crate::{
file_paths_db_fetcher_fn, invalidate_query,
library::Library,
location::{location_with_indexer_rules, update_location_size, ScanState},
old_job::{
CurrentStep, JobError, JobInitOutput, JobReportUpdate, JobResult, JobRunMetadata,
JobStepOutput, StatefulJob, WorkerContext,
},
to_remove_db_fetcher_fn,
};
use sd_core_file_path_helper::{
ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location,
IsolatedFilePathData,
};
use sd_core_indexer_rules::IndexerRule;
use sd_prisma::{
prisma::{file_path, location},
prisma_sync,
};
use sd_sync::*;
use sd_utils::{db::maybe_missing, from_bytes_to_uuid, msgpack};
use std::{
collections::HashMap,
hash::{Hash, Hasher},
path::{Path, PathBuf},
sync::Arc,
time::Duration,
};
use itertools::Itertools;
use prisma_client_rust::operator::or;
use serde::{Deserialize, Serialize};
use serde_json::json;
use tokio::time::Instant;
use tracing::{debug, info, warn};
use super::{
execute_indexer_save_step, execute_indexer_update_step, iso_file_path_factory,
old_walk::{keep_walking, walk, ToWalkEntry, WalkResult},
remove_non_existing_file_paths, reverse_update_directories_sizes, IndexerError,
OldIndexerJobSaveStep, OldIndexerJobUpdateStep,
};
/// BATCH_SIZE is the number of files to index at each step, writing the chunk of files metadata in the database.
const BATCH_SIZE: usize = 1000;
/// `IndexerJobInit` receives a `location::Data` object to be indexed
/// and possibly a `sub_path` to be indexed. The `sub_path` is used when
/// we want do index just a part of a location.
#[derive(Serialize, Deserialize, Debug)]
pub struct OldIndexerJobInit {
pub location: location_with_indexer_rules::Data,
pub sub_path: Option<PathBuf>,
}
impl Hash for OldIndexerJobInit {
fn hash<H: Hasher>(&self, state: &mut H) {
self.location.id.hash(state);
if let Some(ref sub_path) = self.sub_path {
sub_path.hash(state);
}
}
}
/// `IndexerJobData` contains the state of the indexer job, which includes a `location_path` that
/// is cached and casted on `PathBuf` from `local_path` column in the `location` table. It also
/// contains some metadata for logging purposes.
#[derive(Serialize, Deserialize, Debug)]
pub struct OldIndexerJobData {
location_path: PathBuf,
indexed_path: PathBuf,
indexer_rules: Vec<IndexerRule>,
}
#[derive(Serialize, Deserialize, Default, Debug)]
pub struct OldIndexerJobRunMetadata {
db_write_time: Duration,
scan_read_time: Duration,
total_paths: u64,
total_updated_paths: u64,
total_save_steps: u64,
total_update_steps: u64,
indexed_count: u64,
updated_count: u64,
removed_count: u64,
paths_and_sizes: HashMap<PathBuf, u64>,
}
impl JobRunMetadata for OldIndexerJobRunMetadata {
fn update(&mut self, new_data: Self) {
self.db_write_time += new_data.db_write_time;
self.scan_read_time += new_data.scan_read_time;
self.total_paths += new_data.total_paths;
self.total_updated_paths += new_data.total_updated_paths;
self.total_save_steps += new_data.total_save_steps;
self.total_update_steps += new_data.total_update_steps;
self.indexed_count += new_data.indexed_count;
self.removed_count += new_data.removed_count;
for (path, size) in new_data.paths_and_sizes {
*self.paths_and_sizes.entry(path).or_default() += size;
}
}
}
#[derive(Clone)]
pub enum ScanProgress {
ChunkCount(usize),
SavedChunks(usize),
UpdatedChunks(usize),
Message(String),
}
impl OldIndexerJobData {
fn on_scan_progress(ctx: &WorkerContext, progress: Vec<ScanProgress>) {
ctx.progress(
progress
.into_iter()
.map(|p| match p {
ScanProgress::ChunkCount(c) => JobReportUpdate::TaskCount(c),
ScanProgress::SavedChunks(p) | ScanProgress::UpdatedChunks(p) => {
JobReportUpdate::CompletedTaskCount(p)
}
ScanProgress::Message(m) => JobReportUpdate::Message(m),
})
.collect(),
)
}
}
/// `IndexerJobStepInput` defines the action that should be executed in the current step
#[derive(Serialize, Deserialize, Debug)]
pub enum OldIndexerJobStepInput {
Save(OldIndexerJobSaveStep),
Walk(ToWalkEntry),
Update(OldIndexerJobUpdateStep),
}
/// A `IndexerJob` is a stateful job that walks a directory and indexes all files.
/// First it walks the directory and generates a list of files to index, chunked into
/// batches of [`BATCH_SIZE`]. Then for each chunk it write the file metadata to the database.
#[async_trait::async_trait]
impl StatefulJob for OldIndexerJobInit {
type Data = OldIndexerJobData;
type Step = OldIndexerJobStepInput;
type RunMetadata = OldIndexerJobRunMetadata;
const NAME: &'static str = "indexer";
const IS_BATCHED: bool = true;
fn target_location(&self) -> location::id::Type {
self.location.id
}
/// Creates a vector of valid path buffers from a directory, chunked into batches of `BATCH_SIZE`.
async fn init(
&self,
ctx: &WorkerContext,
data: &mut Option<Self::Data>,
) -> Result<JobInitOutput<Self::RunMetadata, Self::Step>, JobError> {
let init = self;
let location_id = init.location.id;
let location_path = maybe_missing(&init.location.path, "location.path").map(Path::new)?;
let db = Arc::clone(&ctx.library.db);
let sync = &ctx.library.sync;
let indexer_rules = init
.location
.indexer_rules
.iter()
.map(|rule| IndexerRule::try_from(&rule.indexer_rule))
.collect::<Result<Vec<_>, _>>()
.map_err(IndexerError::from)?;
let to_walk_path = match &init.sub_path {
Some(sub_path) if sub_path != Path::new("") => {
let full_path = ensure_sub_path_is_in_location(location_path, sub_path)
.await
.map_err(IndexerError::from)?;
ensure_sub_path_is_directory(location_path, sub_path)
.await
.map_err(IndexerError::from)?;
ensure_file_path_exists(
sub_path,
&IsolatedFilePathData::new(location_id, location_path, &full_path, true)
.map_err(IndexerError::from)?,
&db,
IndexerError::SubPathNotFound,
)
.await?;
full_path
}
_ => location_path.to_path_buf(),
};
let scan_start = Instant::now();
let WalkResult {
walked,
to_update,
to_walk,
to_remove,
errors,
paths_and_sizes,
} = walk(
&location_path,
&to_walk_path,
&indexer_rules,
update_notifier_fn(ctx),
file_paths_db_fetcher_fn!(&db),
to_remove_db_fetcher_fn!(location_id, &db),
iso_file_path_factory(location_id, location_path),
50_000,
)
.await?;
let scan_read_time = scan_start.elapsed();
let to_remove = to_remove.collect::<Vec<_>>();
debug!(
"Walker at indexer job found {} file_paths to be removed",
to_remove.len()
);
ctx.node
.thumbnailer
.remove_indexed_cas_ids(
to_remove
.iter()
.filter_map(|file_path| file_path.cas_id.clone())
.collect::<Vec<_>>(),
ctx.library.id,
)
.await;
let db_delete_start = Instant::now();
// TODO pass these uuids to sync system
let removed_count = remove_non_existing_file_paths(to_remove, &db, sync).await?;
let db_delete_time = db_delete_start.elapsed();
let total_new_paths = &mut 0;
let total_updated_paths = &mut 0;
let to_walk_count = to_walk.len();
let to_save_chunks = &mut 0;
let to_update_chunks = &mut 0;
let steps = walked
.chunks(BATCH_SIZE)
.into_iter()
.enumerate()
.map(|(i, chunk)| {
let chunk_steps = chunk.collect::<Vec<_>>();
*total_new_paths += chunk_steps.len() as u64;
*to_save_chunks += 1;
OldIndexerJobStepInput::Save(OldIndexerJobSaveStep {
chunk_idx: i,
walked: chunk_steps,
})
})
.chain(
to_update
.chunks(BATCH_SIZE)
.into_iter()
.enumerate()
.map(|(i, chunk)| {
let chunk_updates = chunk.collect::<Vec<_>>();
*total_updated_paths += chunk_updates.len() as u64;
*to_update_chunks += 1;
OldIndexerJobStepInput::Update(OldIndexerJobUpdateStep {
chunk_idx: i,
to_update: chunk_updates,
})
}),
)
.chain(to_walk.into_iter().map(OldIndexerJobStepInput::Walk))
.collect::<Vec<_>>();
debug!("Walker at indexer job found {total_updated_paths} file_paths to be updated");
OldIndexerJobData::on_scan_progress(
ctx,
vec![
ScanProgress::ChunkCount(*to_save_chunks + *to_update_chunks),
ScanProgress::Message(format!(
"Starting saving {total_new_paths} files or directories, \
{total_updated_paths} files or directories to update, \
there still {to_walk_count} directories to index",
)),
],
);
*data = Some(OldIndexerJobData {
location_path: location_path.to_path_buf(),
indexed_path: to_walk_path,
indexer_rules,
});
Ok((
OldIndexerJobRunMetadata {
db_write_time: db_delete_time,
scan_read_time,
total_paths: *total_new_paths,
total_updated_paths: *total_updated_paths,
indexed_count: 0,
updated_count: 0,
removed_count,
total_save_steps: *to_save_chunks as u64,
total_update_steps: *to_update_chunks as u64,
paths_and_sizes,
},
steps,
errors
.into_iter()
.map(|e| format!("{e}"))
.collect::<Vec<_>>()
.into(),
)
.into())
}
/// Process each chunk of entries in the indexer job, writing to the `file_path` table
async fn execute_step(
&self,
ctx: &WorkerContext,
CurrentStep { step, .. }: CurrentStep<'_, Self::Step>,
data: &Self::Data,
run_metadata: &Self::RunMetadata,
) -> Result<JobStepOutput<Self::Step, Self::RunMetadata>, JobError> {
let init = self;
let mut new_metadata = Self::RunMetadata::default();
match step {
OldIndexerJobStepInput::Save(step) => {
let start_time = Instant::now();
OldIndexerJobData::on_scan_progress(
ctx,
vec![
ScanProgress::SavedChunks(step.chunk_idx + 1),
ScanProgress::Message(format!(
"Writing chunk {} of {} to database",
step.chunk_idx, run_metadata.total_save_steps
)),
],
);
let count = execute_indexer_save_step(&init.location, step, &ctx.library).await?;
new_metadata.indexed_count = count as u64;
new_metadata.db_write_time = start_time.elapsed();
Ok(new_metadata.into())
}
OldIndexerJobStepInput::Update(to_update) => {
let start_time = Instant::now();
OldIndexerJobData::on_scan_progress(
ctx,
vec![
ScanProgress::UpdatedChunks(to_update.chunk_idx + 1),
ScanProgress::Message(format!(
"Updating chunk {} of {} to database",
to_update.chunk_idx, run_metadata.total_save_steps
)),
],
);
let count = execute_indexer_update_step(to_update, &ctx.library).await?;
new_metadata.updated_count = count as u64;
new_metadata.db_write_time = start_time.elapsed();
Ok(new_metadata.into())
}
OldIndexerJobStepInput::Walk(to_walk_entry) => {
let location_id = init.location.id;
let location_path =
maybe_missing(&init.location.path, "location.path").map(Path::new)?;
let db = Arc::clone(&ctx.library.db);
let sync = &ctx.library.sync;
let scan_start = Instant::now();
let WalkResult {
walked,
to_update,
to_walk,
to_remove,
errors,
paths_and_sizes,
} = keep_walking(
location_path,
to_walk_entry,
&data.indexer_rules,
update_notifier_fn(ctx),
file_paths_db_fetcher_fn!(&db),
to_remove_db_fetcher_fn!(location_id, &db),
iso_file_path_factory(location_id, location_path),
)
.await?;
new_metadata.paths_and_sizes = paths_and_sizes;
new_metadata.scan_read_time = scan_start.elapsed();
let db_delete_time = Instant::now();
// TODO pass these uuids to sync system
new_metadata.removed_count =
remove_non_existing_file_paths(to_remove, &db, sync).await?;
new_metadata.db_write_time = db_delete_time.elapsed();
let to_walk_count = to_walk.len();
let more_steps = walked
.chunks(BATCH_SIZE)
.into_iter()
.enumerate()
.map(|(i, chunk)| {
let chunk_steps = chunk.collect::<Vec<_>>();
new_metadata.total_paths += chunk_steps.len() as u64;
new_metadata.total_save_steps += 1;
OldIndexerJobStepInput::Save(OldIndexerJobSaveStep {
chunk_idx: i,
walked: chunk_steps,
})
})
.chain(to_update.chunks(BATCH_SIZE).into_iter().enumerate().map(
|(i, chunk)| {
let chunk_updates = chunk.collect::<Vec<_>>();
new_metadata.total_updated_paths += chunk_updates.len() as u64;
new_metadata.total_update_steps += 1;
OldIndexerJobStepInput::Update(OldIndexerJobUpdateStep {
chunk_idx: i,
to_update: chunk_updates,
})
},
))
.chain(to_walk.into_iter().map(OldIndexerJobStepInput::Walk))
.collect::<Vec<_>>();
OldIndexerJobData::on_scan_progress(
ctx,
vec![
ScanProgress::ChunkCount(more_steps.len() - to_walk_count),
ScanProgress::Message(format!(
"Scanned {} more files or directories; \
{} more directories to scan and {} more entries to update",
new_metadata.total_paths,
to_walk_count,
new_metadata.total_updated_paths
)),
],
);
Ok((
more_steps,
new_metadata,
errors
.into_iter()
.map(|e| format!("{e}"))
.collect::<Vec<_>>()
.into(),
)
.into())
}
}
}
async fn finalize(
&self,
ctx: &WorkerContext,
data: &Option<Self::Data>,
run_metadata: &Self::RunMetadata,
) -> JobResult {
let init = self;
let indexed_path_str = data
.as_ref()
.map(|data| Ok(data.indexed_path.to_string_lossy().to_string()))
.unwrap_or_else(|| maybe_missing(&init.location.path, "location.path").cloned())?;
info!(
"Scan of {indexed_path_str} completed in {:?}. {} new files found, \
indexed {} files in db, updated {} entries. db write completed in {:?}",
run_metadata.scan_read_time,
run_metadata.total_paths,
run_metadata.indexed_count,
run_metadata.total_updated_paths,
run_metadata.db_write_time,
);
if run_metadata.indexed_count > 0 || run_metadata.removed_count > 0 {
invalidate_query!(ctx.library, "search.paths");
}
if run_metadata.total_updated_paths > 0 {
// Invoking orphan remover here as we probably have some orphans objects due to updates
// ctx.library.orphan_remover.invoke().await;
}
if run_metadata.indexed_count > 0
|| run_metadata.removed_count > 0
|| run_metadata.updated_count > 0
{
if let Some(data) = data {
update_directories_sizes(
&run_metadata.paths_and_sizes,
init.location.id,
&data.indexed_path,
&ctx.library,
)
.await?;
if data.indexed_path != data.location_path {
reverse_update_directories_sizes(
&data.indexed_path,
init.location.id,
&data.location_path,
&ctx.library,
)
.await
.map_err(IndexerError::from)?;
}
update_location_size(init.location.id, &ctx.library)
.await
.map_err(IndexerError::from)?;
ctx.library
.db
.location()
.update(
location::id::equals(init.location.id),
vec![location::scan_state::set(ScanState::Indexed as i32)],
)
.exec()
.await
.map_err(IndexerError::from)?;
}
}
// FIXME(fogodev): This is currently a workaround to don't save paths and sizes in the
// metadata after a job is completed, as it's pretty heavy. A proper fix isn't needed
// right now as I already changed it in the new indexer job. And this old one
// will be removed eventually.
let run_metadata = Self::RunMetadata {
db_write_time: run_metadata.db_write_time,
scan_read_time: run_metadata.scan_read_time,
total_paths: run_metadata.total_paths,
total_updated_paths: run_metadata.total_updated_paths,
total_save_steps: run_metadata.total_save_steps,
total_update_steps: run_metadata.total_update_steps,
indexed_count: run_metadata.indexed_count,
updated_count: run_metadata.updated_count,
removed_count: run_metadata.removed_count,
paths_and_sizes: HashMap::new(),
};
Ok(Some(json!({"init: ": init, "run_metadata": run_metadata})))
}
}
fn update_notifier_fn(ctx: &WorkerContext) -> impl FnMut(&Path, usize) + '_ {
move |path, total_entries| {
OldIndexerJobData::on_scan_progress(
ctx,
vec![ScanProgress::Message(format!(
"{total_entries} entries found at {}",
path.display()
))],
);
}
}
async fn update_directories_sizes(
paths_and_sizes: &HashMap<PathBuf, u64>,
location_id: location::id::Type,
location_path: impl AsRef<Path>,
library: &Library,
) -> Result<(), IndexerError> {
let location_path = location_path.as_ref();
let Library { db, sync, .. } = library;
let chunked_queries = paths_and_sizes
.keys()
.chunks(200)
.into_iter()
.map(|paths_chunk| {
paths_chunk
.into_iter()
.map(|path| {
IsolatedFilePathData::new(location_id, location_path, path, true)
.map(file_path::WhereParam::from)
})
.collect::<Result<Vec<_>, _>>()
.map(|params| {
db.file_path()
.find_many(vec![or(params)])
.select(file_path::select!({ pub_id materialized_path name }))
})
})
.collect::<Result<Vec<_>, _>>()?;
let to_sync_and_update = db
._batch(chunked_queries)
.await?
.into_iter()
.flatten()
.filter_map(
|file_path| match (file_path.materialized_path, file_path.name) {
(Some(materialized_path), Some(name)) => {
let mut directory_full_path = location_path.join(&materialized_path[1..]);
directory_full_path.push(name);
if let Some(size) = paths_and_sizes.get(&directory_full_path) {
let size_bytes = size.to_be_bytes().to_vec();
Some((
sync.shared_update(
prisma_sync::file_path::SyncId {
pub_id: file_path.pub_id.clone(),
},
file_path::size_in_bytes_bytes::NAME,
msgpack!(size_bytes.clone()),
),
db.file_path().update(
file_path::pub_id::equals(file_path.pub_id),
vec![file_path::size_in_bytes_bytes::set(Some(size_bytes))],
),
))
} else {
warn!("Found a file_path without ancestor in the database, possible corruption");
None
}
}
_ => {
warn!(
"Found a file_path missing its materialized_path or name: <pub_id='{:#?}'>",
from_bytes_to_uuid(&file_path.pub_id)
);
None
}
},
)
.unzip::<_, _, Vec<_>, Vec<_>>();
sync.write_ops(db, to_sync_and_update).await?;
Ok(())
}

View file

@ -1,197 +0,0 @@
use crate::{
file_paths_db_fetcher_fn, invalidate_query,
library::Library,
location::{
indexer::{
execute_indexer_update_step, reverse_update_directories_sizes, OldIndexerJobUpdateStep,
},
scan_location_sub_path, update_location_size,
},
old_job::JobError,
to_remove_db_fetcher_fn, Node,
};
use sd_core_file_path_helper::{
check_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location,
IsolatedFilePathData,
};
use sd_core_indexer_rules::IndexerRule;
use sd_utils::db::maybe_missing;
use std::{
collections::HashSet,
path::{Path, PathBuf},
sync::Arc,
};
use futures::future::join_all;
use itertools::Itertools;
use tracing::{debug, error};
use super::{
execute_indexer_save_step, iso_file_path_factory, location_with_indexer_rules,
old_walk::walk_single_dir, remove_non_existing_file_paths, IndexerError, OldIndexerJobSaveStep,
};
/// BATCH_SIZE is the number of files to index at each step, writing the chunk of files metadata in the database.
const BATCH_SIZE: usize = 1000;
pub async fn old_shallow(
location: &location_with_indexer_rules::Data,
sub_path: &PathBuf,
node: &Arc<Node>,
library: &Arc<Library>,
) -> Result<(), JobError> {
let location_id = location.id;
let location_path = maybe_missing(&location.path, "location.path").map(Path::new)?;
let db = library.db.clone();
let sync = &library.sync;
let indexer_rules = location
.indexer_rules
.iter()
.map(|rule| IndexerRule::try_from(&rule.indexer_rule))
.collect::<Result<Vec<_>, _>>()
.map_err(IndexerError::from)?;
let (add_root, to_walk_path) = if sub_path != Path::new("") && sub_path != Path::new("/") {
let full_path = ensure_sub_path_is_in_location(&location_path, &sub_path)
.await
.map_err(IndexerError::from)?;
ensure_sub_path_is_directory(&location_path, &sub_path)
.await
.map_err(IndexerError::from)?;
(
!check_file_path_exists::<IndexerError>(
&IsolatedFilePathData::new(location_id, location_path, &full_path, true)
.map_err(IndexerError::from)?,
&db,
)
.await?,
full_path,
)
} else {
(false, location_path.to_path_buf())
};
let (walked, to_update, to_remove, errors, _s) = {
walk_single_dir(
location_path,
&to_walk_path,
&indexer_rules,
file_paths_db_fetcher_fn!(&db),
to_remove_db_fetcher_fn!(location_id, &db),
iso_file_path_factory(location_id, location_path),
add_root,
)
.await?
};
let to_remove_count = to_remove.len();
node.thumbnailer
.remove_indexed_cas_ids(
to_remove
.iter()
.filter_map(|file_path| file_path.cas_id.clone())
.collect::<Vec<_>>(),
library.id,
)
.await;
errors.into_iter().for_each(|e| error!("{e}"));
remove_non_existing_file_paths(to_remove, &db, sync).await?;
let mut new_directories_to_scan = HashSet::new();
let mut to_create_count = 0;
let save_steps = walked
.chunks(BATCH_SIZE)
.into_iter()
.enumerate()
.map(|(i, chunk)| {
let walked = chunk.collect::<Vec<_>>();
to_create_count += walked.len();
walked
.iter()
.filter_map(|walked_entry| {
walked_entry.iso_file_path.materialized_path_for_children()
})
.for_each(|new_dir| {
new_directories_to_scan.insert(new_dir);
});
OldIndexerJobSaveStep {
chunk_idx: i,
walked,
}
})
.collect::<Vec<_>>();
for step in save_steps {
execute_indexer_save_step(location, &step, library).await?;
}
for scan in join_all(
new_directories_to_scan
.into_iter()
.map(|sub_path| scan_location_sub_path(node, library, location.clone(), sub_path)),
)
.await
{
if let Err(e) = scan {
error!("{e}");
}
}
let mut to_update_count = 0;
let update_steps = to_update
.chunks(BATCH_SIZE)
.into_iter()
.enumerate()
.map(|(i, chunk)| {
let to_update = chunk.collect::<Vec<_>>();
to_update_count += to_update.len();
OldIndexerJobUpdateStep {
chunk_idx: i,
to_update,
}
})
.collect::<Vec<_>>();
for step in update_steps {
execute_indexer_update_step(&step, library).await?;
}
debug!(
"Walker at shallow indexer found: \
To create: {to_create_count}; To update: {to_update_count}; To remove: {to_remove_count};"
);
if to_create_count > 0 || to_update_count > 0 || to_remove_count > 0 {
if to_walk_path != location_path {
reverse_update_directories_sizes(to_walk_path, location_id, location_path, library)
.await
.map_err(IndexerError::from)?;
}
update_location_size(location.id, library)
.await
.map_err(IndexerError::from)?;
invalidate_query!(library, "search.paths");
invalidate_query!(library, "search.objects");
}
// library.orphan_remover.invoke().await;
Ok(())
}

File diff suppressed because it is too large

View file

@ -1,281 +0,0 @@
use crate::{
library::{Library, LibraryId},
Node,
};
use sd_prisma::prisma::location;
use sd_utils::db::maybe_missing;
use std::{
collections::{HashMap, HashSet},
path::{Path, PathBuf},
sync::Arc,
time::Duration,
};
use tokio::{fs, io::ErrorKind, sync::oneshot, time::sleep};
use tracing::{error, warn};
use uuid::Uuid;
use super::{watcher::LocationWatcher, LocationManagerError};
type LocationAndLibraryKey = (location::id::Type, LibraryId);
const LOCATION_CHECK_INTERVAL: Duration = Duration::from_secs(5);
pub(super) async fn check_online(
location: &location::Data,
node: &Node,
library: &Library,
) -> Result<bool, LocationManagerError> {
let pub_id = Uuid::from_slice(&location.pub_id)?;
let location_path = maybe_missing(&location.path, "location.path").map(Path::new)?;
// TODO(N): This isn't gonna work with removable media and this will likely permanently break if the DB is restored from a backup.
if location.instance_id == Some(library.config().await.instance_id) {
match fs::metadata(&location_path).await {
Ok(_) => {
node.locations.add_online(pub_id).await;
Ok(true)
}
Err(e) if e.kind() == ErrorKind::NotFound => {
node.locations.remove_online(&pub_id).await;
Ok(false)
}
Err(e) => {
error!("Failed to check if location is online: {:#?}", e);
Ok(false)
}
}
} else {
// In this case, we don't have a `local_path`, but this location was marked as online
node.locations.remove_online(&pub_id).await;
Err(LocationManagerError::NonLocalLocation(location.id))
}
}
pub(super) async fn location_check_sleep(
location_id: location::id::Type,
library: Arc<Library>,
) -> (location::id::Type, Arc<Library>) {
sleep(LOCATION_CHECK_INTERVAL).await;
(location_id, library)
}
pub(super) fn watch_location(
location: location::Data,
library_id: LibraryId,
locations_watched: &mut HashMap<LocationAndLibraryKey, LocationWatcher>,
locations_unwatched: &mut HashMap<LocationAndLibraryKey, LocationWatcher>,
) {
let location_id = location.id;
let location_path = location.path.as_ref();
let Some(location_path) = location_path.map(Path::new) else {
return;
};
if let Some(mut watcher) = locations_unwatched.remove(&(location_id, library_id)) {
if watcher.check_path(location_path) {
watcher.watch();
}
locations_watched.insert((location_id, library_id), watcher);
}
}
pub(super) fn unwatch_location(
location: location::Data,
library_id: LibraryId,
locations_watched: &mut HashMap<LocationAndLibraryKey, LocationWatcher>,
locations_unwatched: &mut HashMap<LocationAndLibraryKey, LocationWatcher>,
) {
let location_id = location.id;
let location_path = location.path.as_ref();
let Some(location_path) = location_path.map(Path::new) else {
return;
};
if let Some(mut watcher) = locations_watched.remove(&(location_id, library_id)) {
if watcher.check_path(location_path) {
watcher.unwatch();
}
locations_unwatched.insert((location_id, library_id), watcher);
}
}
pub(super) fn drop_location(
location_id: location::id::Type,
library_id: LibraryId,
message: &str,
locations_watched: &mut HashMap<LocationAndLibraryKey, LocationWatcher>,
locations_unwatched: &mut HashMap<LocationAndLibraryKey, LocationWatcher>,
) {
warn!("{message}: <id='{location_id}', library_id='{library_id}'>",);
if let Some(mut watcher) = locations_watched.remove(&(location_id, library_id)) {
watcher.unwatch();
} else {
locations_unwatched.remove(&(location_id, library_id));
}
}
pub(super) async fn get_location(
location_id: location::id::Type,
library: &Library,
) -> Option<location::Data> {
library
.db
.location()
.find_unique(location::id::equals(location_id))
.exec()
.await
.unwrap_or_else(|err| {
error!("Failed to get location data from location_id: {:#?}", err);
None
})
}
pub(super) async fn handle_remove_location_request(
location_id: location::id::Type,
library: Arc<Library>,
response_tx: oneshot::Sender<Result<(), LocationManagerError>>,
forced_unwatch: &mut HashSet<LocationAndLibraryKey>,
locations_watched: &mut HashMap<LocationAndLibraryKey, LocationWatcher>,
locations_unwatched: &mut HashMap<LocationAndLibraryKey, LocationWatcher>,
to_remove: &mut HashSet<LocationAndLibraryKey>,
) {
let key = (location_id, library.id);
if let Some(location) = get_location(location_id, &library).await {
// TODO(N): This isn't gonna work with removable media and this will likely permanently break if the DB is restored from a backup.
if location.instance_id == Some(library.config().await.instance_id) {
unwatch_location(location, library.id, locations_watched, locations_unwatched);
locations_unwatched.remove(&key);
forced_unwatch.remove(&key);
} else {
drop_location(
location_id,
library.id,
"Dropping location from location manager, because we don't have a `local_path` anymore",
locations_watched,
locations_unwatched
);
}
} else {
drop_location(
location_id,
library.id,
"Removing location from manager, as we failed to fetch from db",
locations_watched,
locations_unwatched,
);
}
// Marking location as removed, so we don't try to check it when the time comes
to_remove.insert(key);
let _ = response_tx.send(Ok(())); // ignore errors, we handle errors on receiver
}
pub(super) async fn handle_stop_watcher_request(
location_id: location::id::Type,
library: Arc<Library>,
response_tx: oneshot::Sender<Result<(), LocationManagerError>>,
forced_unwatch: &mut HashSet<LocationAndLibraryKey>,
locations_watched: &mut HashMap<LocationAndLibraryKey, LocationWatcher>,
locations_unwatched: &mut HashMap<LocationAndLibraryKey, LocationWatcher>,
) {
async fn inner(
location_id: location::id::Type,
library: Arc<Library>,
forced_unwatch: &mut HashSet<LocationAndLibraryKey>,
locations_watched: &mut HashMap<LocationAndLibraryKey, LocationWatcher>,
locations_unwatched: &mut HashMap<LocationAndLibraryKey, LocationWatcher>,
) -> Result<(), LocationManagerError> {
let key = (location_id, library.id);
if !forced_unwatch.contains(&key) && locations_watched.contains_key(&key) {
get_location(location_id, &library)
.await
.ok_or_else(|| LocationManagerError::FailedToStopOrReinitWatcher {
reason: String::from("failed to fetch location from db"),
})
.map(|location| {
unwatch_location(location, library.id, locations_watched, locations_unwatched);
forced_unwatch.insert(key);
})
} else {
Ok(())
}
}
let _ = response_tx.send(
inner(
location_id,
library,
forced_unwatch,
locations_watched,
locations_unwatched,
)
.await,
); // ignore errors, we handle errors on receiver
}
pub(super) async fn handle_reinit_watcher_request(
location_id: location::id::Type,
library: Arc<Library>,
response_tx: oneshot::Sender<Result<(), LocationManagerError>>,
forced_unwatch: &mut HashSet<LocationAndLibraryKey>,
locations_watched: &mut HashMap<LocationAndLibraryKey, LocationWatcher>,
locations_unwatched: &mut HashMap<LocationAndLibraryKey, LocationWatcher>,
) {
async fn inner(
location_id: location::id::Type,
library: Arc<Library>,
forced_unwatch: &mut HashSet<LocationAndLibraryKey>,
locations_watched: &mut HashMap<LocationAndLibraryKey, LocationWatcher>,
locations_unwatched: &mut HashMap<LocationAndLibraryKey, LocationWatcher>,
) -> Result<(), LocationManagerError> {
let key = (location_id, library.id);
if forced_unwatch.contains(&key) && locations_unwatched.contains_key(&key) {
get_location(location_id, &library)
.await
.ok_or_else(|| LocationManagerError::FailedToStopOrReinitWatcher {
reason: String::from("failed to fetch location from db"),
})
.map(|location| {
watch_location(location, library.id, locations_watched, locations_unwatched);
forced_unwatch.remove(&key);
})
} else {
Ok(())
}
}
let _ = response_tx.send(
inner(
location_id,
library,
forced_unwatch,
locations_watched,
locations_unwatched,
)
.await,
); // ignore errors, we handle errors on receiver
}
pub(super) fn handle_ignore_path_request(
location_id: location::id::Type,
library: Arc<Library>,
path: PathBuf,
ignore: bool,
response_tx: oneshot::Sender<Result<(), LocationManagerError>>,
locations_watched: &HashMap<LocationAndLibraryKey, LocationWatcher>,
) {
let _ = response_tx.send(
if let Some(watcher) = locations_watched.get(&(location_id, library.id)) {
watcher.ignore_path(path, ignore)
} else {
Ok(())
},
); // ignore errors, we handle errors on receiver
}

View file

@ -1,6 +1,5 @@
use crate::{
library::{Library, LibraryManagerEvent},
old_job::JobManagerError,
Node,
};
@ -15,19 +14,22 @@ use std::{
sync::Arc,
};
use async_channel as chan;
use futures::executor::block_on;
use thiserror::Error;
use tokio::sync::{
broadcast::{self, Receiver},
mpsc, oneshot, RwLock,
use tokio::{
spawn,
sync::{
broadcast::{self, Receiver},
oneshot, RwLock,
},
};
use tracing::{debug, error};
use tracing::{debug, error, instrument, trace};
use uuid::Uuid;
mod runner;
mod watcher;
mod helpers;
#[derive(Clone, Copy, Debug)]
enum ManagementMessageAction {
Add,
@ -39,13 +41,13 @@ pub struct LocationManagementMessage {
location_id: location::id::Type,
library: Arc<Library>,
action: ManagementMessageAction,
response_tx: oneshot::Sender<Result<(), LocationManagerError>>,
ack: oneshot::Sender<Result<(), LocationManagerError>>,
}
#[derive(Debug)]
enum WatcherManagementMessageAction {
Stop,
Reinit,
Pause,
Resume,
IgnoreEventsForPath { path: PathBuf, ignore: bool },
}
@ -54,51 +56,42 @@ pub struct WatcherManagementMessage {
location_id: location::id::Type,
library: Arc<Library>,
action: WatcherManagementMessageAction,
response_tx: oneshot::Sender<Result<(), LocationManagerError>>,
ack: oneshot::Sender<Result<(), LocationManagerError>>,
}
#[derive(Error, Debug)]
pub enum LocationManagerError {
#[error("Unable to send location management message to location manager actor: (error: {0})")]
ActorSendLocationError(#[from] mpsc::error::SendError<LocationManagementMessage>),
#[error("location not found in database: <id={0}>")]
LocationNotFound(location::id::Type),
#[error("Unable to send path to be ignored by watcher actor: (error: {0})")]
ActorIgnorePathError(#[from] mpsc::error::SendError<watcher::IgnorePath>),
#[error("watcher error: {0}")]
Watcher(#[from] notify::Error),
#[error("Unable to watcher management message to watcher manager actor: (error: {0})")]
ActorIgnorePathMessageError(#[from] mpsc::error::SendError<WatcherManagementMessage>),
#[error("Unable to receive actor response: (error: {0})")]
ActorResponseError(#[from] oneshot::error::RecvError),
#[error("Watcher error: (error: {0})")]
WatcherError(#[from] notify::Error),
#[error("Failed to stop or reinit a watcher: {reason}")]
FailedToStopOrReinitWatcher { reason: String },
#[error("Missing location from database: <id='{0}'>")]
MissingLocation(location::id::Type),
#[error("Non local location: <id='{0}'>")]
#[error("non local location: <id='{0}'>")]
NonLocalLocation(location::id::Type),
#[error("failed to move file '{}' for reason: {reason}", .path.display())]
MoveError { path: Box<Path>, reason: String },
#[error("file still exists on disk after remove event received: <path='{}'>", .0.display())]
FileStillExistsOnDisk(Box<Path>),
#[error("Tried to update a non-existing file: <path='{0}'>")]
UpdateNonExistingFile(PathBuf),
#[error("Database error: {0}")]
#[error("failed to move file '{}' for reason: {reason}", .path.display())]
MoveError {
path: Box<Path>,
reason: &'static str,
},
#[error("database error: {0}")]
Database(#[from] prisma_client_rust::QueryError),
#[error("File path related error (error: {0})")]
FilePath(#[from] FilePathError),
#[error("Corrupted location pub_id on database: (error: {0})")]
#[error("corrupted location pub_id on database: {0}")]
CorruptedLocationPubId(#[from] uuid::Error),
#[error("Job Manager error: (error: {0})")]
JobManager(#[from] JobManagerError),
#[error("missing-field")]
#[error("missing field: {0}")]
MissingField(#[from] MissingFieldError),
#[error(transparent)]
FilePath(#[from] FilePathError),
#[error(transparent)]
IndexerRuler(#[from] sd_core_indexer_rules::Error),
#[error(transparent)]
JobSystem(#[from] sd_core_heavy_lifting::Error),
#[error(transparent)]
FileIO(#[from] FileIOError),
}
@ -107,20 +100,18 @@ type OnlineLocations = BTreeSet<Vec<u8>>;
#[must_use = "'LocationManagerActor::start' must be used to start the actor"]
pub struct LocationManagerActor {
location_management_rx: mpsc::Receiver<LocationManagementMessage>,
watcher_management_rx: mpsc::Receiver<WatcherManagementMessage>,
stop_rx: oneshot::Receiver<()>,
location_management_rx: chan::Receiver<LocationManagementMessage>,
watcher_management_rx: chan::Receiver<WatcherManagementMessage>,
stop_rx: chan::Receiver<()>,
}
impl LocationManagerActor {
pub fn start(self, node: Arc<Node>) {
tokio::spawn({
spawn({
let node = node.clone();
let rx = node.libraries.rx.clone();
async move {
if let Err(err) = rx
if let Err(e) = rx
.subscribe(|event| {
let node = node.clone();
async move {
@ -134,17 +125,18 @@ impl LocationManagerActor {
.await
.unwrap_or_else(|e| {
error!(
"Failed to get locations from database for location manager: {:#?}",
e
);
?e,
"Failed to get locations from database for location manager;",
);
vec![]
}) {
if let Err(e) =
node.locations.add(location.id, library.clone()).await
{
error!(
"Failed to add location to location manager: {:#?}",
e
?e,
"Failed to add location to location manager;",
);
}
}
@ -160,17 +152,46 @@ impl LocationManagerActor {
})
.await
{
error!("Core may become unstable! LocationManager's library manager subscription aborted with error: {err:?}");
error!(
?e,
"Core may become unstable! LocationManager's \
library manager subscription aborted with error;",
);
}
}
});
tokio::spawn(Locations::run_locations_checker(
self.location_management_rx,
self.watcher_management_rx,
self.stop_rx,
node,
));
spawn({
let node = Arc::clone(&node);
let Self {
location_management_rx,
watcher_management_rx,
stop_rx,
} = self;
async move {
while let Err(e) = spawn({
runner::run(
location_management_rx.clone(),
watcher_management_rx.clone(),
stop_rx.clone(),
Arc::clone(&node),
)
})
.await
{
if e.is_panic() {
error!(?e, "Location manager panicked;");
} else {
trace!("Location manager received shutdown signal and will exit...");
break;
}
trace!("Restarting location manager processing task...");
}
debug!("Location manager gracefully shutdown");
}
});
}
}
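
A minimal sketch of the restart-on-panic supervision used in `start` above, assuming only tokio; `do_work` is a hypothetical stand-in for `runner::run`:

use std::time::Duration;

// Hypothetical stand-in for `runner::run`: pretend work that returns on its own.
async fn do_work() {
	tokio::time::sleep(Duration::from_millis(10)).await;
}

#[tokio::main]
async fn main() {
	let supervisor = tokio::spawn(async {
		loop {
			match tokio::spawn(do_work()).await {
				// The worker returned normally: treat it as a graceful shutdown.
				Ok(()) => break,
				Err(e) if e.is_panic() => {
					eprintln!("worker panicked, restarting: {e:?}");
					// Fall through and spawn a fresh worker on the next iteration.
				}
				Err(e) => {
					// Cancelled: nothing left to restart.
					eprintln!("worker cancelled: {e:?}");
					break;
				}
			}
		}
	});

	supervisor.await.expect("supervisor task failed");
}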
@ -178,64 +199,62 @@ pub struct Locations {
online_locations: RwLock<OnlineLocations>,
pub online_tx: broadcast::Sender<OnlineLocations>,
location_management_tx: mpsc::Sender<LocationManagementMessage>,
location_management_tx: chan::Sender<LocationManagementMessage>,
watcher_management_tx: mpsc::Sender<WatcherManagementMessage>,
stop_tx: Option<oneshot::Sender<()>>,
watcher_management_tx: chan::Sender<WatcherManagementMessage>,
stop_tx: chan::Sender<()>,
}
impl Locations {
pub fn new() -> (Self, LocationManagerActor) {
let online_tx = broadcast::channel(16).0;
let (location_management_tx, location_management_rx) = chan::bounded(128);
let (watcher_management_tx, watcher_management_rx) = chan::bounded(128);
let (stop_tx, stop_rx) = chan::bounded(1);
{
let (location_management_tx, location_management_rx) = mpsc::channel(128);
let (watcher_management_tx, watcher_management_rx) = mpsc::channel(128);
let (stop_tx, stop_rx) = oneshot::channel();
debug!("Starting location manager actor");
debug!("Starting location manager actor");
(
Self {
online_locations: Default::default(),
online_tx,
location_management_tx,
watcher_management_tx,
stop_tx: Some(stop_tx),
},
LocationManagerActor {
location_management_rx,
watcher_management_rx,
stop_rx,
},
)
}
(
Self {
online_locations: Default::default(),
online_tx: broadcast::channel(16).0,
location_management_tx,
watcher_management_tx,
stop_tx,
},
LocationManagerActor {
location_management_rx,
watcher_management_rx,
stop_rx,
},
)
}
#[instrument(skip(self, library), fields(library_id = %library.id), err)]
#[inline]
#[allow(unused_variables)]
async fn location_management_message(
&self,
location_id: location::id::Type,
library: Arc<Library>,
action: ManagementMessageAction,
) -> Result<(), LocationManagerError> {
{
let (tx, rx) = oneshot::channel();
debug!("Sending location management message to location manager actor: {action:?}");
let (tx, rx) = oneshot::channel();
trace!("Sending location management message to location manager actor");
self.location_management_tx
.send(LocationManagementMessage {
location_id,
library,
action,
response_tx: tx,
})
.await?;
self.location_management_tx
.send(LocationManagementMessage {
location_id,
library,
action,
ack: tx,
})
.await
.expect("Location manager actor channel closed sending new location message");
rx.await?
}
rx.await
.expect("Ack channel closed for location management message response")
}
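
A minimal sketch of the request/ack pattern above, assuming tokio and async-channel: the request carries a `oneshot::Sender`, the actor answers through it, and the caller awaits the reply. `Request` and `payload` are hypothetical names:

use async_channel as chan;
use tokio::sync::oneshot;

struct Request {
	payload: String,
	ack: oneshot::Sender<Result<(), String>>,
}

async fn actor(rx: chan::Receiver<Request>) {
	while let Ok(Request { payload, ack }) = rx.recv().await {
		// Handle the request, then report the outcome back to the caller.
		let result = if payload.is_empty() {
			Err("empty payload".to_string())
		} else {
			Ok(())
		};
		ack.send(result).expect("caller dropped the ack receiver");
	}
}

async fn send_request(tx: &chan::Sender<Request>, payload: &str) -> Result<(), String> {
	let (ack_tx, ack_rx) = oneshot::channel();
	tx.send(Request {
		payload: payload.to_string(),
		ack: ack_tx,
	})
	.await
	.expect("actor channel closed");

	ack_rx.await.expect("actor dropped the ack sender")
}

#[tokio::main]
async fn main() {
	let (tx, rx) = chan::bounded(8);
	tokio::spawn(actor(rx));

	assert!(send_request(&tx, "hello").await.is_ok());
	assert!(send_request(&tx, "").await.is_err());
}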
#[instrument(skip(self, library), fields(library_id = %library.id), err)]
#[inline]
#[allow(unused_variables)]
async fn watcher_management_message(
@ -244,22 +263,21 @@ impl Locations {
library: Arc<Library>,
action: WatcherManagementMessageAction,
) -> Result<(), LocationManagerError> {
{
let (tx, rx) = oneshot::channel();
let (tx, rx) = oneshot::channel();
trace!("Sending watcher management message to location manager actor");
debug!("Sending watcher management message to location manager actor: {action:?}");
self.watcher_management_tx
.send(WatcherManagementMessage {
location_id,
library,
action,
ack: tx,
})
.await
.expect("Location manager actor channel closed sending new watcher message");
self.watcher_management_tx
.send(WatcherManagementMessage {
location_id,
library,
action,
response_tx: tx,
})
.await?;
rx.await?
}
rx.await
.expect("Ack channel closed for watcher management message response")
}
pub async fn add(
@ -280,16 +298,16 @@ impl Locations {
.await
}
pub async fn stop_watcher(
pub async fn pause_watcher(
&self,
location_id: location::id::Type,
library: Arc<Library>,
) -> Result<(), LocationManagerError> {
self.watcher_management_message(location_id, library, WatcherManagementMessageAction::Stop)
self.watcher_management_message(location_id, library, WatcherManagementMessageAction::Pause)
.await
}
pub async fn reinit_watcher(
pub async fn resume_watcher(
&self,
location_id: location::id::Type,
library: Arc<Library>,
@ -297,19 +315,19 @@ impl Locations {
self.watcher_management_message(
location_id,
library,
WatcherManagementMessageAction::Reinit,
WatcherManagementMessageAction::Resume,
)
.await
}
pub async fn temporary_stop(
pub async fn temporary_watcher_pause(
&self,
location_id: location::id::Type,
library: Arc<Library>,
) -> Result<StopWatcherGuard, LocationManagerError> {
self.stop_watcher(location_id, library.clone()).await?;
) -> Result<PauseWatcherGuard<'_>, LocationManagerError> {
self.pause_watcher(location_id, library.clone()).await?;
Ok(StopWatcherGuard {
Ok(PauseWatcherGuard {
location_id,
library: Some(library),
manager: self,
@ -320,8 +338,8 @@ impl Locations {
&self,
location_id: location::id::Type,
library: Arc<Library>,
path: impl AsRef<Path>,
) -> Result<IgnoreEventsForPathGuard, LocationManagerError> {
path: impl AsRef<Path> + Send,
) -> Result<IgnoreEventsForPathGuard<'_>, LocationManagerError> {
let path = path.as_ref().to_path_buf();
self.watcher_management_message(
@ -342,217 +360,6 @@ impl Locations {
})
}
async fn run_locations_checker(
mut location_management_rx: mpsc::Receiver<LocationManagementMessage>,
mut watcher_management_rx: mpsc::Receiver<WatcherManagementMessage>,
mut stop_rx: oneshot::Receiver<()>,
node: Arc<Node>,
) -> Result<(), LocationManagerError> {
use std::collections::{HashMap, HashSet};
use futures::stream::{FuturesUnordered, StreamExt};
use tokio::select;
use tracing::warn;
use helpers::{
check_online, drop_location, get_location, handle_ignore_path_request,
handle_reinit_watcher_request, handle_remove_location_request,
handle_stop_watcher_request, location_check_sleep, unwatch_location, watch_location,
};
use watcher::LocationWatcher;
let mut to_check_futures = FuturesUnordered::new();
let mut to_remove = HashSet::new();
let mut locations_watched = HashMap::new();
let mut locations_unwatched = HashMap::new();
let mut forced_unwatch = HashSet::new();
loop {
select! {
// Location management messages
Some(LocationManagementMessage{
location_id,
library,
action,
response_tx
}) = location_management_rx.recv() => {
match action {
// To add a new location
ManagementMessageAction::Add => {
response_tx.send(
if let Some(location) = get_location(location_id, &library).await {
match check_online(&location, &node, &library).await {
Ok(is_online) => {
LocationWatcher::new(location, library.clone(), node.clone())
.await
.map(|mut watcher| {
if is_online {
watcher.watch();
locations_watched.insert(
(location_id, library.id),
watcher
);
debug!("Location {location_id} is online, watching it");
// info!("Locations watched: {:#?}", locations_watched);
} else {
locations_unwatched.insert(
(location_id, library.id),
watcher
);
}
to_check_futures.push(
location_check_sleep(location_id, library)
);
}
)
},
Err(e) => {
error!("Error while checking online status of location {location_id}: {e}");
Ok(()) // TODO: Probs should be error but that will break startup when location is offline
}
}
} else {
warn!(
"Location not found in database to be watched: {}",
location_id
);
Ok(()) // TODO: Probs should be error but that will break startup when location is offline
}).ok(); // ignore errors, we handle errors on receiver
},
// To remove a location
ManagementMessageAction::Remove => {
handle_remove_location_request(
location_id,
library,
response_tx,
&mut forced_unwatch,
&mut locations_watched,
&mut locations_unwatched,
&mut to_remove,
).await;
},
}
}
// Watcher management messages
Some(WatcherManagementMessage{
location_id,
library,
action,
response_tx,
}) = watcher_management_rx.recv() => {
match action {
// To stop a watcher
WatcherManagementMessageAction::Stop => {
handle_stop_watcher_request(
location_id,
library,
response_tx,
&mut forced_unwatch,
&mut locations_watched,
&mut locations_unwatched,
).await;
},
// To reinit a stopped watcher
WatcherManagementMessageAction::Reinit => {
handle_reinit_watcher_request(
location_id,
library,
response_tx,
&mut forced_unwatch,
&mut locations_watched,
&mut locations_unwatched,
).await;
},
// To ignore or not events for a path
WatcherManagementMessageAction::IgnoreEventsForPath { path, ignore } => {
handle_ignore_path_request(
location_id,
library,
path,
ignore,
response_tx,
&locations_watched,
);
},
}
}
// Periodically checking locations
Some((location_id, library)) = to_check_futures.next() => {
let key = (location_id, library.id);
if to_remove.contains(&key) {
// The time to check came for an already removed library, so we just ignore it
to_remove.remove(&key);
} else if let Some(location) = get_location(location_id, &library).await {
// TODO(N): This isn't gonna work with removable media and this will likely permanently break if the DB is restored from a backup.
if location.instance_id == Some(library.config().await.instance_id) {
let is_online = match check_online(&location, &node, &library).await {
Ok(is_online) => is_online,
Err(e) => {
error!("Error while checking online status of location {location_id}: {e}");
continue;
}
};
if is_online
&& !forced_unwatch.contains(&key)
{
watch_location(
location,
library.id,
&mut locations_watched,
&mut locations_unwatched,
);
} else {
unwatch_location(
location,
library.id,
&mut locations_watched,
&mut locations_unwatched,
);
}
to_check_futures.push(location_check_sleep(location_id, library));
} else {
drop_location(
location_id,
library.id,
"Dropping location from location manager, because \
it isn't a location in the current node",
&mut locations_watched,
&mut locations_unwatched
);
forced_unwatch.remove(&key);
}
} else {
drop_location(
location_id,
library.id,
"Removing location from manager, as we failed to fetch from db",
&mut locations_watched,
&mut locations_unwatched,
);
forced_unwatch.remove(&key);
}
}
_ = &mut stop_rx => {
debug!("Stopping location manager");
break;
}
}
}
Ok(())
}
pub async fn is_online(&self, id: &Uuid) -> bool {
let online_locations = self.online_locations.read().await;
online_locations.iter().any(|v| v == id.as_bytes())
@ -591,29 +398,28 @@ impl Locations {
impl Drop for Locations {
fn drop(&mut self) {
if let Some(stop_tx) = self.stop_tx.take() {
if stop_tx.send(()).is_err() {
error!("Failed to send stop signal to location manager");
}
// SAFETY: This will never block as we only have 1 sender and this channel has 1 slot
if self.stop_tx.send_blocking(()).is_err() {
error!("Failed to send stop signal to location manager");
}
}
}
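
A minimal sketch of signaling shutdown from a synchronous `Drop` via `send_blocking`, assuming the async-channel crate; `Manager` is a hypothetical stand-in for `Locations`:

use async_channel as chan;

struct Manager {
	stop_tx: chan::Sender<()>,
}

impl Drop for Manager {
	fn drop(&mut self) {
		// Never blocks here: there is a single sender and a bounded(1) slot.
		if self.stop_tx.send_blocking(()).is_err() {
			eprintln!("stop receiver already gone; nothing to signal");
		}
	}
}

fn main() {
	let (stop_tx, stop_rx) = chan::bounded(1);
	let manager = Manager { stop_tx };

	drop(manager); // `Drop` sends the stop signal

	assert!(stop_rx.try_recv().is_ok());
}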
#[must_use = "this `StopWatcherGuard` must be held for some time, so the watcher is stopped"]
pub struct StopWatcherGuard<'m> {
pub struct PauseWatcherGuard<'m> {
manager: &'m Locations,
location_id: location::id::Type,
library: Option<Arc<Library>>,
}
impl Drop for StopWatcherGuard<'_> {
impl Drop for PauseWatcherGuard<'_> {
fn drop(&mut self) {
// FIXME: change this Drop to async drop in the future
if let Err(e) = block_on(self.manager.reinit_watcher(
if let Err(e) = block_on(self.manager.resume_watcher(
self.location_id,
self.library.take().expect("library should be set"),
)) {
error!("Failed to reinit watcher on stop watcher guard drop: {e}");
error!(?e, "Failed to resume watcher on stop watcher guard drop;");
}
}
}
@ -637,7 +443,7 @@ impl Drop for IgnoreEventsForPathGuard<'_> {
ignore: false,
},
)) {
error!("Failed to un-ignore path on watcher guard drop: {e}");
error!(?e, "Failed to un-ignore path on watcher guard drop;");
}
}
}
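
A minimal sketch of the pause-guard idea above, assuming tokio and the futures crate's `block_on`; `Watcher` and `set_paused` are hypothetical stand-ins, and the state is a plain `AtomicBool` so the blocking call in `Drop` resolves immediately:

use std::sync::atomic::{AtomicBool, Ordering};

use futures::executor::block_on;

struct Watcher {
	paused: AtomicBool,
}

impl Watcher {
	// async only to mirror the real API shape; the body never awaits.
	async fn set_paused(&self, paused: bool) {
		self.paused.store(paused, Ordering::SeqCst);
	}
}

struct PauseGuard<'w> {
	watcher: &'w Watcher,
}

impl<'w> PauseGuard<'w> {
	async fn new(watcher: &'w Watcher) -> Self {
		watcher.set_paused(true).await;
		Self { watcher }
	}
}

impl Drop for PauseGuard<'_> {
	fn drop(&mut self) {
		// `Drop` cannot be async, so the resume call is driven to completion
		// on the current thread, mirroring the FIXME in the real code.
		block_on(self.watcher.set_paused(false));
	}
}

#[tokio::main]
async fn main() {
	let watcher = Watcher {
		paused: AtomicBool::new(false),
	};

	{
		let _guard = PauseGuard::new(&watcher).await;
		assert!(watcher.paused.load(Ordering::SeqCst));
	} // guard dropped here; the watcher is resumed

	assert!(!watcher.paused.load(Ordering::SeqCst));
}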

View file

@ -0,0 +1,449 @@
use crate::{
library::{Library, LibraryId},
Node,
};
use sd_core_prisma_helpers::location_ids_and_path;
use sd_prisma::prisma::location;
use sd_utils::db::maybe_missing;
use std::{
collections::{HashMap, HashSet},
io::ErrorKind,
path::PathBuf,
pin::pin,
sync::Arc,
time::Duration,
};
use async_channel as chan;
use futures::stream::StreamExt;
use futures_concurrency::stream::Merge;
use tokio::{
fs,
sync::oneshot,
time::{interval, MissedTickBehavior},
};
use tokio_stream::wrappers::IntervalStream;
use tracing::{debug, error, instrument, trace, warn};
use uuid::Uuid;
use super::{
watcher::LocationWatcher, LocationManagementMessage, LocationManagerError,
ManagementMessageAction, WatcherManagementMessage, WatcherManagementMessageAction,
};
type LocationIdAndLibraryId = (location::id::Type, LibraryId);
struct Runner {
node: Arc<Node>,
locations_to_check: HashMap<location::id::Type, Arc<Library>>,
locations_watched: HashMap<LocationIdAndLibraryId, LocationWatcher>,
locations_unwatched: HashMap<LocationIdAndLibraryId, LocationWatcher>,
forced_unwatch: HashSet<LocationIdAndLibraryId>,
}
impl Runner {
fn new(node: Arc<Node>) -> Self {
Self {
node,
locations_to_check: HashMap::new(),
locations_watched: HashMap::new(),
locations_unwatched: HashMap::new(),
forced_unwatch: HashSet::new(),
}
}
async fn add_location(
&mut self,
location_id: i32,
library: Arc<Library>,
) -> Result<(), LocationManagerError> {
if let Some(location) = get_location(location_id, &library).await? {
check_online(&location, &self.node, &library)
.await
.and_then(|is_online| {
LocationWatcher::new(location, Arc::clone(&library), Arc::clone(&self.node))
.map(|mut watcher| {
if is_online {
trace!(%location_id, "Location is online, watching it!;");
watcher.watch();
self.locations_watched
.insert((location_id, library.id), watcher);
} else {
self.locations_unwatched
.insert((location_id, library.id), watcher);
}
self.locations_to_check
.insert(location_id, Arc::clone(&library));
})
})
} else {
Err(LocationManagerError::LocationNotFound(location_id))
}
}
async fn remove_location(
&mut self,
location_id: i32,
library: Arc<Library>,
) -> Result<(), LocationManagerError> {
let key = (location_id, library.id);
if let Some(location) = get_location(location_id, &library).await? {
// TODO(N): This isn't gonna work with removable media and this will likely permanently break if the DB is restored from a backup.
if location.instance_id == Some(library.config().await.instance_id) {
self.unwatch_location(location, library.id);
self.locations_unwatched.remove(&key);
self.forced_unwatch.remove(&key);
} else {
self.drop_location(
location_id,
library.id,
"Dropping location from location manager, because we don't have a `local_path` anymore",
);
}
} else {
self.drop_location(
location_id,
library.id,
"Removing location from location manager, as we failed to fetch from db",
);
}
// Removing location from checker
self.locations_to_check.remove(&location_id);
Ok(())
}
#[instrument(skip(self, reason))]
fn drop_location(
&mut self,
location_id: location::id::Type,
library_id: LibraryId,
reason: &'static str,
) {
warn!(%reason);
if let Some(mut watcher) = self.locations_watched.remove(&(location_id, library_id)) {
watcher.unwatch();
} else {
self.locations_unwatched.remove(&(location_id, library_id));
}
}
fn watch_location(
&mut self,
location_ids_and_path::Data {
id: location_id,
path: maybe_location_path,
..
}: location_ids_and_path::Data,
library_id: LibraryId,
) {
if let Some(location_path) = maybe_location_path {
if let Some(mut watcher) = self.locations_unwatched.remove(&(location_id, library_id)) {
if watcher.check_path(location_path) {
watcher.watch();
}
self.locations_watched
.insert((location_id, library_id), watcher);
}
}
}
fn unwatch_location(
&mut self,
location_ids_and_path::Data {
id: location_id,
path: maybe_location_path,
..
}: location_ids_and_path::Data,
library_id: LibraryId,
) {
if let Some(location_path) = maybe_location_path {
if let Some(mut watcher) = self.locations_watched.remove(&(location_id, library_id)) {
if watcher.check_path(location_path) {
watcher.unwatch();
}
self.locations_unwatched
.insert((location_id, library_id), watcher);
}
}
}
#[instrument(skip(self, library), fields(library_id = %library.id), err)]
async fn pause_watcher(
&mut self,
location_id: location::id::Type,
library: Arc<Library>,
) -> Result<(), LocationManagerError> {
let key = (location_id, library.id);
if !self.forced_unwatch.contains(&key) && self.locations_watched.contains_key(&key) {
get_location(location_id, &library)
.await?
.ok_or(LocationManagerError::LocationNotFound(location_id))
.map(|location| {
self.unwatch_location(location, library.id);
self.forced_unwatch.insert(key);
})
} else {
Ok(())
}
}
#[instrument(skip(self, library), fields(library_id = %library.id), err)]
async fn resume_watcher(
&mut self,
location_id: location::id::Type,
library: Arc<Library>,
) -> Result<(), LocationManagerError> {
let key = (location_id, library.id);
if self.forced_unwatch.contains(&key) && self.locations_unwatched.contains_key(&key) {
get_location(location_id, &library)
.await?
.ok_or(LocationManagerError::LocationNotFound(location_id))
.map(|location| {
self.watch_location(location, library.id);
self.forced_unwatch.remove(&key);
})
} else {
Ok(())
}
}
async fn ignore_events_for_path(
&self,
location_id: location::id::Type,
library: Arc<Library>,
path: PathBuf,
ignore: bool,
) {
if let Some(watcher) = self.locations_watched.get(&(location_id, library.id)) {
watcher.ignore_path(path, ignore).await
}
}
async fn handle_location_management_message(
&mut self,
location_id: location::id::Type,
library: Arc<Library>,
action: ManagementMessageAction,
ack: oneshot::Sender<Result<(), LocationManagerError>>,
) {
ack.send(match action {
ManagementMessageAction::Add => self.add_location(location_id, library).await,
ManagementMessageAction::Remove => self.remove_location(location_id, library).await,
})
.expect("Ack channel closed")
}
async fn handle_watcher_management_message(
&mut self,
location_id: location::id::Type,
library: Arc<Library>,
action: WatcherManagementMessageAction,
ack: oneshot::Sender<Result<(), LocationManagerError>>,
) {
ack.send(match action {
WatcherManagementMessageAction::Pause => self.pause_watcher(location_id, library).await,
WatcherManagementMessageAction::Resume => {
self.resume_watcher(location_id, library).await
}
WatcherManagementMessageAction::IgnoreEventsForPath { path, ignore } => {
self.ignore_events_for_path(location_id, library, path, ignore)
.await;
Ok(())
}
})
.expect("Ack channel closed")
}
async fn check_locations(
&mut self,
locations_to_check_buffer: &mut Vec<(location::id::Type, Arc<Library>)>,
) -> Result<(), Vec<LocationManagerError>> {
let mut errors = vec![];
locations_to_check_buffer.clear();
locations_to_check_buffer.extend(self.locations_to_check.drain());
for (location_id, library) in locations_to_check_buffer.drain(..) {
if let Err(e) = self
.check_single_location(location_id, Arc::clone(&library))
.await
{
self.drop_location(
location_id,
library.id,
"Removing location from manager, as we failed to check if it was online",
);
self.forced_unwatch.remove(&(location_id, library.id));
errors.push(e);
}
}
Ok(())
}
async fn check_single_location(
&mut self,
location_id: i32,
library: Arc<Library>,
) -> Result<(), LocationManagerError> {
let key = (location_id, library.id);
if let Some(location) = get_location(location_id, &library).await? {
// TODO(N): This isn't gonna work with removable media and this will likely permanently break if the DB is restored from a backup.
if location.instance_id == Some(library.config().await.instance_id) {
if check_online(&location, &self.node, &library).await?
&& !self.forced_unwatch.contains(&key)
{
self.watch_location(location, library.id);
} else {
self.unwatch_location(location, library.id);
}
self.locations_to_check.insert(location_id, library);
} else {
self.drop_location(
location_id,
library.id,
"Dropping location from location manager, because \
it isn't a location in the current node",
);
self.forced_unwatch.remove(&key);
}
Ok(())
} else {
Err(LocationManagerError::LocationNotFound(location_id))
}
}
}
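
A minimal sketch of the watched/unwatched bookkeeping kept by `Runner`, assuming only the standard library; `Watcher`, `Registry`, and the key types are hypothetical stand-ins:

use std::collections::{HashMap, HashSet};

type Key = (i32, u64); // hypothetical (location_id, library_id) pair

struct Watcher;

impl Watcher {
	fn watch(&mut self) {
		println!("watching");
	}
	fn unwatch(&mut self) {
		println!("unwatching");
	}
}

#[derive(Default)]
struct Registry {
	watched: HashMap<Key, Watcher>,
	unwatched: HashMap<Key, Watcher>,
	forced_unwatch: HashSet<Key>,
}

impl Registry {
	fn add(&mut self, key: Key, online: bool) {
		let mut watcher = Watcher;
		if online {
			watcher.watch();
			self.watched.insert(key, watcher);
		} else {
			self.unwatched.insert(key, watcher);
		}
	}

	// Periodic check: watch online locations unless they were manually paused.
	fn reconcile(&mut self, key: Key, online: bool) {
		if online && !self.forced_unwatch.contains(&key) {
			if let Some(mut w) = self.unwatched.remove(&key) {
				w.watch();
				self.watched.insert(key, w);
			}
		} else if let Some(mut w) = self.watched.remove(&key) {
			w.unwatch();
			self.unwatched.insert(key, w);
		}
	}

	fn pause(&mut self, key: Key) {
		self.forced_unwatch.insert(key);
		self.reconcile(key, false);
	}
}

fn main() {
	let mut registry = Registry::default();
	let key = (1, 10);

	registry.add(key, true); // online, so it starts out watched
	registry.pause(key); // a manual pause moves it to unwatched
	registry.reconcile(key, true); // stays unwatched despite being online
	assert!(registry.unwatched.contains_key(&key));
}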
pub(super) async fn run(
location_management_rx: chan::Receiver<LocationManagementMessage>,
watcher_management_rx: chan::Receiver<WatcherManagementMessage>,
stop_rx: chan::Receiver<()>,
node: Arc<Node>,
) {
enum StreamMessage {
LocationManagementMessage(LocationManagementMessage),
WatcherManagementMessage(WatcherManagementMessage),
CheckLocations,
Stop,
}
let mut locations_to_check_buffer = vec![];
let mut check_locations_interval = interval(Duration::from_secs(2));
check_locations_interval.set_missed_tick_behavior(MissedTickBehavior::Skip);
let mut runner = Runner::new(node);
let mut msg_stream = pin!((
location_management_rx.map(StreamMessage::LocationManagementMessage),
watcher_management_rx.map(StreamMessage::WatcherManagementMessage),
IntervalStream::new(check_locations_interval).map(|_| StreamMessage::CheckLocations),
stop_rx.map(|()| StreamMessage::Stop),
)
.merge());
while let Some(msg) = msg_stream.next().await {
match msg {
StreamMessage::LocationManagementMessage(LocationManagementMessage {
location_id,
library,
action,
ack,
}) => {
runner
.handle_location_management_message(location_id, library, action, ack)
.await
}
// Watcher management messages
StreamMessage::WatcherManagementMessage(WatcherManagementMessage {
location_id,
library,
action,
ack,
}) => {
runner
.handle_watcher_management_message(location_id, library, action, ack)
.await
}
StreamMessage::CheckLocations => {
if let Err(errors) = runner.check_locations(&mut locations_to_check_buffer).await {
warn!(?errors, "Errors while checking locations;");
}
}
StreamMessage::Stop => {
debug!("Stopping location manager");
break;
}
}
}
}
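
A condensed, self-contained sketch of the stream-merge loop used by `run`, assuming tokio, async-channel, futures, futures-concurrency, and tokio-stream; `Work`, `Tick`, and `Stop` are hypothetical variants standing in for the real messages:

use std::{pin::pin, time::Duration};

use async_channel as chan;
use futures::stream::StreamExt;
use futures_concurrency::stream::Merge;
use tokio::time::{interval, MissedTickBehavior};
use tokio_stream::wrappers::IntervalStream;

enum StreamMessage {
	Work(u32),
	Tick,
	Stop,
}

async fn event_loop(work_rx: chan::Receiver<u32>, stop_rx: chan::Receiver<()>) {
	let mut tick = interval(Duration::from_secs(2));
	tick.set_missed_tick_behavior(MissedTickBehavior::Skip);

	// Each source is mapped into the same enum, then all of them are merged
	// into a single stream; ordering across sources is not guaranteed.
	let mut msg_stream = pin!((
		work_rx.map(StreamMessage::Work),
		IntervalStream::new(tick).map(|_| StreamMessage::Tick),
		stop_rx.map(|()| StreamMessage::Stop),
	)
		.merge());

	while let Some(msg) = msg_stream.next().await {
		match msg {
			StreamMessage::Work(n) => println!("got work item {n}"),
			StreamMessage::Tick => println!("periodic check"),
			StreamMessage::Stop => break,
		}
	}
}

#[tokio::main]
async fn main() {
	let (work_tx, work_rx) = chan::bounded(16);
	let (stop_tx, stop_rx) = chan::bounded(1);

	let handle = tokio::spawn(event_loop(work_rx, stop_rx));

	work_tx.send(1).await.unwrap();
	work_tx.send(2).await.unwrap();
	stop_tx.send(()).await.unwrap();

	handle.await.unwrap();
}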
#[instrument(skip(library), fields(library_id = %library.id), err)]
async fn get_location(
location_id: location::id::Type,
library: &Library,
) -> Result<Option<location_ids_and_path::Data>, LocationManagerError> {
library
.db
.location()
.find_unique(location::id::equals(location_id))
.select(location_ids_and_path::select())
.exec()
.await
.map_err(Into::into)
}
#[instrument(
skip_all,
fields(%location_id, library_id = %library.id),
err,
)]
pub(super) async fn check_online(
location_ids_and_path::Data {
id: location_id,
pub_id,
instance_id,
path,
}: &location_ids_and_path::Data,
node: &Node,
library: &Library,
) -> Result<bool, LocationManagerError> {
let pub_id = Uuid::from_slice(pub_id)?;
// TODO(N): This isn't gonna work with removable media and this will likely permanently break if the DB is restored from a backup.
if *instance_id == Some(library.config().await.instance_id) {
match fs::metadata(maybe_missing(path, "location.path")?).await {
Ok(_) => {
node.locations.add_online(pub_id).await;
Ok(true)
}
Err(e) if e.kind() == ErrorKind::NotFound => {
node.locations.remove_online(&pub_id).await;
Ok(false)
}
Err(e) => {
error!(
?e,
"Failed to check if location is online, will consider as offline;"
);
Ok(false)
}
}
} else {
// In this case, we don't have a `local_path`, but this location was marked as online
node.locations.remove_online(&pub_id).await;
Err(LocationManagerError::NonLocalLocation(*location_id))
}
}
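
A minimal sketch of the online check above, assuming only tokio: a reachable path means online, `NotFound` means offline, and any other IO failure is logged and treated as offline:

use std::{io::ErrorKind, path::Path};

use tokio::fs;

async fn is_online(path: &Path) -> bool {
	match fs::metadata(path).await {
		Ok(_) => true,
		Err(e) if e.kind() == ErrorKind::NotFound => false,
		Err(e) => {
			eprintln!(
				"failed to check {}: {e}; treating as offline",
				path.display()
			);
			false
		}
	}
}

#[tokio::main]
async fn main() {
	println!("/tmp online: {}", is_online(Path::new("/tmp")).await);
	println!(
		"missing path online: {}",
		is_online(Path::new("/definitely/not/here")).await
	);
}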

View file

@ -12,40 +12,35 @@ use std::{
sync::Arc,
};
use async_trait::async_trait;
use notify::{
event::{CreateKind, DataChange, ModifyKind, RenameMode},
Event, EventKind,
};
use tokio::{fs, time::Instant};
use tracing::{debug, error, trace};
use tracing::{error, instrument, trace};
use super::{
utils::{create_dir, recalculate_directories_size, remove, rename, update_file},
EventHandler, HUNDRED_MILLIS, ONE_SECOND,
HUNDRED_MILLIS, ONE_SECOND,
};
#[derive(Debug)]
pub(super) struct AndroidEventHandler<'lib> {
pub(super) struct EventHandler {
location_id: location::id::Type,
library: &'lib Arc<Library>,
node: &'lib Arc<Node>,
library: Arc<Library>,
node: Arc<Node>,
last_events_eviction_check: Instant,
rename_from: HashMap<PathBuf, Instant>,
recently_renamed_from: BTreeMap<PathBuf, Instant>,
files_to_update: HashMap<PathBuf, Instant>,
reincident_to_update_files: HashMap<PathBuf, Instant>,
to_recalculate_size: HashMap<PathBuf, Instant>,
path_and_instant_buffer: Vec<(PathBuf, Instant)>,
}
#[async_trait]
impl<'lib> EventHandler<'lib> for AndroidEventHandler<'lib> {
fn new(
location_id: location::id::Type,
library: &'lib Arc<Library>,
node: &'lib Arc<Node>,
) -> Self {
impl super::EventHandler for EventHandler {
fn new(location_id: location::id::Type, library: Arc<Library>, node: Arc<Node>) -> Self {
Self {
location_id,
library,
@ -60,8 +55,19 @@ impl<'lib> EventHandler<'lib> for AndroidEventHandler<'lib> {
}
}
#[instrument(
skip_all,
fields(
location_id = %self.location_id,
library_id = %self.library.id,
waiting_rename_count = %self.recently_renamed_from.len(),
waiting_update_count = %self.files_to_update.len(),
reincident_to_update_files_count = %self.reincident_to_update_files.len(),
waiting_size_count = %self.to_recalculate_size.len(),
),
)]
async fn handle_event(&mut self, event: Event) -> Result<(), LocationManagerError> {
debug!("Received Android event: {:#?}", event);
trace!("Received Android event");
let Event {
kind, mut paths, ..
@ -70,7 +76,7 @@ impl<'lib> EventHandler<'lib> for AndroidEventHandler<'lib> {
match kind {
EventKind::Create(CreateKind::File)
| EventKind::Modify(ModifyKind::Data(DataChange::Any)) => {
// When we receive a create, modify data or metadata events of the abore kinds
// When we receive a create, modify data or metadata events of the above kinds
// we just mark the file to be updated in a near future
// each consecutive event of these kinds that we receive for the same file
// we just store the path again in the map below, with a new instant
@ -101,13 +107,14 @@ impl<'lib> EventHandler<'lib> for AndroidEventHandler<'lib> {
&fs::metadata(path)
.await
.map_err(|e| FileIOError::from((path, e)))?,
self.node,
self.library,
&self.node,
&self.library,
)
.await?;
}
EventKind::Modify(ModifyKind::Name(RenameMode::From)) => {
// Just in case we can't garantee that we receive the Rename From event before the
// Just in case we can't guarantee that we receive the Rename From event before the
// Rename Both event. Just a safeguard
if self.recently_renamed_from.remove(&paths[0]).is_none() {
self.rename_from.insert(paths.remove(0), Instant::now());
@ -115,23 +122,25 @@ impl<'lib> EventHandler<'lib> for AndroidEventHandler<'lib> {
}
EventKind::Modify(ModifyKind::Name(RenameMode::Both)) => {
let from_path = &paths[0];
let to_path = &paths[1];
let to_path = paths.remove(1);
let from_path = paths.remove(0);
self.rename_from.remove(&from_path);
self.rename_from.remove(from_path);
rename(
self.location_id,
to_path,
from_path,
fs::metadata(to_path)
&to_path,
&from_path,
fs::metadata(&to_path)
.await
.map_err(|e| FileIOError::from((to_path, e)))?,
self.library,
.map_err(|e| FileIOError::from((&to_path, e)))?,
&self.library,
)
.await?;
self.recently_renamed_from
.insert(paths.swap_remove(0), Instant::now());
self.recently_renamed_from.insert(from_path, Instant::now());
}
EventKind::Remove(_) => {
let path = paths.remove(0);
if let Some(parent) = path.parent() {
@ -141,10 +150,11 @@ impl<'lib> EventHandler<'lib> for AndroidEventHandler<'lib> {
}
}
remove(self.location_id, &path, self.library).await?;
remove(self.location_id, &path, &self.library).await?;
}
other_event_kind => {
trace!("Other Linux event that we don't handle for now: {other_event_kind:#?}");
_ => {
trace!("Other Android event that we don't handle for now");
}
}
@ -154,11 +164,14 @@ impl<'lib> EventHandler<'lib> for AndroidEventHandler<'lib> {
async fn tick(&mut self) {
if self.last_events_eviction_check.elapsed() > HUNDRED_MILLIS {
if let Err(e) = self.handle_to_update_eviction().await {
error!("Error while handling recently created or update files eviction: {e:#?}");
error!(
?e,
"Error while handling recently created or update files eviction;"
);
}
if let Err(e) = self.handle_rename_from_eviction().await {
error!("Failed to remove file_path: {e:#?}");
error!(?e, "Failed to remove file_path;");
}
self.recently_renamed_from
@ -169,11 +182,11 @@ impl<'lib> EventHandler<'lib> for AndroidEventHandler<'lib> {
&mut self.to_recalculate_size,
&mut self.path_and_instant_buffer,
self.location_id,
self.library,
&self.library,
)
.await
{
error!("Failed to recalculate directories size: {e:#?}");
error!(?e, "Failed to recalculate directories size;");
}
}
@ -182,9 +195,10 @@ impl<'lib> EventHandler<'lib> for AndroidEventHandler<'lib> {
}
}
impl AndroidEventHandler<'_> {
impl EventHandler {
async fn handle_to_update_eviction(&mut self) -> Result<(), LocationManagerError> {
self.path_and_instant_buffer.clear();
let mut should_invalidate = false;
for (path, created_at) in self.files_to_update.drain() {
@ -197,8 +211,11 @@ impl AndroidEventHandler<'_> {
.insert(parent.to_path_buf(), Instant::now());
}
}
self.reincident_to_update_files.remove(&path);
update_file(self.location_id, &path, self.node, self.library).await?;
update_file(self.location_id, &path, &self.node, &self.library).await?;
should_invalidate = true;
}
}
@ -221,14 +238,17 @@ impl AndroidEventHandler<'_> {
.insert(parent.to_path_buf(), Instant::now());
}
}
self.files_to_update.remove(&path);
update_file(self.location_id, &path, self.node, self.library).await?;
update_file(self.location_id, &path, &self.node, &self.library).await?;
should_invalidate = true;
}
}
if should_invalidate {
invalidate_query!(self.library, "search.paths");
invalidate_query!(&self.library, "search.paths");
}
self.reincident_to_update_files
@ -249,21 +269,23 @@ impl AndroidEventHandler<'_> {
.insert(parent.to_path_buf(), Instant::now());
}
}
remove(self.location_id, &path, self.library).await?;
remove(self.location_id, &path, &self.library).await?;
should_invalidate = true;
trace!("Removed file_path due timeout: {}", path.display());
trace!(path = %path.display(), "Removed file_path due timeout;");
} else {
self.path_and_instant_buffer.push((path, instant));
}
}
if should_invalidate {
invalidate_query!(self.library, "search.paths");
invalidate_query!(&self.library, "search.paths");
}
for (path, instant) in self.path_and_instant_buffer.drain(..) {
self.rename_from.insert(path, instant);
}
self.rename_from
.extend(self.path_and_instant_buffer.drain(..));
Ok(())
}
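
A minimal sketch of the debounce/eviction pattern shared by these event handlers, assuming only the standard library: each event refreshes a per-path timestamp and a periodic tick flushes only the paths that have stayed quiet; `flush` is a hypothetical stand-in for `update_file`:

use std::{
	collections::HashMap,
	path::PathBuf,
	thread::sleep,
	time::{Duration, Instant},
};

const QUIET_PERIOD: Duration = Duration::from_secs(1);

struct Debouncer {
	pending: HashMap<PathBuf, Instant>,
	buffer: Vec<(PathBuf, Instant)>,
}

impl Debouncer {
	fn new() -> Self {
		Self {
			pending: HashMap::new(),
			buffer: Vec::new(),
		}
	}

	// Called for each filesystem event: (re)arm the timer for this path.
	fn on_event(&mut self, path: PathBuf) {
		self.pending.insert(path, Instant::now());
	}

	// Called periodically: flush paths that have been quiet, keep the rest.
	fn tick(&mut self, flush: impl Fn(&PathBuf)) {
		self.buffer.clear();
		for (path, last_seen) in self.pending.drain() {
			if last_seen.elapsed() > QUIET_PERIOD {
				flush(&path);
			} else {
				self.buffer.push((path, last_seen));
			}
		}
		self.pending.extend(self.buffer.drain(..));
	}
}

fn main() {
	let mut debouncer = Debouncer::new();
	debouncer.on_event(PathBuf::from("/tmp/a.txt"));
	debouncer.on_event(PathBuf::from("/tmp/a.txt")); // resets the timer

	sleep(QUIET_PERIOD + Duration::from_millis(100));
	debouncer.tick(|path| println!("updating {}", path.display()));
}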

View file

@ -15,45 +15,40 @@ use std::{
sync::Arc,
};
use async_trait::async_trait;
use notify::{
event::{CreateKind, DataChange, MetadataKind, ModifyKind, RenameMode},
Event, EventKind,
};
use tokio::{fs, io, time::Instant};
use tracing::{debug, error, trace, warn};
use tracing::{error, instrument, trace, warn};
use super::{
utils::{
create_dir, create_file, extract_inode_from_path, extract_location_path,
recalculate_directories_size, remove, rename, update_file,
},
EventHandler, INode, InstantAndPath, HUNDRED_MILLIS, ONE_SECOND,
INode, InstantAndPath, HUNDRED_MILLIS, ONE_SECOND,
};
#[derive(Debug)]
pub(super) struct IosEventHandler<'lib> {
pub(super) struct EventHandler {
location_id: location::id::Type,
library: &'lib Arc<Library>,
node: &'lib Arc<Node>,
files_to_update: HashMap<PathBuf, Instant>,
reincident_to_update_files: HashMap<PathBuf, Instant>,
library: Arc<Library>,
node: Arc<Node>,
last_events_eviction_check: Instant,
latest_created_dir: Option<PathBuf>,
old_paths_map: HashMap<INode, InstantAndPath>,
new_paths_map: HashMap<INode, InstantAndPath>,
paths_map_buffer: Vec<(INode, InstantAndPath)>,
files_to_update: HashMap<PathBuf, Instant>,
reincident_to_update_files: HashMap<PathBuf, Instant>,
to_recalculate_size: HashMap<PathBuf, Instant>,
path_and_instant_buffer: Vec<(PathBuf, Instant)>,
paths_map_buffer: Vec<(INode, InstantAndPath)>,
}
#[async_trait]
impl<'lib> EventHandler<'lib> for IosEventHandler<'lib> {
fn new(
location_id: location::id::Type,
library: &'lib Arc<Library>,
node: &'lib Arc<Node>,
) -> Self
impl super::EventHandler for EventHandler {
fn new(location_id: location::id::Type, library: Arc<Library>, node: Arc<Node>) -> Self
where
Self: Sized,
{
@ -61,38 +56,54 @@ impl<'lib> EventHandler<'lib> for IosEventHandler<'lib> {
location_id,
library,
node,
files_to_update: HashMap::new(),
reincident_to_update_files: HashMap::new(),
last_events_eviction_check: Instant::now(),
latest_created_dir: None,
old_paths_map: HashMap::new(),
new_paths_map: HashMap::new(),
paths_map_buffer: Vec::new(),
files_to_update: HashMap::new(),
reincident_to_update_files: HashMap::new(),
to_recalculate_size: HashMap::new(),
path_and_instant_buffer: Vec::new(),
paths_map_buffer: Vec::new(),
}
}
#[instrument(
skip_all,
fields(
location_id = %self.location_id,
library_id = %self.library.id,
latest_created_dir = ?self.latest_created_dir,
old_paths_map_count = %self.old_paths_map.len(),
new_paths_map = %self.new_paths_map.len(),
waiting_update_count = %self.files_to_update.len(),
reincident_to_update_files_count = %self.reincident_to_update_files.len(),
waiting_size_count = %self.to_recalculate_size.len(),
),
)]
async fn handle_event(&mut self, event: Event) -> Result<(), LocationManagerError> {
trace!("Received iOS event");
let Event {
kind, mut paths, ..
} = event;
match kind {
EventKind::Create(CreateKind::Folder) => {
let path = &paths[0];
let path = paths.remove(0);
create_dir(
self.location_id,
path,
&fs::metadata(path)
&path,
&fs::metadata(&path)
.await
.map_err(|e| FileIOError::from((path, e)))?,
self.node,
self.library,
.map_err(|e| FileIOError::from((&path, e)))?,
&self.node,
&self.library,
)
.await?;
self.latest_created_dir = Some(paths.remove(0));
self.latest_created_dir = Some(path);
}
EventKind::Create(CreateKind::File)
@ -100,12 +111,13 @@ impl<'lib> EventHandler<'lib> for IosEventHandler<'lib> {
| EventKind::Modify(ModifyKind::Metadata(
MetadataKind::WriteTime | MetadataKind::Extended,
)) => {
// When we receive a create, modify data or metadata events of the abore kinds
// When we receive a create, modify data or metadata events of the above kinds
// we just mark the file to be updated in a near future
// each consecutive event of these kinds that we receive for the same file
// we just store the path again in the map below, with a new instant
// that effectively resets the timer for the file to be updated <- Copied from macos.rs
let path = paths.remove(0);
if self.files_to_update.contains_key(&path) {
if let Some(old_instant) =
self.files_to_update.insert(path.clone(), Instant::now())
@ -118,6 +130,7 @@ impl<'lib> EventHandler<'lib> for IosEventHandler<'lib> {
self.files_to_update.insert(path, Instant::now());
}
}
EventKind::Modify(ModifyKind::Name(RenameMode::Any)) => {
self.handle_single_rename_event(paths.remove(0)).await?;
}
@ -125,18 +138,22 @@ impl<'lib> EventHandler<'lib> for IosEventHandler<'lib> {
// For some reason, iOS doesn't have a Delete Event, so the event type comes up as this.
// Delete Event
EventKind::Modify(ModifyKind::Metadata(MetadataKind::Any)) => {
debug!("File has been deleted: {:#?}", paths);
let path = paths.remove(0);
trace!(path = %path.display(), "File has been deleted;");
if let Some(parent) = path.parent() {
if parent != Path::new("") {
self.to_recalculate_size
.insert(parent.to_path_buf(), Instant::now());
}
}
remove(self.location_id, &path, self.library).await?; //FIXME: Find out why this freezes the watcher
remove(self.location_id, &path, &self.library).await?; //FIXME: Find out why this freezes the watcher
}
other_event_kind => {
trace!("Other iOS event that we don't handle for now: {other_event_kind:#?}");
_ => {
trace!("Other iOS event that we don't handle for now");
}
}
@ -146,16 +163,19 @@ impl<'lib> EventHandler<'lib> for IosEventHandler<'lib> {
async fn tick(&mut self) {
if self.last_events_eviction_check.elapsed() > HUNDRED_MILLIS {
if let Err(e) = self.handle_to_update_eviction().await {
error!("Error while handling recently created or update files eviction: {e:#?}");
error!(
?e,
"Error while handling recently created or update files eviction;"
);
}
// Cleaning out recently renamed files that are older than 100 milliseconds
if let Err(e) = self.handle_rename_create_eviction().await {
error!("Failed to create file_path on iOS : {e:#?}");
error!(?e, "Failed to create file_path on iOS;");
}
if let Err(e) = self.handle_rename_remove_eviction().await {
error!("Failed to remove file_path: {e:#?}");
error!(?e, "Failed to remove file_path;");
}
if !self.to_recalculate_size.is_empty() {
@ -163,11 +183,11 @@ impl<'lib> EventHandler<'lib> for IosEventHandler<'lib> {
&mut self.to_recalculate_size,
&mut self.path_and_instant_buffer,
self.location_id,
self.library,
&self.library,
)
.await
{
error!("Failed to recalculate directories size: {e:#?}");
error!(?e, "Failed to recalculate directories size;");
}
}
@ -176,7 +196,7 @@ impl<'lib> EventHandler<'lib> for IosEventHandler<'lib> {
}
}
impl IosEventHandler<'_> {
impl EventHandler {
async fn handle_to_update_eviction(&mut self) -> Result<(), LocationManagerError> {
self.path_and_instant_buffer.clear();
let mut should_invalidate = false;
@ -191,8 +211,11 @@ impl IosEventHandler<'_> {
.insert(parent.to_path_buf(), Instant::now());
}
}
self.reincident_to_update_files.remove(&path);
update_file(self.location_id, &path, self.node, self.library).await?;
update_file(self.location_id, &path, &self.node, &self.library).await?;
should_invalidate = true;
}
}
@ -215,8 +238,11 @@ impl IosEventHandler<'_> {
.insert(parent.to_path_buf(), Instant::now());
}
}
self.files_to_update.remove(&path);
update_file(self.location_id, &path, self.node, self.library).await?;
update_file(self.location_id, &path, &self.node, &self.library).await?;
should_invalidate = true;
}
}
@ -246,8 +272,14 @@ impl IosEventHandler<'_> {
if metadata.is_dir() {
// Don't need to dispatch a recalculate directory event as `create_dir` dispatches
// a `scan_location_sub_path` function, which recalculates the size already
create_dir(self.location_id, &path, &metadata, self.node, self.library)
.await?;
create_dir(
self.location_id,
&path,
&metadata,
&self.node,
&self.library,
)
.await?;
} else {
if let Some(parent) = path.parent() {
if parent != Path::new("") {
@ -255,11 +287,19 @@ impl IosEventHandler<'_> {
.insert(parent.to_path_buf(), Instant::now());
}
}
create_file(self.location_id, &path, &metadata, self.node, self.library)
.await?;
create_file(
self.location_id,
&path,
&metadata,
&self.node,
&self.library,
)
.await?;
}
trace!("Created file_path due timeout: {}", path.display());
trace!(path = %path.display(), "Created file_path due timeout;");
should_invalidate = true;
}
} else {
@ -289,8 +329,11 @@ impl IosEventHandler<'_> {
.insert(parent.to_path_buf(), Instant::now());
}
}
remove(self.location_id, &path, self.library).await?;
trace!("Removed file_path due timeout: {}", path.display());
remove(self.location_id, &path, &self.library).await?;
trace!(path = %path.display(), "Removed file_path due timeout;");
should_invalidate = true;
} else {
self.paths_map_buffer.push((inode, (instant, path)));
@ -313,10 +356,10 @@ impl IosEventHandler<'_> {
match fs::metadata(&path).await {
Ok(meta) => {
// File or directory exists, so this can be a "new path" to an actual rename/move or a creation
trace!("Path exists: {}", path.display());
trace!(path = %path.display(), "Path exists;");
let inode = get_inode(&meta);
let location_path = extract_location_path(self.location_id, self.library).await?;
let location_path = extract_location_path(self.location_id, &self.library).await?;
if !check_file_path_exists::<FilePathError>(
&IsolatedFilePathData::new(
@ -331,21 +374,22 @@ impl IosEventHandler<'_> {
{
if let Some((_, old_path)) = self.old_paths_map.remove(&inode) {
trace!(
"Got a match new -> old: {} -> {}",
path.display(),
old_path.display()
old_path = %old_path.display(),
new_path = %path.display(),
"Got a match new -> old;",
);
// We found a new path for this old path, so we can rename it
rename(self.location_id, &path, &old_path, meta, self.library).await?;
rename(self.location_id, &path, &old_path, meta, &self.library).await?;
} else {
trace!("No match for new path yet: {}", path.display());
trace!(path = %path.display(), "No match for new path yet;");
self.new_paths_map.insert(inode, (Instant::now(), path));
}
} else {
warn!(
"Received rename event for a file that already exists in the database: {}",
path.display()
path = %path.display(),
"Received rename event for a file that already exists in the database;",
);
}
}
@ -353,23 +397,25 @@ impl IosEventHandler<'_> {
// File or directory does not exist in the filesystem, if it exists in the database,
// then we try pairing it with the old path from our map
trace!("Path doesn't exists: {}", path.display());
trace!(path = %path.display(), "Path doesn't exists;");
let inode =
match extract_inode_from_path(self.location_id, &path, self.library).await {
match extract_inode_from_path(self.location_id, &path, &self.library).await {
Ok(inode) => inode,
Err(LocationManagerError::FilePath(FilePathError::NotFound(_))) => {
// temporary file, we can ignore it
return Ok(());
}
Err(e) => return Err(e),
};
if let Some((_, new_path)) = self.new_paths_map.remove(&inode) {
trace!(
"Got a match old -> new: {} -> {}",
path.display(),
new_path.display()
old_path = %path.display(),
new_path = %new_path.display(),
"Got a match old -> new;",
);
// We found a new path for this old path, so we can rename it
@ -380,11 +426,12 @@ impl IosEventHandler<'_> {
fs::metadata(&new_path)
.await
.map_err(|e| FileIOError::from((&new_path, e)))?,
self.library,
&self.library,
)
.await?;
} else {
trace!("No match for old path yet: {}", path.display());
trace!(path = %path.display(), "No match for old path yet;");
// We didn't find a new path for this old path, so we store it for later
self.old_paths_map.insert(inode, (Instant::now(), path));
}
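
A minimal sketch of the inode-based rename pairing used by the macOS and iOS handlers, assuming only the standard library: the two halves of a rename arrive as separate events, so old and new paths are held per inode until they can be matched:

use std::{collections::HashMap, path::PathBuf, time::Instant};

type INode = u64;

#[derive(Default)]
struct RenameMatcher {
	old_paths: HashMap<INode, (Instant, PathBuf)>,
	new_paths: HashMap<INode, (Instant, PathBuf)>,
}

enum RenameOutcome {
	Matched { from: PathBuf, to: PathBuf },
	Pending,
}

impl RenameMatcher {
	// A path that no longer exists on disk: this is the "from" side.
	fn on_old_path(&mut self, inode: INode, path: PathBuf) -> RenameOutcome {
		if let Some((_, new_path)) = self.new_paths.remove(&inode) {
			RenameOutcome::Matched { from: path, to: new_path }
		} else {
			self.old_paths.insert(inode, (Instant::now(), path));
			RenameOutcome::Pending
		}
	}

	// A path that exists on disk but not in the database: the "to" side.
	fn on_new_path(&mut self, inode: INode, path: PathBuf) -> RenameOutcome {
		if let Some((_, old_path)) = self.old_paths.remove(&inode) {
			RenameOutcome::Matched { from: old_path, to: path }
		} else {
			self.new_paths.insert(inode, (Instant::now(), path));
			RenameOutcome::Pending
		}
	}
}

fn main() {
	let mut matcher = RenameMatcher::default();

	assert!(matches!(
		matcher.on_old_path(42, PathBuf::from("/tmp/old_name.txt")),
		RenameOutcome::Pending
	));

	if let RenameOutcome::Matched { from, to } =
		matcher.on_new_path(42, PathBuf::from("/tmp/new_name.txt"))
	{
		println!("renamed {} -> {}", from.display(), to.display());
	}
}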

View file

@ -17,40 +17,35 @@ use std::{
sync::Arc,
};
use async_trait::async_trait;
use notify::{
event::{CreateKind, DataChange, ModifyKind, RenameMode},
Event, EventKind,
};
use tokio::{fs, time::Instant};
use tracing::{error, trace};
use tracing::{error, instrument, trace};
use super::{
utils::{create_dir, recalculate_directories_size, remove, rename, update_file},
EventHandler, HUNDRED_MILLIS, ONE_SECOND,
HUNDRED_MILLIS, ONE_SECOND,
};
#[derive(Debug)]
pub(super) struct LinuxEventHandler<'lib> {
pub(super) struct EventHandler {
location_id: location::id::Type,
library: &'lib Arc<Library>,
node: &'lib Arc<Node>,
library: Arc<Library>,
node: Arc<Node>,
last_events_eviction_check: Instant,
rename_from: HashMap<PathBuf, Instant>,
recently_renamed_from: BTreeMap<PathBuf, Instant>,
files_to_update: HashMap<PathBuf, Instant>,
reincident_to_update_files: HashMap<PathBuf, Instant>,
to_recalculate_size: HashMap<PathBuf, Instant>,
path_and_instant_buffer: Vec<(PathBuf, Instant)>,
}
#[async_trait]
impl<'lib> EventHandler<'lib> for LinuxEventHandler<'lib> {
fn new(
location_id: location::id::Type,
library: &'lib Arc<Library>,
node: &'lib Arc<Node>,
) -> Self {
impl super::EventHandler for EventHandler {
fn new(location_id: location::id::Type, library: Arc<Library>, node: Arc<Node>) -> Self {
Self {
location_id,
library,
@ -65,8 +60,19 @@ impl<'lib> EventHandler<'lib> for LinuxEventHandler<'lib> {
}
}
#[instrument(
skip_all,
fields(
location_id = %self.location_id,
library_id = %self.library.id,
waiting_rename_count = %self.recently_renamed_from.len(),
waiting_update_count = %self.files_to_update.len(),
reincident_to_update_files_count = %self.reincident_to_update_files.len(),
waiting_size_count = %self.to_recalculate_size.len(),
),
)]
async fn handle_event(&mut self, event: Event) -> Result<(), LocationManagerError> {
trace!("Received Linux event: {:#?}", event);
trace!("Received Linux event");
let Event {
kind, mut paths, ..
@ -81,6 +87,7 @@ impl<'lib> EventHandler<'lib> for LinuxEventHandler<'lib> {
// we just store the path again in the map below, with a new instant
// that effectively resets the timer for the file to be updated
let path = paths.remove(0);
if self.files_to_update.contains_key(&path) {
if let Some(old_instant) =
self.files_to_update.insert(path.clone(), Instant::now())
@ -95,22 +102,23 @@ impl<'lib> EventHandler<'lib> for LinuxEventHandler<'lib> {
}
EventKind::Create(CreateKind::Folder) => {
let path = &paths[0];
let path = paths.remove(0);
// Don't need to dispatch a recalculate directory event as `create_dir` dispatches
// a `scan_location_sub_path` function, which recalculates the size already
create_dir(
self.location_id,
path,
&fs::metadata(path)
&path,
&fs::metadata(&path)
.await
.map_err(|e| FileIOError::from((path, e)))?,
self.node,
self.library,
.map_err(|e| FileIOError::from((&path, e)))?,
&self.node,
&self.library,
)
.await?;
}
EventKind::Modify(ModifyKind::Name(RenameMode::From)) => {
// Just in case we can't guarantee that we receive the Rename From event before the
// Rename Both event. Just a safeguard
@ -120,23 +128,24 @@ impl<'lib> EventHandler<'lib> for LinuxEventHandler<'lib> {
}
EventKind::Modify(ModifyKind::Name(RenameMode::Both)) => {
let from_path = &paths[0];
let to_path = &paths[1];
let to_path = paths.remove(1);
let from_path = paths.remove(0);
self.rename_from.remove(from_path);
self.rename_from.remove(&from_path);
rename(
self.location_id,
to_path,
from_path,
fs::metadata(to_path)
&to_path,
&from_path,
fs::metadata(&to_path)
.await
.map_err(|e| FileIOError::from((to_path, e)))?,
self.library,
.map_err(|e| FileIOError::from((&to_path, e)))?,
&self.library,
)
.await?;
self.recently_renamed_from
.insert(paths.swap_remove(0), Instant::now());
self.recently_renamed_from.insert(from_path, Instant::now());
}
EventKind::Remove(_) => {
let path = paths.remove(0);
if let Some(parent) = path.parent() {
@ -146,10 +155,11 @@ impl<'lib> EventHandler<'lib> for LinuxEventHandler<'lib> {
}
}
remove(self.location_id, &path, self.library).await?;
remove(self.location_id, &path, &self.library).await?;
}
other_event_kind => {
trace!("Other Linux event that we don't handle for now: {other_event_kind:#?}");
_ => {
trace!("Other Linux event that we don't handle for now");
}
}
@ -159,11 +169,14 @@ impl<'lib> EventHandler<'lib> for LinuxEventHandler<'lib> {
async fn tick(&mut self) {
if self.last_events_eviction_check.elapsed() > HUNDRED_MILLIS {
if let Err(e) = self.handle_to_update_eviction().await {
error!("Error while handling recently created or update files eviction: {e:#?}");
error!(
?e,
"Error while handling recently created or update files eviction;"
);
}
if let Err(e) = self.handle_rename_from_eviction().await {
error!("Failed to remove file_path: {e:#?}");
error!(?e, "Failed to remove file_path;");
}
self.recently_renamed_from
@ -174,11 +187,11 @@ impl<'lib> EventHandler<'lib> for LinuxEventHandler<'lib> {
&mut self.to_recalculate_size,
&mut self.path_and_instant_buffer,
self.location_id,
self.library,
&self.library,
)
.await
{
error!("Failed to recalculate directories size: {e:#?}");
error!(?e, "Failed to recalculate directories size;");
}
}
@ -187,9 +200,10 @@ impl<'lib> EventHandler<'lib> for LinuxEventHandler<'lib> {
}
}
impl LinuxEventHandler<'_> {
impl EventHandler {
async fn handle_to_update_eviction(&mut self) -> Result<(), LocationManagerError> {
self.path_and_instant_buffer.clear();
let mut should_invalidate = false;
for (path, created_at) in self.files_to_update.drain() {
@ -202,8 +216,11 @@ impl LinuxEventHandler<'_> {
.insert(parent.to_path_buf(), Instant::now());
}
}
self.reincident_to_update_files.remove(&path);
update_file(self.location_id, &path, self.node, self.library).await?;
update_file(self.location_id, &path, &self.node, &self.library).await?;
should_invalidate = true;
}
}
@ -226,8 +243,11 @@ impl LinuxEventHandler<'_> {
.insert(parent.to_path_buf(), Instant::now());
}
}
self.files_to_update.remove(&path);
update_file(self.location_id, &path, self.node, self.library).await?;
update_file(self.location_id, &path, &self.node, &self.library).await?;
should_invalidate = true;
}
}
@ -244,6 +264,7 @@ impl LinuxEventHandler<'_> {
async fn handle_rename_from_eviction(&mut self) -> Result<(), LocationManagerError> {
self.path_and_instant_buffer.clear();
let mut should_invalidate = false;
for (path, instant) in self.rename_from.drain() {
@ -254,9 +275,12 @@ impl LinuxEventHandler<'_> {
.insert(parent.to_path_buf(), Instant::now());
}
}
remove(self.location_id, &path, self.library).await?;
remove(self.location_id, &path, &self.library).await?;
should_invalidate = true;
trace!("Removed file_path due timeout: {}", path.display());
trace!(path = %path.display(), "Removed file_path due timeout;");
} else {
self.path_and_instant_buffer.push((path, instant));
}
@ -266,9 +290,8 @@ impl LinuxEventHandler<'_> {
invalidate_query!(self.library, "search.paths");
}
for (path, instant) in self.path_and_instant_buffer.drain(..) {
self.rename_from.insert(path, instant);
}
self.rename_from
.extend(self.path_and_instant_buffer.drain(..));
Ok(())
}

View file

@ -24,45 +24,40 @@ use std::{
sync::Arc,
};
use async_trait::async_trait;
use notify::{
event::{CreateKind, DataChange, MetadataKind, ModifyKind, RenameMode},
Event, EventKind,
};
use tokio::{fs, io, time::Instant};
use tracing::{error, trace, warn};
use tracing::{error, instrument, trace, warn};
use super::{
utils::{
create_dir, create_file, extract_inode_from_path, extract_location_path,
recalculate_directories_size, remove, rename, update_file,
},
EventHandler, INode, InstantAndPath, HUNDRED_MILLIS, ONE_SECOND,
INode, InstantAndPath, HUNDRED_MILLIS, ONE_SECOND,
};
#[derive(Debug)]
pub(super) struct MacOsEventHandler<'lib> {
pub(super) struct EventHandler {
location_id: location::id::Type,
library: &'lib Arc<Library>,
node: &'lib Arc<Node>,
files_to_update: HashMap<PathBuf, Instant>,
reincident_to_update_files: HashMap<PathBuf, Instant>,
library: Arc<Library>,
node: Arc<Node>,
last_events_eviction_check: Instant,
latest_created_dir: Option<PathBuf>,
old_paths_map: HashMap<INode, InstantAndPath>,
new_paths_map: HashMap<INode, InstantAndPath>,
paths_map_buffer: Vec<(INode, InstantAndPath)>,
files_to_update: HashMap<PathBuf, Instant>,
reincident_to_update_files: HashMap<PathBuf, Instant>,
to_recalculate_size: HashMap<PathBuf, Instant>,
path_and_instant_buffer: Vec<(PathBuf, Instant)>,
paths_map_buffer: Vec<(INode, InstantAndPath)>,
}
#[async_trait]
impl<'lib> EventHandler<'lib> for MacOsEventHandler<'lib> {
fn new(
location_id: location::id::Type,
library: &'lib Arc<Library>,
node: &'lib Arc<Node>,
) -> Self
impl super::EventHandler for EventHandler {
fn new(location_id: location::id::Type, library: Arc<Library>, node: Arc<Node>) -> Self
where
Self: Sized,
{
@ -70,20 +65,33 @@ impl<'lib> EventHandler<'lib> for MacOsEventHandler<'lib> {
location_id,
library,
node,
files_to_update: HashMap::new(),
reincident_to_update_files: HashMap::new(),
last_events_eviction_check: Instant::now(),
latest_created_dir: None,
old_paths_map: HashMap::new(),
new_paths_map: HashMap::new(),
paths_map_buffer: Vec::new(),
files_to_update: HashMap::new(),
reincident_to_update_files: HashMap::new(),
to_recalculate_size: HashMap::new(),
path_and_instant_buffer: Vec::new(),
paths_map_buffer: Vec::new(),
}
}
#[instrument(
skip_all,
fields(
location_id = %self.location_id,
library_id = %self.library.id,
latest_created_dir = ?self.latest_created_dir,
old_paths_map_count = %self.old_paths_map.len(),
new_paths_map = %self.new_paths_map.len(),
waiting_update_count = %self.files_to_update.len(),
reincident_to_update_files_count = %self.reincident_to_update_files.len(),
waiting_size_count = %self.to_recalculate_size.len(),
),
)]
async fn handle_event(&mut self, event: Event) -> Result<(), LocationManagerError> {
trace!("Received MacOS event: {:#?}", event);
trace!("Received MacOS event");
let Event {
kind, mut paths, ..
@ -91,8 +99,9 @@ impl<'lib> EventHandler<'lib> for MacOsEventHandler<'lib> {
match kind {
EventKind::Create(CreateKind::Folder) => {
let path = &paths[0];
if let Some(ref latest_created_dir) = self.latest_created_dir.take() {
let path = paths.remove(0);
if let Some(latest_created_dir) = self.latest_created_dir.take() {
if path == latest_created_dir {
// NOTE: This is a MacOS specific event that happens when a folder is created
// through Finder. It creates a folder but 2 events are triggered in
@ -105,18 +114,27 @@ impl<'lib> EventHandler<'lib> for MacOsEventHandler<'lib> {
// Don't need to dispatch a recalculate directory event as `create_dir` dispatches
// a `scan_location_sub_path` function, which recalculates the size already
let metadata = match fs::metadata(&path).await {
Ok(metadata) => metadata,
Err(e) if e.kind() == io::ErrorKind::NotFound => {
// temporary file, bailing out
return Ok(());
}
Err(e) => return Err(FileIOError::from((&path, e)).into()),
};
create_dir(
self.location_id,
path,
&fs::metadata(path)
.await
.map_err(|e| FileIOError::from((path, e)))?,
self.node,
self.library,
&path,
&metadata,
&self.node,
&self.library,
)
.await?;
self.latest_created_dir = Some(paths.remove(0));
self.latest_created_dir = Some(path);
}
EventKind::Create(CreateKind::File)
| EventKind::Modify(ModifyKind::Data(DataChange::Content))
| EventKind::Modify(ModifyKind::Metadata(
@ -128,6 +146,7 @@ impl<'lib> EventHandler<'lib> for MacOsEventHandler<'lib> {
// we just store the path again in the map below, with a new instant
// that effectively resets the timer for the file to be updated
let path = paths.remove(0);
if self.files_to_update.contains_key(&path) {
if let Some(old_instant) =
self.files_to_update.insert(path.clone(), Instant::now())
@ -140,22 +159,24 @@ impl<'lib> EventHandler<'lib> for MacOsEventHandler<'lib> {
self.files_to_update.insert(path, Instant::now());
}
}
EventKind::Modify(ModifyKind::Name(RenameMode::Any)) => {
self.handle_single_rename_event(paths.remove(0)).await?;
}
EventKind::Remove(_) => {
let path = paths.remove(0);
if let Some(parent) = path.parent() {
if parent != Path::new("") {
self.to_recalculate_size
.insert(parent.to_path_buf(), Instant::now());
}
}
remove(self.location_id, &path, self.library).await?;
remove(self.location_id, &path, &self.library).await?;
}
other_event_kind => {
trace!("Other MacOS event that we don't handle for now: {other_event_kind:#?}");
_ => {
trace!("Other MacOS event that we don't handle for now");
}
}
@ -165,16 +186,19 @@ impl<'lib> EventHandler<'lib> for MacOsEventHandler<'lib> {
async fn tick(&mut self) {
if self.last_events_eviction_check.elapsed() > HUNDRED_MILLIS {
if let Err(e) = self.handle_to_update_eviction().await {
error!("Error while handling recently created or update files eviction: {e:#?}");
error!(
?e,
"Error while handling recently created or update files eviction;"
);
}
// Cleaning out recently renamed files that are older than 100 milliseconds
if let Err(e) = self.handle_rename_create_eviction().await {
error!("Failed to create file_path on MacOS : {e:#?}");
error!(?e, "Failed to create file_path on MacOS;");
}
if let Err(e) = self.handle_rename_remove_eviction().await {
error!("Failed to remove file_path: {e:#?}");
error!(?e, "Failed to remove file_path;");
}
if !self.to_recalculate_size.is_empty() {
@ -182,11 +206,11 @@ impl<'lib> EventHandler<'lib> for MacOsEventHandler<'lib> {
&mut self.to_recalculate_size,
&mut self.path_and_instant_buffer,
self.location_id,
self.library,
&self.library,
)
.await
{
error!("Failed to recalculate directories size: {e:#?}");
error!(?e, "Failed to recalculate directories size;");
}
}
@ -195,9 +219,10 @@ impl<'lib> EventHandler<'lib> for MacOsEventHandler<'lib> {
}
}
impl MacOsEventHandler<'_> {
impl EventHandler {
async fn handle_to_update_eviction(&mut self) -> Result<(), LocationManagerError> {
self.path_and_instant_buffer.clear();
let mut should_invalidate = false;
for (path, created_at) in self.files_to_update.drain() {
@ -211,7 +236,7 @@ impl MacOsEventHandler<'_> {
}
}
self.reincident_to_update_files.remove(&path);
update_file(self.location_id, &path, self.node, self.library).await?;
update_file(self.location_id, &path, &self.node, &self.library).await?;
should_invalidate = true;
}
}
@ -235,7 +260,7 @@ impl MacOsEventHandler<'_> {
}
}
self.files_to_update.remove(&path);
update_file(self.location_id, &path, self.node, self.library).await?;
update_file(self.location_id, &path, &self.node, &self.library).await?;
should_invalidate = true;
}
}
@ -253,20 +278,32 @@ impl MacOsEventHandler<'_> {
async fn handle_rename_create_eviction(&mut self) -> Result<(), LocationManagerError> {
// Just to make sure that our buffer is clean
self.paths_map_buffer.clear();
let mut should_invalidate = false;
for (inode, (instant, path)) in self.new_paths_map.drain() {
if instant.elapsed() > HUNDRED_MILLIS {
if !self.files_to_update.contains_key(&path) {
let metadata = fs::metadata(&path)
.await
.map_err(|e| FileIOError::from((&path, e)))?;
let metadata = match fs::metadata(&path).await {
Ok(metadata) => metadata,
Err(e) if e.kind() == io::ErrorKind::NotFound => {
// temporary file, bailing out
return Ok(());
}
Err(e) => return Err(FileIOError::from((&path, e)).into()),
};
if metadata.is_dir() {
// Don't need to dispatch a recalculate directory event as `create_dir` dispatches
// a `scan_location_sub_path` function, which recalculates the size already
create_dir(self.location_id, &path, &metadata, self.node, self.library)
.await?;
create_dir(
self.location_id,
&path,
&metadata,
&self.node,
&self.library,
)
.await?;
} else {
if let Some(parent) = path.parent() {
if parent != Path::new("") {
@ -274,11 +311,18 @@ impl MacOsEventHandler<'_> {
.insert(parent.to_path_buf(), Instant::now());
}
}
create_file(self.location_id, &path, &metadata, self.node, self.library)
.await?;
create_file(
self.location_id,
&path,
&metadata,
&self.node,
&self.library,
)
.await?;
}
trace!("Created file_path due timeout: {}", path.display());
trace!(path = %path.display(), "Created file_path due timeout;");
should_invalidate = true;
}
} else {
@ -298,6 +342,7 @@ impl MacOsEventHandler<'_> {
async fn handle_rename_remove_eviction(&mut self) -> Result<(), LocationManagerError> {
// Just to make sure that our buffer is clean
self.paths_map_buffer.clear();
let mut should_invalidate = false;
for (inode, (instant, path)) in self.old_paths_map.drain() {
@ -308,8 +353,11 @@ impl MacOsEventHandler<'_> {
.insert(parent.to_path_buf(), Instant::now());
}
}
remove(self.location_id, &path, self.library).await?;
trace!("Removed file_path due timeout: {}", path.display());
remove(self.location_id, &path, &self.library).await?;
trace!(path = %path.display(), "Removed file_path due timeout;");
should_invalidate = true;
} else {
self.paths_map_buffer.push((inode, (instant, path)));
@ -332,10 +380,10 @@ impl MacOsEventHandler<'_> {
match fs::metadata(&path).await {
Ok(meta) => {
// File or directory exists, so this can be a "new path" to an actual rename/move or a creation
trace!("Path exists: {}", path.display());
trace!(path = %path.display(), "Path exists;");
let inode = get_inode(&meta);
let location_path = extract_location_path(self.location_id, self.library).await?;
let location_path = extract_location_path(self.location_id, &self.library).await?;
if !check_file_path_exists::<FilePathError>(
&IsolatedFilePathData::new(
@ -350,45 +398,49 @@ impl MacOsEventHandler<'_> {
{
if let Some((_, old_path)) = self.old_paths_map.remove(&inode) {
trace!(
"Got a match new -> old: {} -> {}",
path.display(),
old_path.display()
new_path = %path.display(),
old_path = %old_path.display(),
"Got a match new -> old;",
);
// We found a new path for this old path, so we can rename it
rename(self.location_id, &path, &old_path, meta, self.library).await?;
rename(self.location_id, &path, &old_path, meta, &self.library).await?;
} else {
trace!("No match for new path yet: {}", path.display());
trace!(path = %path.display(), "No match for new path yet;");
self.new_paths_map.insert(inode, (Instant::now(), path));
}
} else {
warn!(
"Received rename event for a file that already exists in the database: {}",
path.display()
path = %path.display(),
"Received rename event for a file that already exists in the database;",
);
}
}
Err(e) if e.kind() == io::ErrorKind::NotFound => {
// File or directory does not exist in the filesystem, if it exists in the database,
// then we try pairing it with the old path from our map
trace!("Path doesn't exists: {}", path.display());
trace!(path = %path.display(), "Path doesn't exists;");
let inode =
match extract_inode_from_path(self.location_id, &path, self.library).await {
match extract_inode_from_path(self.location_id, &path, &self.library).await {
Ok(inode) => inode,
Err(LocationManagerError::FilePath(FilePathError::NotFound(_))) => {
// temporary file, we can ignore it
return Ok(());
}
Err(e) => return Err(e),
};
if let Some((_, new_path)) = self.new_paths_map.remove(&inode) {
trace!(
"Got a match old -> new: {} -> {}",
path.display(),
new_path.display()
old_path = %path.display(),
new_path = %new_path.display(),
"Got a match old -> new;",
);
// We found a new path for this old path, so we can rename it
@ -399,15 +451,17 @@ impl MacOsEventHandler<'_> {
fs::metadata(&new_path)
.await
.map_err(|e| FileIOError::from((&new_path, e)))?,
self.library,
&self.library,
)
.await?;
} else {
trace!("No match for old path yet: {}", path.display());
trace!(path = %path.display(), "No match for old path yet;");
// We didn't find a new path for this old path, so we store ir for later
self.old_paths_map.insert(inode, (Instant::now(), path));
}
}
Err(e) => return Err(FileIOError::from((path, e)).into()),
}

View file

@ -1,25 +1,31 @@
use crate::{library::Library, Node};
use sd_prisma::prisma::location;
use sd_core_indexer_rules::{IndexerRule, IndexerRuler};
use sd_core_prisma_helpers::{location_ids_and_path, location_with_indexer_rules};
use sd_prisma::prisma::{location, PrismaClient};
use sd_utils::db::maybe_missing;
use std::{
collections::HashSet,
future::Future,
path::{Path, PathBuf},
pin::pin,
sync::Arc,
time::Duration,
};
use async_trait::async_trait;
use async_channel as chan;
use futures::StreamExt;
use futures_concurrency::stream::Merge;
use notify::{Config, Event, RecommendedWatcher, RecursiveMode, Watcher};
use tokio::{
runtime::Handle,
select,
sync::{mpsc, oneshot},
task::{block_in_place, JoinHandle},
spawn,
task::JoinHandle,
time::{interval_at, Instant, MissedTickBehavior},
};
use tracing::{debug, error, warn};
use tokio_stream::wrappers::IntervalStream;
use tracing::{debug, error, info, instrument, trace, warn, Instrument};
use uuid::Uuid;
use super::LocationManagerError;
@ -32,22 +38,22 @@ mod windows;
mod utils;
use utils::check_event;
use utils::reject_event;
#[cfg(target_os = "linux")]
type Handler<'lib> = linux::LinuxEventHandler<'lib>;
type Handler = linux::EventHandler;
#[cfg(target_os = "macos")]
type Handler<'lib> = macos::MacOsEventHandler<'lib>;
type Handler = macos::EventHandler;
#[cfg(target_os = "windows")]
type Handler<'lib> = windows::WindowsEventHandler<'lib>;
type Handler = windows::EventHandler;
#[cfg(target_os = "android")]
type Handler<'lib> = android::AndroidEventHandler<'lib>;
type Handler = android::EventHandler;
#[cfg(target_os = "ios")]
type Handler<'lib> = ios::IosEventHandler<'lib>;
type Handler = ios::EventHandler;
pub(super) type IgnorePath = (PathBuf, bool);
@ -55,82 +61,115 @@ type INode = u64;
type InstantAndPath = (Instant, PathBuf);
const ONE_SECOND: Duration = Duration::from_secs(1);
const THIRTY_SECONDS: Duration = Duration::from_secs(30);
const HUNDRED_MILLIS: Duration = Duration::from_millis(100);
#[async_trait]
trait EventHandler<'lib> {
fn new(
location_id: location::id::Type,
library: &'lib Arc<Library>,
node: &'lib Arc<Node>,
) -> Self
trait EventHandler: 'static {
fn new(location_id: location::id::Type, library: Arc<Library>, node: Arc<Node>) -> Self
where
Self: Sized;
/// Handle a file system event.
async fn handle_event(&mut self, event: Event) -> Result<(), LocationManagerError>;
fn handle_event(
&mut self,
event: Event,
) -> impl Future<Output = Result<(), LocationManagerError>> + Send;
/// As Event Handlers have some inner state, from time to time we need to call this tick method
/// so the event handler can update its state.
async fn tick(&mut self);
fn tick(&mut self) -> impl Future<Output = ()> + Send;
}
#[derive(Debug)]
pub(super) struct LocationWatcher {
id: i32,
path: String,
location_id: location::id::Type,
location_path: PathBuf,
watcher: RecommendedWatcher,
ignore_path_tx: mpsc::UnboundedSender<IgnorePath>,
ignore_path_tx: chan::Sender<IgnorePath>,
handle: Option<JoinHandle<()>>,
stop_tx: Option<oneshot::Sender<()>>,
stop_tx: chan::Sender<()>,
}
impl LocationWatcher {
pub(super) async fn new(
location: location::Data,
#[instrument(
name = "location_watcher",
skip(pub_id, maybe_location_path, library, node),
fields(
library_id = %library.id,
location_path = ?maybe_location_path,
),
)]
pub(super) fn new(
location_ids_and_path::Data {
id: location_id,
pub_id,
path: maybe_location_path,
..
}: location_ids_and_path::Data,
library: Arc<Library>,
node: Arc<Node>,
) -> Result<Self, LocationManagerError> {
let (events_tx, events_rx) = mpsc::unbounded_channel();
let (ignore_path_tx, ignore_path_rx) = mpsc::unbounded_channel();
let (stop_tx, stop_rx) = oneshot::channel();
let location_pub_id = Uuid::from_slice(&pub_id)?;
let location_path = maybe_missing(maybe_location_path, "location.path")?.into();
let (events_tx, events_rx) = chan::unbounded();
let (ignore_path_tx, ignore_path_rx) = chan::bounded(8);
let (stop_tx, stop_rx) = chan::bounded(1);
let watcher = RecommendedWatcher::new(
move |result| {
if !events_tx.is_closed() {
if events_tx.send(result).is_err() {
error!(
"Unable to send watcher event to location manager for location: <id='{}'>",
location.id
);
// SAFETY: we are not blocking the thread as this is an unbounded channel
if events_tx.send_blocking(result).is_err() {
error!(%location_id, "Unable to send watcher event to location manager;");
}
} else {
error!(
"Tried to send location file system events to a closed channel: <id='{}'",
location.id
);
error!(%location_id, "Tried to send file system events to a closed channel;");
}
},
Config::default(),
)?;
let handle = tokio::spawn(Self::handle_watch_events(
location.id,
Uuid::from_slice(&location.pub_id)?,
node,
library,
events_rx,
ignore_path_rx,
stop_rx,
));
let handle = spawn({
let events_rx = events_rx.clone();
let ignore_path_rx = ignore_path_rx.clone();
let stop_rx = stop_rx.clone();
async move {
while let Err(e) = spawn(
Self::handle_watch_events(
location_id,
location_pub_id,
Arc::clone(&node),
Arc::clone(&library),
events_rx.clone(),
ignore_path_rx.clone(),
stop_rx.clone(),
)
.in_current_span(),
)
.await
{
if e.is_panic() {
error!(?e, "Location watcher panicked;");
} else {
trace!("Location watcher received shutdown signal and will exit...");
break;
}
trace!("Restarting location watcher processing task...");
}
info!("Location watcher gracefully shutdown");
}
.in_current_span()
});
Ok(Self {
id: location.id,
path: maybe_missing(location.path, "location.path")?,
location_id,
location_path,
watcher,
ignore_path_tx,
handle: Some(handle),
stop_tx: Some(stop_tx),
stop_tx,
})
}
@ -139,157 +178,226 @@ impl LocationWatcher {
location_pub_id: Uuid,
node: Arc<Node>,
library: Arc<Library>,
mut events_rx: mpsc::UnboundedReceiver<notify::Result<Event>>,
mut ignore_path_rx: mpsc::UnboundedReceiver<IgnorePath>,
mut stop_rx: oneshot::Receiver<()>,
events_rx: chan::Receiver<notify::Result<Event>>,
ignore_path_rx: chan::Receiver<IgnorePath>,
stop_rx: chan::Receiver<()>,
) {
let mut event_handler = Handler::new(location_id, &library, &node);
enum StreamMessage {
NewEvent(notify::Result<Event>),
NewIgnorePath(IgnorePath),
Tick,
Stop,
}
let mut event_handler = Handler::new(location_id, Arc::clone(&library), Arc::clone(&node));
let mut last_event_at = Instant::now();
let mut cached_indexer_ruler = None;
let mut cached_location_path = None;
let mut paths_to_ignore = HashSet::new();
let mut handler_interval = interval_at(Instant::now() + HUNDRED_MILLIS, HUNDRED_MILLIS);
let mut handler_tick_interval =
interval_at(Instant::now() + HUNDRED_MILLIS, HUNDRED_MILLIS);
// In case of doubt check: https://docs.rs/tokio/latest/tokio/time/enum.MissedTickBehavior.html
handler_interval.set_missed_tick_behavior(MissedTickBehavior::Delay);
loop {
select! {
Some(event) = events_rx.recv() => {
match event {
Ok(event) => {
debug!("[Debug - handle_watch_events] Received event: {:#?}", event);
if let Err(e) = Self::handle_single_event(
location_id,
location_pub_id,
event,
&mut event_handler,
&node,
&library,
&paths_to_ignore,
).await {
error!("Failed to handle location file system event: \
<id='{location_id}', error='{e:#?}'>",
);
}
}
Err(e) => {
error!("watch error: {:#?}", e);
}
handler_tick_interval.set_missed_tick_behavior(MissedTickBehavior::Delay);
let mut msg_stream = pin!((
events_rx.map(StreamMessage::NewEvent),
ignore_path_rx.map(StreamMessage::NewIgnorePath),
IntervalStream::new(handler_tick_interval).map(|_| StreamMessage::Tick),
stop_rx.map(|()| StreamMessage::Stop),
)
.merge());
while let Some(msg) = msg_stream.next().await {
match msg {
StreamMessage::NewEvent(Ok(event)) => {
if let Err(e) = get_cached_indexer_ruler_and_location_path(
location_id,
&mut cached_indexer_ruler,
&mut cached_location_path,
&last_event_at,
&library.db,
)
.await
{
error!(?e, "Failed to get indexer ruler;");
}
last_event_at = Instant::now();
if let Err(e) = Self::handle_single_event(
location_pub_id,
cached_location_path.as_deref(),
event,
&mut event_handler,
&node,
&paths_to_ignore,
cached_indexer_ruler.as_ref(),
)
.await
{
error!(?e, "Failed to handle location file system event;");
}
}
Some((path, ignore)) = ignore_path_rx.recv() => {
if ignore {
StreamMessage::NewEvent(Err(e)) => error!(?e, "Watcher error;"),
StreamMessage::NewIgnorePath((path, should_ignore)) => {
if should_ignore {
paths_to_ignore.insert(path);
} else {
paths_to_ignore.remove(&path);
}
}
_ = handler_interval.tick() => {
event_handler.tick().await;
}
StreamMessage::Tick => event_handler.tick().await,
_ = &mut stop_rx => {
debug!("Stop Location Manager event handler for location: <id='{}'>", location_id);
break
StreamMessage::Stop => {
debug!("Stopping Location Manager event handler for location");
break;
}
}
}
}
async fn handle_single_event<'lib>(
location_id: location::id::Type,
#[instrument(skip_all, fields(?event, ?ignore_paths, ?location_path))]
async fn handle_single_event(
location_pub_id: Uuid,
location_path: Option<&Path>,
event: Event,
event_handler: &mut impl EventHandler<'lib>,
node: &'lib Node,
_library: &'lib Library,
event_handler: &mut impl EventHandler,
node: &Node,
ignore_paths: &HashSet<PathBuf>,
indexer_ruler: Option<&IndexerRuler>,
) -> Result<(), LocationManagerError> {
debug!("Event: {:#?}", event);
if !check_event(&event, ignore_paths) {
if reject_event(&event, ignore_paths, location_path, indexer_ruler).await {
return Ok(());
}
// let Some(location) = find_location(library, location_id)
// .include(location_with_indexer_rules::include())
// .exec()
// .await?
// else {
// warn!("Tried to handle event for unknown location: <id='{location_id}'>");
// return Ok(());
// };
if !node.locations.is_online(&location_pub_id).await {
warn!("Tried to handle event for offline location: <id='{location_id}'>");
warn!("Tried to handle event for offline location");
return Ok(());
}
// debug!("Handling event: {:#?}", event);
event_handler.handle_event(event).await
}
pub(super) fn ignore_path(
&self,
path: PathBuf,
ignore: bool,
) -> Result<(), LocationManagerError> {
self.ignore_path_tx.send((path, ignore)).map_err(Into::into)
#[instrument(
skip(self, path),
fields(
location_id = %self.location_id,
location_path = %self.location_path.display(),
path = %path.display(),
),
)]
pub(super) async fn ignore_path(&self, path: PathBuf, ignore: bool) {
self.ignore_path_tx
.send((path, ignore))
.await
.expect("Location watcher ignore path channel closed");
}
pub(super) fn check_path(&self, path: impl AsRef<Path>) -> bool {
Path::new(&self.path) == path.as_ref()
self.location_path == path.as_ref()
}
#[instrument(
skip(self),
fields(
location_id = %self.location_id,
location_path = %self.location_path.display(),
),
)]
pub(super) fn watch(&mut self) {
let path = &self.path;
debug!("Start watching location: (path: {path})");
trace!("Start watching location");
if let Err(e) = self
.watcher
.watch(Path::new(path), RecursiveMode::Recursive)
.watch(self.location_path.as_path(), RecursiveMode::Recursive)
{
error!("Unable to watch location: (path: {path}, error: {e:#?})");
error!(?e, "Unable to watch location;");
} else {
debug!("Now watching location: (path: {path})");
trace!("Now watching location");
}
}
#[instrument(
skip(self),
fields(
location_id = %self.location_id,
location_path = %self.location_path.display(),
),
)]
pub(super) fn unwatch(&mut self) {
let path = &self.path;
if let Err(e) = self.watcher.unwatch(Path::new(path)) {
if let Err(e) = self.watcher.unwatch(self.location_path.as_path()) {
/**************************************** TODO: ****************************************
* According to an unit test, this error may occur when a subdirectory is removed *
* and we try to unwatch the parent directory then we have to check the implications *
* of unwatch error for this case. *
**************************************************************************************/
error!("Unable to unwatch location: (path: {path}, error: {e:#?})",);
error!(?e, "Unable to unwatch location;");
} else {
debug!("Stop watching location: (path: {path})");
trace!("Stop watching location");
}
}
}
impl Drop for LocationWatcher {
fn drop(&mut self) {
if let Some(stop_tx) = self.stop_tx.take() {
if stop_tx.send(()).is_err() {
error!(
"Failed to send stop signal to location watcher: <id='{}'>",
self.id
);
}
// FIXME: change this Drop to async drop in the future
if let Some(handle) = self.handle.take() {
let stop_tx = self.stop_tx.clone();
spawn(async move {
stop_tx
.send(())
.await
.expect("Location watcher stop channel closed");
// FIXME: change this Drop to async drop in the future
if let Some(handle) = self.handle.take() {
if let Err(e) = block_in_place(move || Handle::current().block_on(handle)) {
error!("Failed to join watcher task: {e:#?}")
if let Err(e) = handle.await {
error!(?e, "Failed to join watcher task;");
}
}
});
}
}
}
async fn get_cached_indexer_ruler_and_location_path(
location_id: location::id::Type,
cached_indexer_ruler: &mut Option<IndexerRuler>,
location_path: &mut Option<PathBuf>,
last_event_at: &Instant,
db: &PrismaClient,
) -> Result<(), LocationManagerError> {
if cached_indexer_ruler.is_none() || last_event_at.elapsed() > THIRTY_SECONDS {
if let Some(location_with_indexer_rules::Data {
path,
indexer_rules,
..
}) = db
.location()
.find_unique(location::id::equals(location_id))
.include(location_with_indexer_rules::include())
.exec()
.await?
{
*cached_indexer_ruler = Some(
indexer_rules
.iter()
.map(|rule| IndexerRule::try_from(&rule.indexer_rule))
.collect::<Result<Vec<_>, _>>()
.map(IndexerRuler::new)?,
);
*location_path = path.map(Into::into);
}
}
Ok(())
}
/***************************************************************************************************
* Some tests to validate our assumptions of events through different file systems *
****************************************************************************************************
@ -412,26 +520,23 @@ mod tests {
expected_event: EventKind,
) {
let path = path.as_ref();
debug!(
"Expecting event: {expected_event:#?} at path: {}",
path.display()
);
debug!(?expected_event, path = %path.display());
let mut tries = 0;
loop {
match events_rx.try_recv() {
Ok(maybe_event) => {
let event = maybe_event.expect("Failed to receive event");
debug!("Received event: {event:#?}");
debug!(?event, "Received event;");
// Using `ends_with` and removing root path here due to a weird edge case on CI tests at MacOS
if event.paths[0].ends_with(path.iter().skip(1).collect::<PathBuf>())
&& event.kind == expected_event
{
debug!("Received expected event: {expected_event:#?}");
debug!("Received expected event");
break;
}
}
Err(e) => {
debug!("No event yet: {e:#?}");
debug!(?e, "No event yet;");
tries += 1;
sleep(Duration::from_millis(100)).await;
}
@ -451,7 +556,7 @@ mod tests {
watcher
.watch(root_dir.path(), notify::RecursiveMode::Recursive)
.expect("Failed to watch root directory");
debug!("Now watching {}", root_dir.path().display());
debug!(root = %root_dir.path().display(), "Now watching;");
let file_path = root_dir.path().join("test.txt");
fs::write(&file_path, "test").await.unwrap();
@ -475,9 +580,9 @@ mod tests {
)
.await;
debug!("Unwatching root directory: {}", root_dir.path().display());
debug!(root = %root_dir.path().display(), "Unwatching root directory;");
if let Err(e) = watcher.unwatch(root_dir.path()) {
error!("Failed to unwatch root directory: {e:#?}");
error!(?e, "Failed to unwatch root directory;");
}
}
@ -489,7 +594,7 @@ mod tests {
watcher
.watch(root_dir.path(), notify::RecursiveMode::Recursive)
.expect("Failed to watch root directory");
debug!("Now watching {}", root_dir.path().display());
debug!(root = %root_dir.path().display(), "Now watching;");
let dir_path = root_dir.path().join("inner");
fs::create_dir(&dir_path)
@ -505,9 +610,9 @@ mod tests {
#[cfg(target_os = "linux")]
expect_event(events_rx, &dir_path, EventKind::Create(CreateKind::Folder)).await;
debug!("Unwatching root directory: {}", root_dir.path().display());
debug!(root = %root_dir.path().display(), "Unwatching root directory;");
if let Err(e) = watcher.unwatch(root_dir.path()) {
error!("Failed to unwatch root directory: {e:#?}");
error!(?e, "Failed to unwatch root directory;");
}
}
@ -522,7 +627,7 @@ mod tests {
watcher
.watch(root_dir.path(), notify::RecursiveMode::Recursive)
.expect("Failed to watch root directory");
debug!("Now watching {}", root_dir.path().display());
debug!(root = %root_dir.path().display(), "Now watching;");
let mut file = fs::OpenOptions::new()
.append(true)
@ -556,9 +661,9 @@ mod tests {
)
.await;
debug!("Unwatching root directory: {}", root_dir.path().display());
debug!(root = %root_dir.path().display(), "Unwatching root directory;");
if let Err(e) = watcher.unwatch(root_dir.path()) {
error!("Failed to unwatch root directory: {e:#?}");
error!(?e, "Failed to unwatch root directory;");
}
}
@ -573,7 +678,7 @@ mod tests {
watcher
.watch(root_dir.path(), notify::RecursiveMode::Recursive)
.expect("Failed to watch root directory");
debug!("Now watching {}", root_dir.path().display());
debug!(root = %root_dir.path().display(), "Now watching;");
let new_file_name = root_dir.path().join("test2.txt");
@ -605,9 +710,9 @@ mod tests {
)
.await;
debug!("Unwatching root directory: {}", root_dir.path().display());
debug!(root = %root_dir.path().display(), "Unwatching root directory;");
if let Err(e) = watcher.unwatch(root_dir.path()) {
error!("Failed to unwatch root directory: {e:#?}");
error!(?e, "Failed to unwatch root directory;");
}
}
@ -624,7 +729,7 @@ mod tests {
watcher
.watch(root_dir.path(), notify::RecursiveMode::Recursive)
.expect("Failed to watch root directory");
debug!("Now watching {}", root_dir.path().display());
debug!(root = %root_dir.path().display(), "Now watching;");
let new_dir_name = root_dir.path().join("inner2");
@ -656,9 +761,9 @@ mod tests {
)
.await;
debug!("Unwatching root directory: {}", root_dir.path().display());
debug!(root = %root_dir.path().display(), "Unwatching root directory;");
if let Err(e) = watcher.unwatch(root_dir.path()) {
error!("Failed to unwatch root directory: {e:#?}");
error!(?e, "Failed to unwatch root directory;");
}
}
@ -673,7 +778,7 @@ mod tests {
watcher
.watch(root_dir.path(), notify::RecursiveMode::Recursive)
.expect("Failed to watch root directory");
debug!("Now watching {}", root_dir.path().display());
debug!(root = %root_dir.path().display(), "Now watching;");
fs::remove_file(&file_path)
.await
@ -696,9 +801,9 @@ mod tests {
)
.await;
debug!("Unwatching root directory: {}", root_dir.path().display());
debug!(root = %root_dir.path().display(), "Unwatching root directory;");
if let Err(e) = watcher.unwatch(root_dir.path()) {
error!("Failed to unwatch root directory: {e:#?}");
error!(?e, "Failed to unwatch root directory;");
}
}
@ -723,11 +828,11 @@ mod tests {
watcher
.watch(root_dir.path(), notify::RecursiveMode::Recursive)
.expect("Failed to watch root directory");
debug!("Now watching {}", root_dir.path().display());
debug!(root = %root_dir.path().display(), "Now watching;");
debug!("First unwatching the inner directory before removing it");
if let Err(e) = watcher.unwatch(&dir_path) {
error!("Failed to unwatch inner directory: {e:#?}");
error!(?e, "Failed to unwatch inner directory;");
}
fs::remove_dir(&dir_path)
@ -751,9 +856,9 @@ mod tests {
)
.await;
debug!("Unwatching root directory: {}", root_dir.path().display());
debug!(root = %root_dir.path().display(), "Unwatching root directory;");
if let Err(e) = watcher.unwatch(root_dir.path()) {
error!("Failed to unwatch root directory: {e:#?}");
error!(?e, "Failed to unwatch root directory;");
}
}
}

View file

@ -6,19 +6,7 @@ use crate::{
indexer::reverse_update_directories_sizes, location_with_indexer_rules,
manager::LocationManagerError, scan_location_sub_path, update_location_size,
},
object::{
media::{
exif_data_image_to_query_params,
exif_metadata_extractor::{can_extract_exif_data_for_image, extract_exif_data},
ffmpeg_metadata_extractor::{
can_extract_ffmpeg_data_for_audio, can_extract_ffmpeg_data_for_video,
extract_ffmpeg_data, save_ffmpeg_data,
},
old_thumbnail::get_indexed_thumbnail_path,
},
old_file_identifier::FileMetadata,
validation::hash::file_checksum,
},
object::validation::hash::file_checksum,
Node,
};
@ -28,21 +16,32 @@ use sd_core_file_path_helper::{
loose_find_existing_file_path_params, path_is_hidden, FilePathError, FilePathMetadata,
IsolatedFilePathData, MetadataExt,
};
use sd_core_prisma_helpers::file_path_with_object;
use sd_core_heavy_lifting::{
file_identifier::FileMetadata,
media_processor::{
exif_media_data, ffmpeg_media_data, generate_single_thumbnail, get_thumbnails_directory,
ThumbnailKind,
},
};
use sd_core_indexer_rules::{
seed::{GitIgnoreRules, GITIGNORE},
IndexerRuler, RulerDecision,
};
use sd_core_prisma_helpers::{file_path_with_object, object_ids, CasId, ObjectPubId};
use sd_file_ext::{
extensions::{AudioExtension, ImageExtension, VideoExtension},
kind::ObjectKind,
};
use sd_prisma::{
prisma::{exif_data, file_path, location, object},
prisma::{file_path, location, object},
prisma_sync,
};
use sd_sync::OperationFactory;
use sd_utils::{
db::{inode_from_db, inode_to_db, maybe_missing},
error::FileIOError,
msgpack, uuid_to_bytes,
msgpack,
};
#[cfg(target_family = "unix")]
@ -61,31 +60,107 @@ use std::{
};
use chrono::{DateTime, FixedOffset, Local, Utc};
use futures_concurrency::future::Join;
use notify::Event;
use tokio::{
fs,
io::{self, ErrorKind},
spawn,
time::Instant,
time::{sleep, Instant},
};
use tracing::{debug, error, trace, warn};
use uuid::Uuid;
use tracing::{error, instrument, trace, warn};
use super::{INode, HUNDRED_MILLIS};
use super::{INode, HUNDRED_MILLIS, ONE_SECOND};
pub(super) fn check_event(event: &Event, ignore_paths: &HashSet<PathBuf>) -> bool {
pub(super) async fn reject_event(
event: &Event,
ignore_paths: &HashSet<PathBuf>,
location_path: Option<&Path>,
indexer_ruler: Option<&IndexerRuler>,
) -> bool {
// if path includes .DS_Store, .spacedrive file creation or is in the `ignore_paths` set, we ignore
!event.paths.iter().any(|p| {
if event.paths.iter().any(|p| {
p.file_name()
.and_then(OsStr::to_str)
.map_or(false, |name| name == ".DS_Store" || name == ".spacedrive")
|| ignore_paths.contains(p)
})
}) {
trace!("Rejected by ignored paths");
return true;
}
if let Some(indexer_ruler) = indexer_ruler {
let ruler_decisions = event
.paths
.iter()
.map(|path| async move { (path, fs::metadata(path).await) })
.collect::<Vec<_>>()
.join()
.await
.into_iter()
.filter_map(|(path, res)| {
res.map(|metadata| (path, metadata))
.map_err(|e| {
if e.kind() != ErrorKind::NotFound {
error!(?e, path = %path.display(), "Failed to get metadata for path;");
}
})
.ok()
})
.map(|(path, metadata)| {
let mut independent_ruler = indexer_ruler.clone();
async move {
let path_to_check_gitignore = if metadata.is_dir() {
Some(path.as_path())
} else {
path.parent()
};
if let (Some(path_to_check_gitignore), Some(location_path)) =
(path_to_check_gitignore, location_path.as_ref())
{
if independent_ruler.has_system(&GITIGNORE) {
if let Some(rules) = GitIgnoreRules::get_rules_if_in_git_repo(
location_path,
path_to_check_gitignore,
)
.await
{
trace!("Found gitignore rules to follow");
independent_ruler.extend(rules.map(Into::into));
}
}
}
independent_ruler.evaluate_path(path, &metadata).await
}
})
.collect::<Vec<_>>()
.join()
.await;
if !ruler_decisions.is_empty()
&& ruler_decisions.into_iter().all(|res| {
matches!(
res.map_err(|e| trace!(?e, "Failed to evaluate path;"))
// In case of error, we accept the path as a safe default
.unwrap_or(RulerDecision::Accept),
RulerDecision::Reject
)
}) {
trace!("Rejected by indexer ruler");
return true;
}
}
false
}
#[instrument(skip_all, fields(path = %path.as_ref().display()), err)]
pub(super) async fn create_dir(
location_id: location::id::Type,
path: impl AsRef<Path>,
path: impl AsRef<Path> + Send,
metadata: &Metadata,
node: &Arc<Node>,
library: &Arc<Library>,
@ -94,17 +169,13 @@ pub(super) async fn create_dir(
.include(location_with_indexer_rules::include())
.exec()
.await?
.ok_or(LocationManagerError::MissingLocation(location_id))?;
.ok_or(LocationManagerError::LocationNotFound(location_id))?;
let path = path.as_ref();
let location_path = maybe_missing(&location.path, "location.path")?;
trace!(
"Location: <root_path ='{}'> creating directory: {}",
location_path,
path.display()
);
trace!(new_directory = %path.display(), "Creating directory;");
let iso_file_path = IsolatedFilePathData::new(location.id, location_path, path, true)?;
@ -112,10 +183,8 @@ pub(super) async fn create_dir(
if !parent_iso_file_path.is_root()
&& !check_file_path_exists::<FilePathError>(&parent_iso_file_path, &library.db).await?
{
warn!(
"Watcher found a directory without parent: {}",
&iso_file_path
);
warn!(%iso_file_path, "Watcher found a directory without parent;");
return Ok(());
};
@ -123,8 +192,6 @@ pub(super) async fn create_dir(
.materialized_path_for_children()
.expect("We're in the create dir function lol");
debug!("Creating path: {}", iso_file_path);
create_file_path(
library,
iso_file_path.to_parts(),
@ -133,8 +200,24 @@ pub(super) async fn create_dir(
)
.await?;
// scan the new directory
scan_location_sub_path(node, library, location, &children_materialized_path).await?;
spawn({
let node = Arc::clone(node);
let library = Arc::clone(library);
async move {
// Wait a bit for any files being moved into the new directory to be indexed by the watcher
sleep(ONE_SECOND).await;
trace!(%iso_file_path, "Scanning new directory;");
// scan the new directory
if let Err(e) =
scan_location_sub_path(&node, &library, location, &children_materialized_path).await
{
error!(?e, "Failed to scan new directory;");
}
}
});
invalidate_query!(library, "search.paths");
invalidate_query!(library, "search.objects");
@ -142,9 +225,10 @@ pub(super) async fn create_dir(
Ok(())
}
#[instrument(skip_all, fields(path = %path.as_ref().display()), err)]
pub(super) async fn create_file(
location_id: location::id::Type,
path: impl AsRef<Path>,
path: impl AsRef<Path> + Send,
metadata: &Metadata,
node: &Arc<Node>,
library: &Arc<Library>,
@ -162,8 +246,8 @@ pub(super) async fn create_file(
async fn inner_create_file(
location_id: location::id::Type,
location_path: impl AsRef<Path>,
path: impl AsRef<Path>,
location_path: impl AsRef<Path> + Send,
path: impl AsRef<Path> + Send,
metadata: &Metadata,
node: &Arc<Node>,
library @ Library {
@ -176,11 +260,7 @@ async fn inner_create_file(
let path = path.as_ref();
let location_path = location_path.as_ref();
trace!(
"Location: <root_path ='{}'> creating file: {}",
location_path.display(),
path.display()
);
trace!(new_file = %path.display(), "Creating file;");
let iso_file_path = IsolatedFilePathData::new(location_id, location_path, path, false)?;
let iso_file_path_parts = iso_file_path.to_parts();
@ -200,7 +280,8 @@ async fn inner_create_file(
.exec()
.await?
{
trace!("File already exists with that inode: {}", iso_file_path);
trace!(%iso_file_path, "File already exists with that inode;");
return inner_update_file(location_path, &file_path, path, node, library, None).await;
// If we can't find an existing file with the same inode, we check if there is a file with the same path
@ -216,10 +297,8 @@ async fn inner_create_file(
.exec()
.await?
{
trace!(
"File already exists with that iso_file_path: {}",
iso_file_path
);
trace!(%iso_file_path, "File already exists with that iso_file_path;");
return inner_update_file(
location_path,
&file_path,
@ -235,7 +314,8 @@ async fn inner_create_file(
if !parent_iso_file_path.is_root()
&& !check_file_path_exists::<FilePathError>(&parent_iso_file_path, db).await?
{
warn!("Watcher found a file without parent: {}", &iso_file_path);
warn!(%iso_file_path, "Watcher found a file without parent;");
return Ok(());
};
@ -246,17 +326,13 @@ async fn inner_create_file(
fs_metadata,
} = FileMetadata::new(&location_path, &iso_file_path).await?;
debug!("Creating path: {}", iso_file_path);
let created_file =
create_file_path(library, iso_file_path_parts, cas_id.clone(), metadata).await?;
object::select!(object_ids { id pub_id });
let existing_object = db
.object()
.find_first(vec![object::file_paths::some(vec![
file_path::cas_id::equals(cas_id.clone()),
file_path::cas_id::equals(cas_id.clone().map(Into::into)),
file_path::pub_id::not(created_file.pub_id.clone()),
])])
.select(object_ids::select())
@ -269,16 +345,17 @@ async fn inner_create_file(
} = if let Some(object) = existing_object {
object
} else {
let pub_id = uuid_to_bytes(Uuid::new_v4());
let pub_id: ObjectPubId = ObjectPubId::new();
let date_created: DateTime<FixedOffset> =
DateTime::<Local>::from(fs_metadata.created_or_now()).into();
let int_kind = kind as i32;
sync.write_ops(
db,
(
sync.shared_create(
prisma_sync::object::SyncId {
pub_id: pub_id.clone(),
pub_id: pub_id.to_db(),
},
[
(object::date_created::NAME, msgpack!(date_created)),
@ -287,7 +364,7 @@ async fn inner_create_file(
),
db.object()
.create(
pub_id.to_vec(),
pub_id.into(),
vec![
object::date_created::set(Some(date_created)),
object::kind::set(Some(int_kind)),
@ -330,16 +407,21 @@ async fn inner_create_file(
spawn({
let extension = extension.clone();
let path = path.to_path_buf();
let node = node.clone();
let thumbnails_directory =
get_thumbnails_directory(node.config.data_directory());
let library_id = *library_id;
async move {
if let Err(e) = node
.thumbnailer
.generate_single_indexed_thumbnail(&extension, cas_id, path, library_id)
.await
if let Err(e) = generate_single_thumbnail(
&thumbnails_directory,
extension,
cas_id,
path,
ThumbnailKind::Indexed(library_id),
)
.await
{
error!("Failed to generate thumbnail in the watcher: {e:#?}");
error!(?e, "Failed to generate thumbnail in the watcher;");
}
}
});
@ -349,34 +431,15 @@ async fn inner_create_file(
match kind {
ObjectKind::Image => {
if let Ok(image_extension) = ImageExtension::from_str(&extension) {
if can_extract_exif_data_for_image(&image_extension) {
if let Ok(Some(exif_data)) = extract_exif_data(path)
if exif_media_data::can_extract(image_extension) {
if let Ok(Some(exif_data)) = exif_media_data::extract(path)
.await
.map_err(|e| error!("Failed to extract media data: {e:#?}"))
.map_err(|e| error!(?e, "Failed to extract image media data;"))
{
let (sync_params, db_params) =
exif_data_image_to_query_params(exif_data);
sync.write_ops(
exif_media_data::save(
[(exif_data, object_id, object_pub_id.into())],
db,
(
sync.shared_create(
prisma_sync::exif_data::SyncId {
object: prisma_sync::object::SyncId {
pub_id: object_pub_id.clone(),
},
},
sync_params,
),
db.exif_data().upsert(
exif_data::object_id::equals(object_id),
exif_data::create(
object::id::equals(object_id),
db_params.clone(),
),
db_params,
),
),
sync,
)
.await?;
}
@ -386,12 +449,12 @@ async fn inner_create_file(
ObjectKind::Audio => {
if let Ok(audio_extension) = AudioExtension::from_str(&extension) {
if can_extract_ffmpeg_data_for_audio(&audio_extension) {
if let Ok(ffmpeg_data) = extract_ffmpeg_data(path)
if ffmpeg_media_data::can_extract_for_audio(audio_extension) {
if let Ok(ffmpeg_data) = ffmpeg_media_data::extract(path)
.await
.map_err(|e| error!("Failed to extract media data: {e:#?}"))
.map_err(|e| error!(?e, "Failed to extract audio media data;"))
{
save_ffmpeg_data([(ffmpeg_data, object_id)], db).await?;
ffmpeg_media_data::save([(ffmpeg_data, object_id)], db).await?;
}
}
}
@ -399,12 +462,12 @@ async fn inner_create_file(
ObjectKind::Video => {
if let Ok(video_extension) = VideoExtension::from_str(&extension) {
if can_extract_ffmpeg_data_for_video(&video_extension) {
if let Ok(ffmpeg_data) = extract_ffmpeg_data(path)
if ffmpeg_media_data::can_extract_for_video(video_extension) {
if let Ok(ffmpeg_data) = ffmpeg_media_data::extract(path)
.await
.map_err(|e| error!("Failed to extract media data: {e:#?}"))
.map_err(|e| error!(?e, "Failed to extract video media data;"))
{
save_ffmpeg_data([(ffmpeg_data, object_id)], db).await?;
ffmpeg_media_data::save([(ffmpeg_data, object_id)], db).await?;
}
}
}
@ -422,13 +485,14 @@ async fn inner_create_file(
Ok(())
}
#[instrument(skip_all, fields(path = %path.as_ref().display()), err)]
pub(super) async fn update_file(
location_id: location::id::Type,
full_path: impl AsRef<Path>,
path: impl AsRef<Path> + Send,
node: &Arc<Node>,
library: &Arc<Library>,
) -> Result<(), LocationManagerError> {
let full_path = full_path.as_ref();
let full_path = path.as_ref();
let metadata = match fs::metadata(full_path).await {
Ok(metadata) => metadata,
@ -464,16 +528,16 @@ pub(super) async fn update_file(
)
.await
}
.map(|_| {
.map(|()| {
invalidate_query!(library, "search.paths");
invalidate_query!(library, "search.objects");
})
}
async fn inner_update_file(
location_path: impl AsRef<Path>,
location_path: impl AsRef<Path> + Send,
file_path: &file_path_with_object::Data,
full_path: impl AsRef<Path>,
full_path: impl AsRef<Path> + Send,
node: &Arc<Node>,
library @ Library { db, sync, .. }: &Library,
maybe_new_inode: Option<INode>,
@ -485,9 +549,9 @@ async fn inner_update_file(
inode_from_db(&maybe_missing(file_path.inode.as_ref(), "file_path.inode")?[0..8]);
trace!(
"Location: <root_path ='{}'> updating file: {}",
location_path.display(),
full_path.display()
location_path = %location_path.display(),
path = %full_path.display(),
"Updating file;",
);
let iso_file_path = IsolatedFilePathData::try_from(file_path)?;
@ -514,7 +578,7 @@ async fn inner_update_file(
};
let is_hidden = path_is_hidden(full_path, &fs_metadata);
if file_path.cas_id != cas_id {
if file_path.cas_id.as_deref() != cas_id.as_ref().map(CasId::as_str) {
let (sync_params, db_params): (Vec<_>, Vec<_>) = {
use file_path::*;
@ -637,7 +701,7 @@ async fn inner_update_file(
.await?;
}
} else {
let pub_id = uuid_to_bytes(Uuid::new_v4());
let pub_id = ObjectPubId::new();
let date_created: DateTime<FixedOffset> =
DateTime::<Local>::from(fs_metadata.created_or_now()).into();
@ -646,7 +710,7 @@ async fn inner_update_file(
(
sync.shared_create(
prisma_sync::object::SyncId {
pub_id: pub_id.clone(),
pub_id: pub_id.to_db(),
},
[
(object::date_created::NAME, msgpack!(date_created)),
@ -654,7 +718,7 @@ async fn inner_update_file(
],
),
db.object().create(
pub_id.to_vec(),
pub_id.to_db(),
vec![
object::date_created::set(Some(date_created)),
object::kind::set(Some(int_kind)),
@ -672,49 +736,57 @@ async fn inner_update_file(
},
file_path::object::NAME,
msgpack!(prisma_sync::object::SyncId {
pub_id: pub_id.clone()
pub_id: pub_id.to_db()
}),
),
db.file_path().update(
file_path::pub_id::equals(file_path.pub_id.clone()),
vec![file_path::object::connect(object::pub_id::equals(pub_id))],
vec![file_path::object::connect(object::pub_id::equals(
pub_id.into(),
))],
),
)
.await?;
}
if let Some(old_cas_id) = &file_path.cas_id {
if let Some(old_cas_id) = file_path.cas_id.as_ref().map(CasId::from) {
// if this file had a thumbnail previously, we update it to match the new content
if library.thumbnail_exists(node, old_cas_id).await? {
if library.thumbnail_exists(node, &old_cas_id).await? {
if let Some(ext) = file_path.extension.clone() {
// Running in a detached task as thumbnail generation can take a while and we don't want to block the watcher
if let Some(cas_id) = cas_id {
let node = Arc::clone(node);
let path = full_path.to_path_buf();
let library_id = library.id;
let old_cas_id = old_cas_id.clone();
let old_cas_id = old_cas_id.to_owned();
spawn(async move {
let thumbnails_directory =
get_thumbnails_directory(node.config.data_directory());
let was_overwritten = old_cas_id == cas_id;
if let Err(e) = node
.thumbnailer
.generate_single_indexed_thumbnail(
&ext, cas_id, path, library_id,
)
.await
if let Err(e) = generate_single_thumbnail(
&thumbnails_directory,
ext.clone(),
cas_id,
path,
ThumbnailKind::Indexed(library_id),
)
.await
{
error!("Failed to generate thumbnail in the watcher: {e:#?}");
error!(?e, "Failed to generate thumbnail in the watcher;");
}
// If only a few bytes changed, cas_id will probably remains intact
// so we overwrote our previous thumbnail, so we can't remove it
if !was_overwritten {
// remove the old thumbnail as we're generating a new one
let thumb_path =
get_indexed_thumbnail_path(&node, &old_cas_id, library_id);
let thumb_path = ThumbnailKind::Indexed(library_id)
.compute_path(node.config.data_directory(), &old_cas_id);
if let Err(e) = fs::remove_file(&thumb_path).await {
error!(
"Failed to remove old thumbnail: {:#?}",
FileIOError::from((thumb_path, e))
e = ?FileIOError::from((thumb_path, e)),
"Failed to remove old thumbnail;",
);
}
}
@ -728,34 +800,15 @@ async fn inner_update_file(
match kind {
ObjectKind::Image => {
if let Ok(image_extension) = ImageExtension::from_str(extension) {
if can_extract_exif_data_for_image(&image_extension) {
if let Ok(Some(exif_data)) = extract_exif_data(full_path)
if exif_media_data::can_extract(image_extension) {
if let Ok(Some(exif_data)) = exif_media_data::extract(full_path)
.await
.map_err(|e| error!("Failed to extract media data: {e:#?}"))
.map_err(|e| error!(?e, "Failed to extract media data;"))
{
let (sync_params, db_params) =
exif_data_image_to_query_params(exif_data);
sync.write_ops(
exif_media_data::save(
[(exif_data, object.id, object.pub_id.as_slice().into())],
db,
(
sync.shared_create(
prisma_sync::exif_data::SyncId {
object: prisma_sync::object::SyncId {
pub_id: object.pub_id.clone(),
},
},
sync_params,
),
db.exif_data().upsert(
exif_data::object_id::equals(object.id),
exif_data::create(
object::id::equals(object.id),
db_params.clone(),
),
db_params,
),
),
sync,
)
.await?;
}
@ -765,12 +818,12 @@ async fn inner_update_file(
ObjectKind::Audio => {
if let Ok(audio_extension) = AudioExtension::from_str(extension) {
if can_extract_ffmpeg_data_for_audio(&audio_extension) {
if let Ok(ffmpeg_data) = extract_ffmpeg_data(full_path)
if ffmpeg_media_data::can_extract_for_audio(audio_extension) {
if let Ok(ffmpeg_data) = ffmpeg_media_data::extract(full_path)
.await
.map_err(|e| error!("Failed to extract media data: {e:#?}"))
.map_err(|e| error!(?e, "Failed to extract media data;"))
{
save_ffmpeg_data([(ffmpeg_data, object.id)], db).await?;
ffmpeg_media_data::save([(ffmpeg_data, object.id)], db).await?;
}
}
}
@ -778,12 +831,12 @@ async fn inner_update_file(
ObjectKind::Video => {
if let Ok(video_extension) = VideoExtension::from_str(extension) {
if can_extract_ffmpeg_data_for_video(&video_extension) {
if let Ok(ffmpeg_data) = extract_ffmpeg_data(full_path)
if ffmpeg_media_data::can_extract_for_video(video_extension) {
if let Ok(ffmpeg_data) = ffmpeg_media_data::extract(full_path)
.await
.map_err(|e| error!("Failed to extract media data: {e:#?}"))
.map_err(|e| error!(?e, "Failed to extract media data;"))
{
save_ffmpeg_data([(ffmpeg_data, object.id)], db).await?;
ffmpeg_media_data::save([(ffmpeg_data, object.id)], db).await?;
}
}
}
@ -823,10 +876,15 @@ async fn inner_update_file(
Ok(())
}
#[instrument(
skip_all,
fields(new_path = %new_path.as_ref().display(), old_path = %old_path.as_ref().display()),
err,
)]
pub(super) async fn rename(
location_id: location::id::Type,
new_path: impl AsRef<Path>,
old_path: impl AsRef<Path>,
new_path: impl AsRef<Path> + Send,
old_path: impl AsRef<Path> + Send,
new_path_metadata: Metadata,
library: &Library,
) -> Result<(), LocationManagerError> {
@ -841,7 +899,8 @@ pub(super) async fn rename(
let new_path_materialized_str =
extract_normalized_materialized_path_str(location_id, &location_path, new_path)?;
// Renaming a file could potentially be a move to another directory, so we check if our parent changed
// Renaming a file could potentially be a move to another directory,
// so we check if our parent changed
if old_path_materialized_str != new_path_materialized_str
&& !check_file_path_exists::<FilePathError>(
&IsolatedFilePathData::new(location_id, &location_path, new_path, true)?.parent(),
@ -851,7 +910,7 @@ pub(super) async fn rename(
{
return Err(LocationManagerError::MoveError {
path: new_path.into(),
reason: "parent directory does not exist".into(),
reason: "parent directory does not exist",
});
}
@ -890,7 +949,7 @@ pub(super) async fn rename(
.exec()
.await?;
let len = paths.len();
let total_paths_count = paths.len();
let (sync_params, db_params): (Vec<_>, Vec<_>) = paths
.into_iter()
.filter_map(|path| path.materialized_path.map(|mp| (path.id, path.pub_id, mp)))
@ -916,7 +975,7 @@ pub(super) async fn rename(
sync.write_ops(db, (sync_params, db_params)).await?;
trace!("Updated {len} file_paths");
trace!(%total_paths_count, "Updated file_paths;");
}
let is_hidden = path_is_hidden(new_path, &new_path_metadata);
@ -979,12 +1038,13 @@ pub(super) async fn rename(
Ok(())
}
#[instrument(skip_all, fields(path = %path.as_ref().display()), err)]
pub(super) async fn remove(
location_id: location::id::Type,
full_path: impl AsRef<Path>,
path: impl AsRef<Path> + Send,
library: &Library,
) -> Result<(), LocationManagerError> {
let full_path = full_path.as_ref();
let full_path = path.as_ref();
let location_path = extract_location_path(location_id, library).await?;
// if it doesn't exist either way, then we don't care
@ -1005,16 +1065,22 @@ pub(super) async fn remove(
remove_by_file_path(location_id, full_path, &file_path, library).await
}
pub(super) async fn remove_by_file_path(
async fn remove_by_file_path(
location_id: location::id::Type,
path: impl AsRef<Path>,
path: impl AsRef<Path> + Send,
file_path: &file_path::Data,
library: &Library,
) -> Result<(), LocationManagerError> {
// check file still exists on disk
match fs::metadata(path.as_ref()).await {
Ok(_) => {
todo!("file has changed in some way, re-identify it")
// It's possible that in the interval of time between the removal file event being
// received and we reaching this point, the file has been created again for some
// external reason, so we just error out and hope to receive this new create event
// later
return Err(LocationManagerError::FileStillExistsOnDisk(
path.as_ref().into(),
));
}
Err(e) if e.kind() == ErrorKind::NotFound => {
let Library { sync, db, .. } = library;
@ -1060,9 +1126,10 @@ pub(super) async fn remove_by_file_path(
Ok(())
}
#[instrument(skip_all, fields(path = %path.as_ref().display()), err)]
pub(super) async fn extract_inode_from_path(
location_id: location::id::Type,
path: impl AsRef<Path>,
path: impl AsRef<Path> + Send,
library: &Library,
) -> Result<INode, LocationManagerError> {
let path = path.as_ref();
@ -1070,7 +1137,7 @@ pub(super) async fn extract_inode_from_path(
.select(location::select!({ path }))
.exec()
.await?
.ok_or(LocationManagerError::MissingLocation(location_id))?;
.ok_or(LocationManagerError::LocationNotFound(location_id))?;
let location_path = maybe_missing(&location.path, "location.path")?;
@ -1095,6 +1162,7 @@ pub(super) async fn extract_inode_from_path(
)
}
#[instrument(skip_all, err)]
pub(super) async fn extract_location_path(
location_id: location::id::Type,
library: &Library,
@ -1104,12 +1172,12 @@ pub(super) async fn extract_location_path(
.exec()
.await?
.map_or(
Err(LocationManagerError::MissingLocation(location_id)),
Err(LocationManagerError::LocationNotFound(location_id)),
// NOTE: The following usage of `PathBuf` doesn't incur a new allocation so it's fine
|location| Ok(maybe_missing(location.path, "location.path")?.into()),
)
}
#[instrument(skip_all, err)]
pub(super) async fn recalculate_directories_size(
candidates: &mut HashMap<PathBuf, Instant>,
buffer: &mut Vec<(PathBuf, Instant)>,
@ -1129,7 +1197,7 @@ pub(super) async fn recalculate_directories_size(
.select(location::select!({ path }))
.exec()
.await?
.ok_or(LocationManagerError::MissingLocation(location_id))?
.ok_or(LocationManagerError::LocationNotFound(location_id))?
.path,
"location.path",
)?))
@ -1138,12 +1206,29 @@ pub(super) async fn recalculate_directories_size(
if let Some(location_path) = &location_path_cache {
if path != *location_path {
trace!(
"Reverse calculating directory sizes starting at {} until {}",
path.display(),
location_path.display(),
start_directory = %path.display(),
end_directory = %location_path.display(),
"Reverse calculating directory sizes;",
);
reverse_update_directories_sizes(path, location_id, location_path, library)
.await?;
let mut non_critical_errors = vec![];
reverse_update_directories_sizes(
path,
location_id,
location_path,
&library.db,
&library.sync,
&mut non_critical_errors,
)
.await
.map_err(sd_core_heavy_lifting::Error::from)?;
if !non_critical_errors.is_empty() {
error!(
?non_critical_errors,
"Reverse calculating directory sizes finished errors;",
);
}
should_invalidate = true;
} else {
should_update_location_size = true;

Some files were not shown because too many files have changed in this diff Show more