[ENG-939, ENG-1173] PDF Thumbnails (#1242)

* sd-pdf

* Process PDF blocking render inside a spawn_blocking
 - Load a single global Pdfium instance

* Migrate pdf thumb logic to sd-images
 - Replace block_in_place with spawn_blocking
 - Only load LibHeif once
 - Allow thumbnailer (both indexed and non-indexed locations) to process documents
 - Disable loading pdf viewer in Inspection in favour of loading its thumbnail

* Try to load pdfium lib from absolute path

* Revert removed import due to rebase

* Small nitpick and some warnings

---------

Co-authored-by: Ericson Fogo Soares <ericson.ds999@gmail.com>
This commit is contained in:
Vítor Vasconcellos 2023-10-06 17:41:22 -03:00 committed by GitHub
parent 6f8eccdd99
commit 556cf1df63
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 278 additions and 33 deletions

114
Cargo.lock generated
View file

@ -568,6 +568,7 @@ dependencies = [
"clang-sys",
"lazy_static",
"lazycell",
"log",
"peeking_take_while",
"proc-macro2",
"quote",
@ -575,6 +576,7 @@ dependencies = [
"rustc-hash",
"shlex",
"syn 1.0.109",
"which",
]
[[package]]
@ -966,7 +968,7 @@ checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f"
dependencies = [
"glob",
"libc",
"libloading",
"libloading 0.7.4",
]
[[package]]
@ -1101,6 +1103,26 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "510ca239cf13b7f8d16a2b48f263de7b4f8c566f0af58d901031473c76afb1e3"
[[package]]
name = "console_error_panic_hook"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc"
dependencies = [
"cfg-if",
"wasm-bindgen",
]
[[package]]
name = "console_log"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be8aed40e4edbf4d3b4431ab260b63fdc40f5780a4766824329ea0f1eefe3c0f"
dependencies = [
"log",
"web-sys",
]
[[package]]
name = "const-oid"
version = "0.9.5"
@ -3207,6 +3229,15 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "iter_tools"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "531cafdc99b3b3252bb32f5620e61d56b19415efc19900b12d1b2e7483854897"
dependencies = [
"itertools 0.10.5",
]
[[package]]
name = "itertools"
version = "0.10.5"
@ -3488,6 +3519,16 @@ dependencies = [
"winapi",
]
[[package]]
name = "libloading"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d580318f95776505201b28cf98eb1fa5e4be3b689633ba6a3e6cd880ff22d8cb"
dependencies = [
"cfg-if",
"windows-sys 0.48.0",
]
[[package]]
name = "libp2p"
version = "0.52.3"
@ -3961,6 +4002,12 @@ version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed1202b2a6f884ae56f04cff409ab315c5ce26b5e58d7412e484f01fd52f52ef"
[[package]]
name = "maybe-owned"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4facc753ae494aeb6e3c22f839b158aebd4f9270f55cd3c79906c45476c47ab4"
[[package]]
name = "maybe-uninit"
version = "2.0.0"
@ -4988,6 +5035,33 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8835116a5c179084a830efb3adc117ab007512b535bc1a21c991d3b32a6b44dd"
[[package]]
name = "pdfium-render"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f66fc726c464ae97b7bfec0f41b064d9d25669d5064623c57ac2c62e254617a"
dependencies = [
"bindgen",
"bitflags 2.4.0",
"bytemuck",
"bytes",
"chrono",
"console_error_panic_hook",
"console_log",
"image",
"iter_tools",
"js-sys",
"libloading 0.8.0",
"log",
"maybe-owned",
"once_cell",
"utf16string",
"vecmath",
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
]
[[package]]
name = "peeking_take_while"
version = "0.1.2"
@ -5200,6 +5274,12 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "piston-float"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad78bf43dcf80e8f950c92b84f938a0fc7590b7f6866fbcbeca781609c115590"
[[package]]
name = "pkcs8"
version = "0.10.2"
@ -6734,8 +6814,11 @@ dependencies = [
"image",
"libheif-rs",
"libheif-sys",
"once_cell",
"pdfium-render",
"resvg",
"thiserror",
"tracing 0.2.0",
]
[[package]]
@ -8854,6 +8937,15 @@ version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
[[package]]
name = "utf16string"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b62a1e85e12d5d712bf47a85f426b73d303e2d00a90de5f3004df3596e9d216"
dependencies = [
"byteorder",
]
[[package]]
name = "utf8parse"
version = "0.2.1"
@ -8882,6 +8974,15 @@ version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]]
name = "vecmath"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "956ae1e0d85bca567dee1dcf87fb1ca2e792792f66f87dced8381f99cd91156a"
dependencies = [
"piston-float",
]
[[package]]
name = "version-compare"
version = "0.0.11"
@ -9146,6 +9247,17 @@ version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9193164d4de03a926d909d3bc7c30543cecb35400c02114792c2cae20d5e2dbb"
[[package]]
name = "which"
version = "4.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2441c784c52b289a054b7201fc93253e288f094e2f4be9058343127c4226a269"
dependencies = [
"either",
"libc",
"once_cell",
]
[[package]]
name = "widestring"
version = "1.0.2"

View file

@ -337,11 +337,10 @@ impl Jobs {
.read()
.await
.values()
.filter_map(|worker| {
(!worker.is_paused()).then(|| {
let report = worker.report();
(report.get_meta().0, report)
})
.filter(|&worker| !worker.is_paused())
.map(|worker| {
let report = worker.report();
(report.get_meta().0, report)
})
.collect()
}

View file

@ -192,7 +192,7 @@ impl Library {
},
expires
.map(|e| vec![notification::expires_at::set(Some(e.fixed_offset()))])
.unwrap_or_else(Vec::new),
.unwrap_or_default(),
)
.exec()
.await

View file

@ -479,9 +479,9 @@ impl IndexerRule {
.await
.map(|results| {
results.into_iter().flatten().fold(
HashMap::with_capacity(RuleKind::variant_count()),
HashMap::<_, Vec<_>>::with_capacity(RuleKind::variant_count()),
|mut map, (kind, result)| {
map.entry(kind).or_insert_with(Vec::new).push(result);
map.entry(kind).or_default().push(result);
map
},
)

View file

@ -9,8 +9,8 @@ use crate::{
media::{
media_processor,
thumbnail::{
can_generate_thumbnail_for_image, generate_image_thumbnail, get_thumb_key,
get_thumbnail_path,
can_generate_thumbnail_for_document, can_generate_thumbnail_for_image,
generate_image_thumbnail, get_thumb_key, get_thumbnail_path,
},
MediaProcessorJobInit,
},
@ -27,7 +27,7 @@ use std::{
sync::Arc,
};
use sd_file_ext::extensions::ImageExtension;
use sd_file_ext::extensions::{DocumentExtension, ImageExtension};
use chrono::Utc;
use futures::future::TryFutureExt;
@ -928,6 +928,12 @@ pub(super) async fn generate_thumbnail(
error!("Failed to image thumbnail on location manager: {e:#?}");
}
}
} else if let Ok(extension) = DocumentExtension::from_str(extension) {
if can_generate_thumbnail_for_document(&extension) {
if let Err(e) = generate_image_thumbnail(path, &output_path).await {
error!("Failed to document thumbnail on location manager: {e:#?}");
}
}
}
#[cfg(feature = "ffmpeg")]

View file

@ -157,7 +157,10 @@ pub async fn walk(
.map(Into::into)
.unwrap_or(ObjectKind::Unknown);
let thumbnail_key = if matches!(kind, ObjectKind::Image | ObjectKind::Video) {
let thumbnail_key = if matches!(
kind,
ObjectKind::Image | ObjectKind::Video | ObjectKind::Document
) {
if let Ok(cas_id) = generate_cas_id(&entry_path, metadata.len())
.await
.map_err(|e| errors.push(NonIndexedLocationError::from((path, e)).into()))

View file

@ -229,7 +229,7 @@ async fn get_files_for_thumbnailer(
let image_thumb_files = get_all_children_files_by_extensions(
db,
parent_iso_file_path,
&thumbnail::FILTERED_IMAGE_EXTENSIONS,
&thumbnail::THUMBNAILABLE_EXTENSIONS,
)
.await?
.into_iter()
@ -241,7 +241,7 @@ async fn get_files_for_thumbnailer(
let video_files = get_all_children_files_by_extensions(
db,
parent_iso_file_path,
&thumbnail::FILTERED_VIDEO_EXTENSIONS,
&thumbnail::THUMBNAILABLE_VIDEO_EXTENSIONS,
)
.await?;

View file

@ -156,7 +156,7 @@ async fn get_files_for_thumbnailer(
let image_thumb_files = get_files_by_extensions(
db,
parent_iso_file_path,
&thumbnail::FILTERED_IMAGE_EXTENSIONS,
&thumbnail::THUMBNAILABLE_EXTENSIONS,
)
.await?
.into_iter()
@ -168,7 +168,7 @@ async fn get_files_for_thumbnailer(
let video_files = get_files_by_extensions(
db,
parent_iso_file_path,
&thumbnail::FILTERED_VIDEO_EXTENSIONS,
&thumbnail::THUMBNAILABLE_VIDEO_EXTENSIONS,
)
.await?;

View file

@ -8,7 +8,9 @@ use crate::{
Node,
};
use sd_file_ext::extensions::{Extension, ImageExtension, ALL_IMAGE_EXTENSIONS};
use sd_file_ext::extensions::{
DocumentExtension, Extension, ImageExtension, ALL_DOCUMENT_EXTENSIONS, ALL_IMAGE_EXTENSIONS,
};
use sd_images::format_image;
use sd_media_metadata::image::Orientation;
@ -26,7 +28,7 @@ use image::{self, imageops, DynamicImage, GenericImageView};
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use tokio::{fs, io};
use tokio::{fs, io, task};
use tracing::{error, trace, warn};
use webp::Encoder;
@ -57,7 +59,7 @@ pub fn get_thumb_key(cas_id: &str) -> Vec<String> {
}
#[cfg(feature = "ffmpeg")]
pub(super) static FILTERED_VIDEO_EXTENSIONS: Lazy<Vec<Extension>> = Lazy::new(|| {
pub(super) static THUMBNAILABLE_VIDEO_EXTENSIONS: Lazy<Vec<Extension>> = Lazy::new(|| {
ALL_VIDEO_EXTENSIONS
.iter()
.cloned()
@ -66,12 +68,19 @@ pub(super) static FILTERED_VIDEO_EXTENSIONS: Lazy<Vec<Extension>> = Lazy::new(||
.collect()
});
pub(super) static FILTERED_IMAGE_EXTENSIONS: Lazy<Vec<Extension>> = Lazy::new(|| {
/// Every extension the thumbnailer accepts through the image pipeline:
/// the image extensions approved by `can_generate_thumbnail_for_image`,
/// followed by the document extensions approved by
/// `can_generate_thumbnail_for_document`.
pub(super) static THUMBNAILABLE_EXTENSIONS: Lazy<Vec<Extension>> = Lazy::new(|| {
    // Image extensions come first, each wrapped in `Extension::Image`.
    let thumbnailable_images = ALL_IMAGE_EXTENSIONS
        .iter()
        .cloned()
        .filter(can_generate_thumbnail_for_image)
        .map(Extension::Image);

    // Document extensions follow, each wrapped in `Extension::Document`.
    let thumbnailable_documents = ALL_DOCUMENT_EXTENSIONS
        .iter()
        .cloned()
        .filter(can_generate_thumbnail_for_document)
        .map(Extension::Document);

    thumbnailable_images.chain(thumbnailable_documents).collect()
});
@ -88,6 +97,8 @@ pub enum ThumbnailerError {
Encoding,
#[error("error while converting the image: {0}")]
SdImages(#[from] sd_images::Error),
#[error("failed to execute converting task: {0}")]
Task(#[from] task::JoinError),
}
/// This is the target pixel count for all thumbnails to be resized to, and it is eventually downscaled
@ -125,7 +136,7 @@ pub async fn generate_image_thumbnail<P: AsRef<Path>>(
) -> Result<(), ThumbnailerError> {
let file_path = file_path.as_ref().to_path_buf();
let webp = tokio::task::block_in_place(move || -> Result<_, ThumbnailerError> {
let webp = task::spawn_blocking(move || -> Result<_, ThumbnailerError> {
let img = format_image(&file_path).map_err(|_| ThumbnailerError::Encoding)?;
let (w, h) = img.dimensions();
@ -154,7 +165,8 @@ pub async fn generate_image_thumbnail<P: AsRef<Path>>(
// this make us `deref` to have a `&[u8]` and then `to_owned` to make a Vec<u8>
// which implies on a unwanted clone...
Ok(encoder.encode(TARGET_QUALITY).deref().to_owned())
})?;
})
.await??;
let output_path = output_path.as_ref();
@ -200,6 +212,12 @@ pub const fn can_generate_thumbnail_for_image(image_extension: &ImageExtension)
)
}
pub const fn can_generate_thumbnail_for_document(document_extension: &DocumentExtension) -> bool {
use DocumentExtension::*;
matches!(document_extension, Pdf)
}
pub(super) async fn process(
entries: impl IntoIterator<Item = (&file_path_for_media_processor::Data, ThumbnailerEntryKind)>,
location_id: location::id::Type,

View file

@ -133,7 +133,7 @@ macro_rules! impl_stream {
s
.$streams_fn(bytes, &mut writer, aad)
.await
.map_or_else(Err, |_| Ok(writer.into_inner().into()))
.map_or_else(Err, |()| Ok(writer.into_inner().into()))
}
}

View file

@ -88,7 +88,7 @@ impl PasswordHasher {
argon2
.hash_password_into(password.expose(), &salt, &mut key)
.map_or(Err(Error::PasswordHash), |_| Ok(Key::new(key)))
.map_or(Err(Error::PasswordHash), |()| Ok(Key::new(key)))
}
#[allow(clippy::needless_pass_by_value)]
@ -110,7 +110,7 @@ impl PasswordHasher {
balloon
.hash_into(password.expose(), &salt, &mut key)
.map_or(Err(Error::PasswordHash), |_| Ok(Key::new(key)))
.map_or(Err(Error::PasswordHash), |()| Ok(Key::new(key)))
}
}

View file

@ -156,7 +156,7 @@ extension_category_enum! {
// document extensions
extension_category_enum! {
DocumentExtension _ALL_DOCUMENT_EXTENSIONS {
DocumentExtension ALL_DOCUMENT_EXTENSIONS {
Pdf = [0x25, 0x50, 0x44, 0x46, 0x2D],
Key = [0x50, 0x4B, 0x03, 0x04],
Pages = [0x50, 0x4B, 0x03, 0x04],

View file

@ -15,8 +15,11 @@ heif = ["dep:libheif-rs", "dep:libheif-sys"]
[dependencies]
image = "0.24.7"
thiserror = "1.0.48"
once_cell = "1.18.0"
tracing = { workspace = true }
resvg = "0.35.0"
# both of these added *default* bindgen features in 0.22.0 and 2.0.0+1.16.2 respectively
# this broke builds as we build our own libheif, so I disabled their default features
libheif-rs = { version = "0.22.0", default-features = false, optional = true }
libheif-sys = { version = "2.0.0", default-features = false, optional = true }
pdfium-render = { version ="0.8.8", features = ["sync", "image", "thread_safe"] }

View file

@ -17,11 +17,14 @@ pub const SVG_EXTENSIONS: [&str; 2] = ["svg", "svgz"];
/// This value is in MiB.
pub const SVG_MAXIMUM_FILE_SIZE: u64 = MIB * 24;
/// The size that SVG images are rendered at, assuming they are square.
// TODO(brxken128): check for non-1:1 SVG images and create a function to resize
// them while maintaining the aspect ratio.
/// The size that SVG images are rendered at.
pub const SVG_RENDER_SIZE: u32 = 512;
/// The file extension (without the leading dot) that identifies a PDF document.
pub const PDF_EXTENSION: &str = "pdf";
/// The size that PDF pages are rendered at.
pub const PDF_RENDER_SIZE: i32 = 1024;
/// The maximum file size that an image can be in order to have a thumbnail generated.
///
/// This value is in MiB.

View file

@ -4,10 +4,13 @@ pub type Result<T> = std::result::Result<T, Error>;
#[derive(thiserror::Error, Debug)]
pub enum Error {
#[error("error with pdfium: {0}")]
Pdfium(#[from] pdfium_render::prelude::PdfiumError),
#[error("failed to load pdfium library")]
PdfiumBinding,
#[cfg(feature = "heif")]
#[error("error with libheif: {0}")]
LibHeif(#[from] libheif_rs::HeifError),
#[error("error with usvg: {0}")]
USvg(#[from] resvg::usvg::Error),
#[error("failed to allocate `Pixbuf` while converting an SVG")]

View file

@ -2,6 +2,7 @@ use crate::{
consts,
error::{Error, Result},
generic::GenericHandler,
pdf::PdfHandler,
svg::SvgHandler,
ImageHandler,
};
@ -43,5 +44,9 @@ fn match_to_handler(ext: &OsStr) -> Box<dyn ImageHandler> {
handler = Box::new(SvgHandler {});
}
if ext == consts::PDF_EXTENSION {
handler = Box::new(PdfHandler {});
}
handler
}

View file

@ -4,10 +4,13 @@ pub use crate::error::{Error, Result};
use crate::ImageHandler;
use image::DynamicImage;
use libheif_rs::{ColorSpace, HeifContext, LibHeif, RgbChroma};
use once_cell::sync::Lazy;
use std::io::{Cursor, SeekFrom};
use std::io::{Read, Seek};
use std::path::Path;
/// Shared `LibHeif` instance, built by `LibHeif::new` on first access so that
/// decoding does not construct a fresh one for every image.
static HEIF: Lazy<LibHeif> = Lazy::new(LibHeif::new);
pub struct HeifHandler {}
impl ImageHandler for HeifHandler {
@ -29,7 +32,7 @@ impl ImageHandler for HeifHandler {
let img = {
let data = self.get_data(path)?;
let handle = HeifContext::read_from_bytes(&data)?.primary_image_handle()?;
LibHeif::new().decode(&handle, ColorSpace::Rgb(RgbChroma::Rgb), None)
HEIF.decode(&handle, ColorSpace::Rgb(RgbChroma::Rgb), None)
}?;
let planes = img.planes();

View file

@ -26,6 +26,7 @@ mod formatter;
mod generic;
#[cfg(feature = "heif")]
mod heif;
mod pdf;
mod svg;
pub use error::{Error, Result};

88
crates/images/src/pdf.rs Normal file
View file

@ -0,0 +1,88 @@
use std::{
borrow::ToOwned,
env::current_exe,
path::{Path, PathBuf},
};
use crate::{consts::PDF_RENDER_SIZE, Error::PdfiumBinding, ImageHandler, Result};
use image::DynamicImage;
use once_cell::sync::Lazy;
use pdfium_render::prelude::{PdfPageRenderRotation, PdfRenderConfig, Pdfium};
use tracing::error;
// Directory that is joined with the platform's pdfium library name when binding.
// This path must be relative to the running binary.

/// Windows: same directory as the binary.
#[cfg(windows)]
const BINDING_LOCATION: &str = ".";

/// macOS: the bundled framework's library directory.
#[cfg(target_os = "macos")]
const BINDING_LOCATION: &str = "../Frameworks/FFMpeg.framework/Libraries";

/// Every other unix target.
#[cfg(all(unix, not(target_os = "macos")))]
const BINDING_LOCATION: &str = "../lib/spacedrive";
static PDFIUM: Lazy<Option<Pdfium>> = Lazy::new(|| {
let lib_name = Pdfium::pdfium_platform_library_name();
let lib_path = current_exe()
.ok()
.and_then(|exe_path| {
exe_path.parent().and_then(|parent_path| {
match parent_path
.join(BINDING_LOCATION)
.join(&lib_name)
.canonicalize()
{
Ok(lib_path) => lib_path.to_str().map(ToOwned::to_owned),
Err(err) => {
error!("{err:#?}");
None
}
}
})
})
.unwrap_or_else(|| {
#[allow(clippy::expect_used)]
PathBuf::from(BINDING_LOCATION)
.join(&lib_name)
.to_str()
.expect("We are converting valid strs to PathBuf then back, it should not fail")
.to_owned()
});
Pdfium::bind_to_library(lib_path)
.or_else(|err| {
error!("{err:#?}");
Pdfium::bind_to_system_library()
})
.map(Pdfium::new)
.map_err(|err| error!("{err:#?}"))
.ok()
});
/// [`ImageHandler`] that turns the first page of a PDF document into an image
/// using the shared `PDFIUM` instance.
pub struct PdfHandler {}

impl ImageHandler for PdfHandler {
    /// PDF files are not subject to a size limit.
    fn maximum_size(&self) -> u64 {
        // Pdfium will only load the portions of the document it actually needs into memory.
        u64::MAX
    }

    /// Always succeeds; both arguments are ignored.
    fn validate_image(&self, _bits_per_pixel: u8, _length: usize) -> Result<()> {
        Ok(())
    }

    /// Renders the first page of the PDF at `path`.
    ///
    /// # Errors
    ///
    /// Returns `PdfiumBinding` when no Pdfium library could be bound, or the
    /// underlying Pdfium error when loading the document, fetching its first
    /// page, or rendering that page fails.
    fn handle_image(&self, path: &Path) -> Result<DynamicImage> {
        let pdfium = match PDFIUM.as_ref() {
            Some(instance) => instance,
            None => return Err(PdfiumBinding),
        };

        let config = PdfRenderConfig::new()
            .set_target_width(PDF_RENDER_SIZE)
            .set_maximum_height(PDF_RENDER_SIZE)
            .rotate_if_landscape(PdfPageRenderRotation::Degrees90, true);

        let document = pdfium.load_pdf_from_file(path, None)?;
        let first_page = document.pages().first()?;
        let bitmap = first_page.render_with_config(&config)?;

        Ok(bitmap.as_image())
    }
}

View file

@ -31,6 +31,7 @@ import {
FilePath,
FilePathWithObject,
getExplorerItemData,
getItemFilePath,
NonIndexedPathItem,
Object,
ObjectKindEnum,
@ -136,7 +137,7 @@ const Thumbnails = ({ items }: { items: ExplorerItem[] }) => {
<FileThumb
key={uniqueId(item)}
data={item}
loadOriginal
loadOriginal={getItemFilePath(item)?.extension !== 'pdf'}
frame
blackBars={thumbs.length === 1}
blackBarsSize={16}