CAS ID Improvements (#413)

* remove `ring` dependency and use `sha2` instead

* use BLAKE3 and include full file checksum

* update schema comments
This commit is contained in:
jake 2022-10-13 23:31:47 +01:00 committed by GitHub
parent b5c571541e
commit 4cd2dde35c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 70 additions and 59 deletions

68
Cargo.lock generated
View file

@ -125,6 +125,18 @@ dependencies = [
"password-hash",
]
[[package]]
name = "arrayref"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544"
[[package]]
name = "arrayvec"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
[[package]]
name = "ascii"
version = "0.9.3"
@ -493,6 +505,20 @@ dependencies = [
"digest 0.10.5",
]
[[package]]
name = "blake3"
version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a08e53fc5a564bb15bfe6fae56bd71522205f1f91893f9c0116edad6496c183f"
dependencies = [
"arrayref",
"arrayvec",
"cc",
"cfg-if 1.0.0",
"constant_time_eq",
"digest 0.10.5",
]
[[package]]
name = "block"
version = "0.1.6"
@ -861,6 +887,12 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "constant_time_eq"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc"
[[package]]
name = "convert_case"
version = "0.4.0"
@ -1200,12 +1232,6 @@ dependencies = [
"parking_lot_core 0.9.3",
]
[[package]]
name = "data-encoding"
version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ee2393c4a91429dffb4bedf19f4d6abf27d8a732c8ce4980305d782e5426d57"
[[package]]
name = "datamodel"
version = "0.1.0"
@ -4394,7 +4420,7 @@ dependencies = [
"bytes",
"fxhash",
"rand 0.8.5",
"ring 0.16.20",
"ring",
"rustls",
"rustls-native-certs",
"rustls-pemfile 0.2.1",
@ -4696,7 +4722,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6413f3de1edee53342e6138e75b56d32e7bc6e332b3bd62d497b1929d4cfbcdd"
dependencies = [
"pem",
"ring 0.16.20",
"ring",
"time 0.3.15",
"yasna",
]
@ -4869,21 +4895,6 @@ dependencies = [
"winapi",
]
[[package]]
name = "ring"
version = "0.17.0-alpha.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4575a179070909595bea5f999d67934737c2e0757a1eb9839af555917817b257"
dependencies = [
"cc",
"libc",
"once_cell",
"spin 0.5.2",
"untrusted",
"web-sys",
"winapi",
]
[[package]]
name = "rmp"
version = "0.8.11"
@ -4991,7 +5002,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5aab8ee6c7097ed6057f43c187a62418d0c05a4bd5f18b3571db50ee0f9ce033"
dependencies = [
"log",
"ring 0.16.20",
"ring",
"sct",
"webpki",
]
@ -5126,7 +5137,7 @@ version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4"
dependencies = [
"ring 0.16.20",
"ring",
"untrusted",
]
@ -5137,9 +5148,9 @@ dependencies = [
"async-stream",
"async-trait",
"base64 0.13.0",
"blake3",
"chrono",
"ctor",
"data-encoding",
"enumflags2",
"ffmpeg-next",
"fs_extra",
@ -5153,7 +5164,6 @@ dependencies = [
"once_cell",
"openssl-sys",
"prisma-client-rust",
"ring 0.17.0-alpha.11",
"rmp",
"rmp-serde",
"rspc",
@ -5265,7 +5275,7 @@ name = "sd-tunnel-utils"
version = "0.1.0"
dependencies = [
"quinn",
"ring 0.16.20",
"ring",
"rmp",
"rmp-serde",
"rustls",
@ -7076,7 +7086,7 @@ version = "0.22.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f095d78192e208183081cc07bc5515ef55216397af48b873e5edcd72637fa1bd"
dependencies = [
"ring 0.16.20",
"ring",
"untrusted",
]

View file

@ -28,11 +28,10 @@ serde = { version = "1.0", features = ["derive"] }
chrono = { version = "0.4.22", features = ["serde"] }
serde_json = "1.0"
futures = "0.3"
data-encoding = "2.3.2"
ring = "0.17.0-alpha.11"
int-enum = "0.4.0"
rmp = "^0.8.11"
rmp-serde = "^1.1.1"
blake3 = "1.3.1"
# Project dependencies
rspc = { workspace = true, features = ["uuid", "chrono", "tracing"] }

View file

@ -98,9 +98,9 @@ model Location {
model Object {
id Int @id @default(autoincrement())
// content addressable storage id - sha256 sampled checksum
// content addressable storage id - blake3 checksum of the file size plus sampled file contents
cas_id String @unique
// full byte contents digested into sha256 checksum
// full byte contents digested into blake3 checksum
integrity_checksum String? @unique
// basic metadata
name String?

View file

@ -1,5 +1,4 @@
use data_encoding::HEXLOWER;
use ring::digest::{Context, SHA256};
use blake3::Hasher;
use std::path::PathBuf;
use tokio::{
fs::File,
@ -18,51 +17,54 @@ async fn read_at(file: &mut File, offset: u64, size: u64) -> Result<Vec<u8>, io:
Ok(buf)
}
/// Encodes `b` as a lowercase hexadecimal string, two characters per byte.
fn to_hex_string(b: &[u8]) -> String {
    // Every byte expands to exactly two hex digits.
    let mut encoded = String::with_capacity(b.len() * 2);
    for byte in b {
        encoded.push_str(&format!("{:02x}", byte));
    }
    encoded
}
/// Generates the content addressable storage id for the file at `path`.
///
/// The id is the lowercase hex BLAKE3 digest of, in order:
/// 1. `size` encoded as little-endian bytes, then
/// 2. either the first `size` bytes of the file (when `size` is smaller than
///    `SAMPLE_COUNT * SAMPLE_SIZE`), or `SAMPLE_COUNT` samples of `SAMPLE_SIZE` bytes taken at
///    offsets `(size / SAMPLE_COUNT) * i`, followed by one final sample ending at `size`.
///
/// `size` is supplied by the caller and is used for every offset; it is not re-read from the
/// file here.
///
/// # Errors
///
/// Returns the `io::Error` from opening the file or from any `read_at` call.
pub async fn generate_cas_id(path: PathBuf, size: u64) -> Result<String, io::Error> {
    // open file reference
    let mut file = File::open(path).await?;

    let mut hasher = Hasher::new();

    // include the file size in the checksum
    hasher.update(&size.to_le_bytes());

    // if size is small enough, just read the whole thing
    if SAMPLE_COUNT * SAMPLE_SIZE > size {
        let buf = read_at(&mut file, 0, size).await?;
        hasher.update(&buf);
    } else {
        // loop over samples
        for i in 0..SAMPLE_COUNT {
            let buf = read_at(&mut file, (size / SAMPLE_COUNT) * i, SAMPLE_SIZE).await?;
            hasher.update(&buf);
        }

        // sample end of file
        // NOTE(review): `size - SAMPLE_SIZE` cannot underflow here as long as
        // SAMPLE_COUNT >= 1 - confirm against the constant's definition.
        let buf = read_at(&mut file, size - SAMPLE_SIZE, SAMPLE_SIZE).await?;
        hasher.update(&buf);
    }

    let hex = to_hex_string(hasher.finalize().as_bytes());

    Ok(hex)
}
/// Computes the BLAKE3 checksum of the full contents of the file at `path`.
///
/// The file is streamed through the hasher in chunks of at most `BLOCK_SIZE` bytes, and the
/// digest is returned as a lowercase hex string.
///
/// # Errors
///
/// Returns the `io::Error` from opening the file or from any read.
pub async fn full_checksum(path: &str) -> Result<String, io::Error> {
    const BLOCK_SIZE: usize = 1048576;

    let mut reader = File::open(path).await?;
    let mut context = Hasher::new();
    // Heap-allocate the chunk buffer: a 1 MiB array local would otherwise be stored inline in
    // this function's future.
    let mut buffer = vec![0u8; BLOCK_SIZE];

    loop {
        // A read may return fewer bytes than requested without being at end of file, so only
        // a zero-length read is treated as EOF. Stopping on the first short read could
        // silently checksum just a prefix of the file.
        let read_count = reader.read(&mut buffer).await?;
        if read_count == 0 {
            break;
        }
        context.update(&buffer[..read_count]);
    }

    let hex = to_hex_string(context.finalize().as_bytes());

    Ok(hex)
}