mirror of
https://github.com/spacedriveapp/spacedrive
synced 2024-07-14 01:54:04 +00:00
Fix cas_id sample hashing logic (#672)
This commit is contained in:
parent
ce9be10cdb
commit
b711fe8b27
|
@ -15,9 +15,10 @@ const HEADER_OR_FOOTER_SIZE: u64 = 1024 * 8;
|
|||
const MINIMUM_FILE_SIZE: u64 = 1024 * 100;
|
||||
|
||||
// Asserting that nobody messed up our consts
|
||||
const_assert!(
|
||||
HEADER_OR_FOOTER_SIZE + SAMPLE_COUNT * SAMPLE_SIZE + HEADER_OR_FOOTER_SIZE < MINIMUM_FILE_SIZE
|
||||
);
|
||||
const_assert!((HEADER_OR_FOOTER_SIZE * 2 + SAMPLE_COUNT * SAMPLE_SIZE) < MINIMUM_FILE_SIZE);
|
||||
|
||||
// Asserting that the sample size is larger than header/footer size, as the same buffer is used for both
|
||||
const_assert!(SAMPLE_SIZE > HEADER_OR_FOOTER_SIZE);
|
||||
|
||||
pub async fn generate_cas_id(path: impl AsRef<Path>, size: u64) -> Result<String, io::Error> {
|
||||
let mut hasher = Hasher::new();
|
||||
|
@ -25,26 +26,28 @@ pub async fn generate_cas_id(path: impl AsRef<Path>, size: u64) -> Result<String
|
|||
|
||||
if size <= MINIMUM_FILE_SIZE {
|
||||
// For small files, we hash the whole file
|
||||
fs::read(path).await.map(|buf| {
|
||||
hasher.update(&buf);
|
||||
})?;
|
||||
hasher.update(&fs::read(path).await?);
|
||||
} else {
|
||||
let mut file = File::open(path).await?;
|
||||
let mut buf = vec![0; SAMPLE_SIZE as usize].into_boxed_slice();
|
||||
|
||||
// Hashing the header
|
||||
file.read_exact(&mut buf[..HEADER_OR_FOOTER_SIZE as usize])
|
||||
.await?;
|
||||
hasher.update(&buf);
|
||||
let mut current_pos = file
|
||||
.read_exact(&mut buf[..HEADER_OR_FOOTER_SIZE as usize])
|
||||
.await? as u64;
|
||||
hasher.update(&buf[..HEADER_OR_FOOTER_SIZE as usize]);
|
||||
|
||||
// Sample hashing the inner content of the file
|
||||
for _ in 0..SAMPLE_COUNT {
|
||||
file.seek(SeekFrom::Current(
|
||||
((size - HEADER_OR_FOOTER_SIZE * 2) / SAMPLE_COUNT) as i64,
|
||||
))
|
||||
.await?;
|
||||
let seek_jump = (size - HEADER_OR_FOOTER_SIZE * 2) / SAMPLE_COUNT;
|
||||
loop {
|
||||
file.read_exact(&mut buf).await?;
|
||||
hasher.update(&buf);
|
||||
|
||||
if current_pos >= (HEADER_OR_FOOTER_SIZE + seek_jump * (SAMPLE_COUNT - 1)) {
|
||||
break;
|
||||
}
|
||||
|
||||
current_pos = file.seek(SeekFrom::Start(current_pos + seek_jump)).await?;
|
||||
}
|
||||
|
||||
// Hashing the footer
|
||||
|
@ -52,7 +55,7 @@ pub async fn generate_cas_id(path: impl AsRef<Path>, size: u64) -> Result<String
|
|||
.await?;
|
||||
file.read_exact(&mut buf[..HEADER_OR_FOOTER_SIZE as usize])
|
||||
.await?;
|
||||
hasher.update(&buf);
|
||||
hasher.update(&buf[..HEADER_OR_FOOTER_SIZE as usize]);
|
||||
}
|
||||
|
||||
Ok(hasher.finalize().to_hex()[..16].to_string())
|
||||
|
|
Loading…
Reference in a new issue