added unique file identifier

This commit is contained in:
Jamie Pine 2022-04-23 01:12:35 -07:00
parent f2e1f72811
commit 40cc780ec1
14 changed files with 207 additions and 143 deletions

75
Cargo.lock generated
View file

@ -534,12 +534,6 @@ version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bitstream-io"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a429905f63bae528a4afe5e7520089139a7694e910f9a12e89010d738b9cca2"
[[package]]
name = "bitvec"
version = "0.22.3"
@ -1429,19 +1423,7 @@ dependencies = [
name = "debug"
version = "0.1.0"
dependencies = [
"anyhow",
"chrono",
"data-encoding",
"ffmpeg-next",
"kamadak-exif",
"matroska",
"mime",
"ring 0.16.20",
"sdcore",
"sha256",
"thumbnailer",
"tokio",
"uhlc",
]
[[package]]
@ -3188,15 +3170,6 @@ dependencies = [
"treediff",
]
[[package]]
name = "kamadak-exif"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70494964492bf8e491eb3951c5d70c9627eb7100ede6cc56d748b9a3f302cfb6"
dependencies = [
"mutate_once",
]
[[package]]
name = "keccak"
version = "0.1.0"
@ -4067,17 +4040,6 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f"
[[package]]
name = "matroska"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee093e2ef3d0e7a8062daacfb315c8adf19b7dc96fd4bd33a9dff4c9b7e35d77"
dependencies = [
"bitstream-io",
"phf 0.10.1",
"time 0.3.9",
]
[[package]]
name = "maybe-uninit"
version = "2.0.0"
@ -4420,12 +4382,6 @@ dependencies = [
"unsigned-varint",
]
[[package]]
name = "mutate_once"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16cf681a23b4d0a43fc35024c176437f9dcd818db34e0f42ab456a0ee5ad497b"
[[package]]
name = "mysql_async"
version = "0.29.0"
@ -6756,16 +6712,6 @@ dependencies = [
"digest 0.10.3",
]
[[package]]
name = "sha256"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e84a7f596c081d359de5e06a83877138bc3c4483591e1af1916e1472e6e146e"
dependencies = [
"hex",
"sha2 0.9.9",
]
[[package]]
name = "sha3"
version = "0.9.1"
@ -7547,20 +7493,6 @@ dependencies = [
"num_cpus",
]
[[package]]
name = "thumbnailer"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16c652b0061a3f3500284063fab527dc5e2dbd015905fcd08c5bd471c52a338b"
dependencies = [
"image",
"lazy_static",
"mime",
"rayon",
"tempfile",
"webp",
]
[[package]]
name = "tiberius"
version = "0.7.3"
@ -7623,15 +7555,8 @@ checksum = "c2702e08a7a860f005826c6815dcac101b19b5eb330c27fe4a5928fec1d20ddd"
dependencies = [
"libc",
"num_threads",
"time-macros",
]
[[package]]
name = "time-macros"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42657b1a6f4d817cda8e7a0ace261fe0cc946cf3a80314390b22cc61ae080792"
[[package]]
name = "tinyvec"
version = "1.5.1"

View file

@ -6,16 +6,16 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
anyhow = "1.0.56"
data-encoding = "2.3.2"
kamadak-exif = "0.5.4"
ring = "0.16.20"
thumbnailer = "0.4.0"
mime = "0.3.16"
sha256 = "1.0.3"
ffmpeg-next = "5.0.3"
sdcore = { path = "../../core" }
uhlc = "0.4.1"
matroska = "0.11.0"
# anyhow = "1.0.56"
# data-encoding = "2.3.2"
# kamadak-exif = "0.5.4"
# ring = "0.16.20"
# thumbnailer = "0.4.0"
# mime = "0.3.16"
# sha256 = "1.0.3"
# ffmpeg-next = "5.0.3"
# sdcore = { path = "../../core" }
# uhlc = "0.4.1"
# matroska = "0.11.0"
tokio = { version = "1.17.0", features = ["sync", "rt"] }
chrono = "0.4.19"
# chrono = "0.4.19"

View file

@ -2,5 +2,10 @@
// use sdcore::{prisma, sync::engine::test, sync::FakeCoreContext};
#[tokio::main]
async fn main() {}
use std::fs::File;
/// Debug entry point: opens a hard-coded sample file and prints its filesystem metadata.
///
/// Panics if the file cannot be opened or its metadata cannot be read.
fn main() {
    let preview = File::open("/Users/james/Desktop/Cloud/preview.mp4").unwrap();
    let metadata = preview.metadata().unwrap();
    println!("{:?}", metadata)
}

View file

@ -49,7 +49,7 @@ function App() {
The file explorer from the future
</h1>
<p className="max-w-3xl mt-1 mb-8 text-lg text-center text-gray-450">
Spacedrive allows you to manage files across all devices, drives and clouds at once.
Manage files across all devices, drives and clouds from one place.
<br />
Designed for creators, hoarders and the painfully disorganized.
</p>

View file

@ -19,5 +19,7 @@ async fn main() {
tokio::spawn(async move {
core.start().await;
});
})
.await
.unwrap();
}

View file

@ -0,0 +1,32 @@
/*
Warnings:
- You are about to drop the column `streams_json` on the `media_data` table. All the data in the column will be lost.
- A unique constraint covering the columns `[cas_id]` on the table `files` will be added. If there are existing duplicate values, this will fail.
*/
-- RedefineTables
PRAGMA foreign_keys=OFF;
CREATE TABLE "new_media_data" (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"pixel_width" INTEGER,
"pixel_height" INTEGER,
"longitude" REAL,
"latitude" REAL,
"fps" INTEGER,
"capture_device_make" TEXT,
"capture_device_model" TEXT,
"capture_device_software" TEXT,
"duration_seconds" INTEGER,
"codecs" TEXT,
"streams" INTEGER,
CONSTRAINT "media_data_id_fkey" FOREIGN KEY ("id") REFERENCES "files" ("id") ON DELETE CASCADE ON UPDATE CASCADE
);
INSERT INTO "new_media_data" ("capture_device_make", "capture_device_model", "capture_device_software", "codecs", "duration_seconds", "fps", "id", "latitude", "longitude", "pixel_height", "pixel_width") SELECT "capture_device_make", "capture_device_model", "capture_device_software", "codecs", "duration_seconds", "fps", "id", "latitude", "longitude", "pixel_height", "pixel_width" FROM "media_data";
DROP TABLE "media_data";
ALTER TABLE "new_media_data" RENAME TO "media_data";
PRAGMA foreign_key_check;
PRAGMA foreign_keys=ON;
-- CreateIndex
CREATE UNIQUE INDEX "files_cas_id_key" ON "files"("cas_id");

View file

@ -93,7 +93,7 @@ model File {
id Int @id @default(autoincrement())
// content addressable storage id - sha256
// unique: one file row per distinct content; incoming replicas are ignored if a row with this cas_id already exists
cas_id String
cas_id String @unique
// full byte contents digested into sha256 checksum
integrity_checksum String? @unique
// basic metadata

View file

@ -1,7 +1,7 @@
extern crate ffmpeg_next as ffmpeg;
use chrono::NaiveDateTime;
use ffmpeg::{dictionary::Iter, format};
use std::{env, ffi::OsStr, fs, path::Path};
use std::{ffi::OsStr, path::Path};
#[derive(Default, Debug)]
pub struct MediaItem {

View file

@ -0,0 +1,127 @@
use crate::job::jobs::JobReportUpdate;
use crate::{
file::FileError,
job::{jobs::Job, worker::WorkerContext},
prisma::{self, file_path},
CoreContext,
};
use anyhow::Result;
use futures::executor::block_on;
use serde::{Deserialize, Serialize};
/// `id` / `cas_id` pair of a row in the `files` table.
///
/// The file identifier job deserializes raw SQL query results into this type
/// and uses it to point `file_paths` rows at their file.
#[derive(Deserialize, Serialize, Debug)]
pub struct FileCreated {
/// Primary key of the `files` row.
pub id: i32,
/// Content-addressable storage id of the `files` row.
pub cas_id: String,
}
/// Job that walks `file_paths` rows that have no `file_id` yet and links them
/// to rows in `files`. It carries no state of its own; everything it needs is
/// taken from the `WorkerContext` passed to `Job::run`.
#[derive(Debug)]
pub struct FileIdentifierJob;
#[async_trait::async_trait]
impl Job for FileIdentifierJob {
async fn run(&self, ctx: WorkerContext) -> Result<()> {
let total_count = count_orphan_file_paths(&ctx.core_ctx).await?;
println!("Found {} orphan file paths", total_count);
let task_count = (total_count as f64 / 100f64).ceil() as usize;
println!("Will process {} tasks", task_count);
// update job with total task count based on orphan file_paths count
ctx.progress(vec![JobReportUpdate::TaskCount(task_count)]);
let db = ctx.core_ctx.database.clone();
let ctx = tokio::task::spawn_blocking(move || {
let mut completed: usize = 0;
while completed < task_count {
let file_paths = block_on(get_orphan_file_paths(&ctx.core_ctx, completed * 100)).unwrap();
println!("Processing: {:?}", file_paths);
let mut rows: Vec<String> = Vec::new();
for file_path in file_paths.iter() {
if file_path.temp_cas_id.is_none() {
continue;
}
rows.push(prepare_file_values(file_path));
}
if rows.len() == 0 {
break;
}
let insert_files = format!(
r#"INSERT INTO files (cas_id, size_in_bytes) VALUES {} ON CONFLICT (cas_id) DO NOTHING RETURNING id, cas_id"#,
rows.join(", ")
);
println!("{}", insert_files);
let files: Vec<FileCreated> = block_on(db._query_raw(&insert_files)).unwrap();
println!("FILES: {:?}", files);
for file in files.iter() {
let update_file_path = format!(
r#"UPDATE file_paths SET file_id = "{}" WHERE temp_cas_id = "{}""#,
file.id, file.cas_id
);
println!("UPDATING PATH: {}", update_file_path);
block_on(db._execute_raw(&update_file_path)).unwrap();
}
completed += 1;
println!("completed: {}", completed);
ctx.progress(vec![JobReportUpdate::CompletedTaskCount(completed)]);
}
ctx
}).await?;
let remaining = count_orphan_file_paths(&ctx.core_ctx).await?;
if remaining > 0 {
ctx.core_ctx.spawn_job(Box::new(FileIdentifierJob));
}
Ok(())
}
}
/// Row shape of the `SELECT COUNT(*) AS count …` query issued by
/// `count_orphan_file_paths`.
#[derive(Deserialize, Serialize, Debug)]
struct CountRes {
// Value of the `count` column; callers treat a missing value as 0.
count: Option<usize>,
}
/// Counts the `file_paths` rows that are not directories and have no `file_id` yet.
///
/// Returns 0 when the query yields no row or a null count, rather than
/// panicking on an out-of-bounds index.
///
/// # Errors
/// Returns a [`FileError`] if the database query fails.
pub async fn count_orphan_file_paths(ctx: &CoreContext) -> Result<usize, FileError> {
    let db = &ctx.database;
    let files_count = db
        ._query_raw::<CountRes>(
            r#"SELECT COUNT(*) AS count FROM file_paths WHERE file_id IS NULL AND is_dir IS FALSE"#,
        )
        .await?;
    Ok(files_count.first().and_then(|row| row.count).unwrap_or(0))
}
/// Fetches one page (at most 100 rows) of `file_paths` entries that are not
/// directories and have no `file_id`.
///
/// `offset` is the number of matching rows to skip before the page starts.
///
/// # Errors
/// Returns a [`FileError`] if the database query fails.
pub async fn get_orphan_file_paths(
    ctx: &CoreContext,
    offset: usize,
) -> Result<Vec<file_path::Data>, FileError> {
    println!("offset: {}", offset);

    // Orphan = no file linked yet, and not a directory.
    let orphan_filters = vec![
        file_path::file_id::equals(None),
        file_path::is_dir::equals(false),
    ];

    let page = ctx
        .database
        .file_path()
        .find_many(orphan_filters)
        .skip(offset)
        .take(100)
        .exec()
        .await?;

    Ok(page)
}
/// Renders the `(cas_id, size_in_bytes)` value tuple for one row of a bulk
/// `INSERT INTO files` statement.
///
/// Both values are emitted as single-quoted SQL string literals, and any single
/// quote inside the cas id is doubled. Double quotes denote identifiers in SQL;
/// SQLite only accepts them as strings through a legacy fallback. The size is
/// always written as the placeholder `'0'`.
///
/// # Panics
/// Panics if `file_path.temp_cas_id` is `None`; callers must filter such rows out
/// before calling this function.
pub fn prepare_file_values(file_path: &file_path::Data) -> String {
    let cas_id = file_path
        .temp_cas_id
        .as_ref()
        .expect("prepare_file_values requires a file path with a temp_cas_id")
        .to_string()
        .replace('\'', "''");
    format!("('{}','{}')", cas_id, "0")
}

View file

@ -1 +1,2 @@
pub mod checksum;
pub mod identifier;

View file

@ -1,3 +1,4 @@
use crate::file::cas::identifier::FileIdentifierJob;
use job::jobs::{Job, JobReport, Jobs};
use log::{error, info};
use prisma::PrismaClient;
@ -269,6 +270,10 @@ impl Core {
fs::remove_file(Path::new(&self.state.data_path).join("library.db")).unwrap();
CoreResponse::Success(())
}
ClientCommand::IdentifyUniqueFiles => {
ctx.spawn_job(Box::new(FileIdentifierJob));
CoreResponse::Success(())
}
})
}
@ -328,6 +333,7 @@ pub enum ClientCommand {
SysVolumeUnmount { id: i32 },
GenerateThumbsForLocation { id: i32, path: String },
PurgeDatabase,
IdentifyUniqueFiles,
}
// represents an event this library can emit

View file

@ -1,5 +1,3 @@
## What is it?
Spacedrive is a cross-platform app that allows you to manage files across all devices, drives and clouds at once. Check out the [readme](https://github.com/spacedriveapp) for more detailed info.

View file

@ -2,60 +2,24 @@ import { useBridgeQuery } from '@sd/client';
import React from 'react';
import ReactJson from 'react-json-view';
import FileItem from '../components/file/FileItem';
import CodeBlock from '../components/primitive/Codeblock';
import { Tag } from '../components/primitive/Tag';
export const SpacesScreen: React.FC<{}> = (props) => {
const { data: client } = useBridgeQuery('ClientGetState');
const { data: jobs } = useBridgeQuery('JobGetRunning');
const { data: jobHistory } = useBridgeQuery('JobGetHistory');
return (
<div className="flex flex-col items-center justify-center w-full h-screen px-2 py-5">
<div className="mt-2 mb-24 select-text">
<p className="mb-2 font-medium text-md text-gray-250">Rust level client state:</p>
<ReactJson
// collapsed
enableClipboard={false}
displayDataTypes={false}
theme="ocean"
src={{ ...client }}
style={{
padding: 20,
borderRadius: 5,
backgroundColor: '#101016',
border: 1,
borderColor: '#1E1E27',
borderStyle: 'solid'
}}
/>
<div className="flex flex-col w-full h-screen p-5 overflow-x-scroll">
<div className="flex flex-col space-y-5 pb-7">
<h1 className="text-lg font-bold ">Developer Debugger</h1>
<h1 className="text-sm font-bold ">Running Jobs</h1>
<CodeBlock src={{ ...jobs }} />
<h1 className="text-sm font-bold ">Job History</h1>
<CodeBlock src={{ ...jobHistory }} />
<h1 className="text-sm font-bold ">Client State</h1>
<CodeBlock src={{ ...client }} />
</div>
{/*<div className="-mt-[1px] space-x-2 ml-1">*/}
{/* <Tag color="red">Videos</Tag>*/}
{/* <Tag color="orange">DSLR Photos</Tag>*/}
{/* <Tag color="yellow">Camera Roll</Tag>*/}
{/* <Tag color="green">NFTs</Tag>*/}
{/* <Tag color="pink">Screenshots</Tag>*/}
{/* <Tag color="blue">Documents</Tag>*/}
{/* <Tag color="purple">Repositories</Tag>*/}
{/*</div>*/}
{/*<div className="flex flex-wrap p-2 my-3 space-x-2 bg-black rounded">*/}
{/* <div className="w-10 h-10 rounded bg-gray-950" />*/}
{/* <div className="w-10 h-10 bg-gray-900 rounded" />*/}
{/* <div className="w-10 h-10 rounded bg-gray-850" />*/}
{/* <div className="w-10 h-10 bg-gray-800 rounded" />*/}
{/* <div className="w-10 h-10 rounded bg-gray-750" />*/}
{/* <div className="w-10 h-10 bg-gray-700 rounded" />*/}
{/* <div className="w-10 h-10 rounded bg-gray-650" />*/}
{/* <div className="w-10 h-10 bg-gray-600 rounded" />*/}
{/* <div className="w-10 h-10 rounded bg-gray-550" />*/}
{/* <div className="w-10 h-10 bg-gray-400 rounded" />*/}
{/* <div className="w-10 h-10 rounded bg-gray-450" />*/}
{/* <div className="w-10 h-10 bg-gray-400 rounded" />*/}
{/* <div className="w-10 h-10 rounded bg-gray-350" />*/}
{/* <div className="w-10 h-10 bg-gray-300 rounded" />*/}
{/* <div className="w-10 h-10 rounded bg-gray-250" />*/}
{/* /!* <div className="w-10 h-10 bg-gray-200 rounded" />*/}
{/* <div className="w-10 h-10 rounded bg-gray-150" />*/}
{/* <div className="w-10 h-10 bg-gray-100 rounded" />*/}
{/* <div className="w-10 h-10 rounded bg-gray-50" /> *!/*/}
{/*</div>*/}
</div>
);
};

View file

@ -18,6 +18,7 @@ export default function GeneralSettings() {
alert('Database purged');
}
});
const { mutate: identifyFiles } = useBridgeCommand('IdentifyUniqueFiles');
return (
<div className="flex flex-col flex-grow max-w-4xl space-y-4">
@ -37,6 +38,9 @@ export default function GeneralSettings() {
<Button className="w-40" variant="gray" size="sm" onClick={() => purgeDB(undefined)}>
Purge database
</Button>
<Button className="w-40" variant="gray" size="sm" onClick={() => identifyFiles(undefined)}>
Identify unique files
</Button>
</div>
{/* <InputContainer
title="Test scan directory"