Mirror of https://github.com/spacedriveapp/spacedrive (synced 2024-07-07 04:23:29 +00:00)
WOAH job progress in realtime... in the UI! worked first time too but that was down to Brendan
Co-authored-by: Brendan Allan <brendonovich@outlook.com>
parent 80369e005c
commit 9fd8997fd1
@@ -12,7 +12,7 @@ use std::{
 };
 use walkdir::{DirEntry, WalkDir};

-#[derive(Debug, Clone)]
+#[derive(Debug)]
 pub struct IndexerJob {
     pub path: String,
 }
@@ -51,6 +51,7 @@ pub async fn scan_path(
 ) -> Result<()> {
     println!("Scanning directory: {}", &path);
     let db = &ctx.database;
+    let path = path.to_string();

     let location = create_location(&ctx, &path).await?;

@@ -62,7 +63,7 @@ pub async fn scan_path(
         id: Option<i64>,
     }
     // grab the next id so we can increment in memory for batch inserting
-    let mut next_file_id = match db
+    let first_file_id = match db
         ._query_raw::<QueryRes>(r#"SELECT MAX(id) id FROM file_paths"#)
         .await
     {
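This hunk only renames the counter's starting point to `first_file_id`; the increment-in-memory closure moves inside the blocking scan task in the next hunk. A small standalone sketch of the underlying idea, allocating sequential ids in memory from the last id already in the table so a batch insert needs no per-row round-trip (the `seed` value and the `alloc_id` name are illustrative, not from the commit):

// Sketch: hand out sequential ids in memory, starting after the largest id
// already present in the table (hard-coded here for illustration).
fn main() {
    let seed: i64 = 42; // imagine SELECT MAX(id) id FROM file_paths returned 42
    let mut next_file_id = seed;
    let mut alloc_id = || {
        next_file_id += 1;
        next_file_id
    };

    // every discovered path gets an id without touching the database
    let ids: Vec<i64> = (0..3).map(|_| alloc_id()).collect();
    assert_eq!(ids, vec![43, 44, 45]);
}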
@@ -70,59 +71,67 @@ pub async fn scan_path(
         Err(e) => Err(anyhow!("Error querying for next file id: {}", e))?,
     };

-    let mut get_id = || {
-        next_file_id += 1;
-        next_file_id
-    };
-
     //check is path is a directory
-    if !PathBuf::from(path).is_dir() {
-        return Err(anyhow::anyhow!("{} is not a directory", path));
+    if !PathBuf::from(&path).is_dir() {
+        return Err(anyhow::anyhow!("{} is not a directory", &path));
     }
+    let dir_path = path.clone();

-    // store every valid path discovered
-    let mut paths: Vec<(PathBuf, i64, Option<i64>)> = Vec::new();
-    // store a hashmap of directories to their file ids for fast lookup
-    let mut dirs: HashMap<String, i64> = HashMap::new();
-    // begin timer for logging purposes
-    let scan_start = Instant::now();
-    // walk through directory recursively
-    for entry in WalkDir::new(path).into_iter().filter_entry(|dir| {
-        let approved = !is_hidden(dir)
-            && !is_app_bundle(dir)
-            && !is_node_modules(dir)
-            && !is_library(dir);
-        approved
-    }) {
-        // extract directory entry or log and continue if failed
-        let entry = match entry {
-            Ok(entry) => entry,
-            Err(e) => {
-                println!("Error reading file {}", e);
-                continue;
-            },
-        };
-        let path = entry.path();
-
-        let parent_path = path
-            .parent()
-            .unwrap_or(Path::new(""))
-            .to_str()
-            .unwrap_or("");
-        let parent_dir_id = dirs.get(&*parent_path);
-        // println!("Discovered: {:?}, {:?}", &path, &parent_dir_id);
-
-        let file_id = get_id();
-        paths.push((path.to_owned(), file_id, parent_dir_id.cloned()));
-
-        if entry.file_type().is_dir() {
-            let _path = match path.to_str() {
-                Some(path) => path.to_owned(),
-                None => continue,
-            };
-            dirs.insert(_path, file_id);
-        }
-    }
+    let (paths, scan_start) = tokio::task::spawn_blocking(move || {
+        // store every valid path discovered
+        let mut paths: Vec<(PathBuf, i64, Option<i64>)> = Vec::new();
+        // store a hashmap of directories to their file ids for fast lookup
+        let mut dirs: HashMap<String, i64> = HashMap::new();
+        // begin timer for logging purposes
+        let scan_start = Instant::now();
+
+        let mut next_file_id = first_file_id;
+        let mut get_id = || {
+            next_file_id += 1;
+            next_file_id
+        };
+
+        // walk through directory recursively
+        for entry in WalkDir::new(&dir_path).into_iter().filter_entry(|dir| {
+            let approved = !is_hidden(dir)
+                && !is_app_bundle(dir)
+                && !is_node_modules(dir)
+                && !is_library(dir);
+            approved
+        }) {
+            // extract directory entry or log and continue if failed
+            let entry = match entry {
+                Ok(entry) => entry,
+                Err(e) => {
+                    println!("Error reading file {}", e);
+                    continue;
+                },
+            };
+            let path = entry.path();
+
+            let parent_path = path
+                .parent()
+                .unwrap_or(Path::new(""))
+                .to_str()
+                .unwrap_or("");
+            let parent_dir_id = dirs.get(&*parent_path);
+            // println!("Discovered: {:?}, {:?}", &path, &parent_dir_id);
+
+            let file_id = get_id();
+            paths.push((path.to_owned(), file_id, parent_dir_id.cloned()));
+
+            if entry.file_type().is_dir() {
+                let _path = match path.to_str() {
+                    Some(path) => path.to_owned(),
+                    None => continue,
+                };
+                dirs.insert(_path, file_id);
+            }
+        }
+        (paths, scan_start)
+    })
+    .await
+    .unwrap();

     let db_write_start = Instant::now();
     let scan_read_time = scan_start.elapsed();

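The substantive change in this hunk wraps the synchronous WalkDir traversal in `tokio::task::spawn_blocking`, so the filesystem walk runs on Tokio's blocking thread pool instead of stalling the async executor, and the collected paths plus the timer are handed back when the task is awaited. It also explains the earlier `path.to_string()` and `dir_path = path.clone()` lines: the closure needs owned data it can move. A minimal sketch of the same pattern, assuming a Tokio runtime and using `std::fs::read_dir` instead of `walkdir` to stay self-contained:

use std::path::PathBuf;

#[tokio::main]
async fn main() {
    let dir_path = String::from("."); // owned value, moved into the closure

    // run the blocking directory listing off the async executor,
    // then hand the results back to the async side
    let (paths, count) = tokio::task::spawn_blocking(move || {
        let mut paths: Vec<PathBuf> = Vec::new();
        if let Ok(entries) = std::fs::read_dir(&dir_path) {
            for entry in entries.flatten() {
                paths.push(entry.path());
            }
        }
        let count = paths.len();
        (paths, count)
    })
    .await
    .expect("blocking task panicked");

    println!("found {} entries: {:?}", count, paths);
}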
@@ -166,7 +175,7 @@ pub async fn scan_path(
     }
     println!(
         "scan of {:?} completed in {:?}. {:?} files found. db write completed in {:?}",
-        path,
+        &path,
         scan_read_time,
         paths.len(),
         db_write_start.elapsed()
@@ -1,16 +1,16 @@
 use super::worker::{Worker, WorkerContext};
 use crate::{prisma::JobData, CoreContext};
 use anyhow::Result;
-use dyn_clone::DynClone;
 use int_enum::IntEnum;
 use serde::{Deserialize, Serialize};
-use std::{collections::HashMap, fmt::Debug};
+use std::{collections::HashMap, fmt::Debug, sync::Arc};
+use tokio::sync::Mutex;
 use ts_rs::TS;

 const MAX_WORKERS: usize = 4;

 #[async_trait::async_trait]
-pub trait Job: Send + Sync + Debug + DynClone {
+pub trait Job: Send + Sync + Debug {
     async fn run(&self, ctx: WorkerContext) -> Result<()>;
 }

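Dropping `DynClone` means a job no longer has to be cloneable to be boxed into the queue, which is also why the `Clone` derive disappeared from `IndexerJob` in the first hunk: the worker now takes ownership of the boxed job instead of cloning it. A hedged, self-contained sketch of what an implementor looks like under the new bound; the stand-in `Result` and `WorkerContext` types below are placeholders for the crate's own types, and the job body is illustrative:

use std::fmt::Debug;

// Placeholder types so the sketch compiles on its own; the real ones
// live in the core crate.
type Result<T> = std::result::Result<T, Box<dyn std::error::Error + Send + Sync>>;

pub struct WorkerContext; // stand-in for the crate's WorkerContext

#[async_trait::async_trait]
pub trait Job: Send + Sync + Debug {
    async fn run(&self, ctx: WorkerContext) -> Result<()>;
}

#[derive(Debug)] // no Clone required now that DynClone is gone
pub struct IndexerJob {
    pub path: String,
}

#[async_trait::async_trait]
impl Job for IndexerJob {
    async fn run(&self, _ctx: WorkerContext) -> Result<()> {
        // illustrative body; the real job drives the scan shown above
        println!("indexing {}", self.path);
        Ok(())
    }
}

fn main() {
    // a job can be boxed and queued without being cloneable
    let _queue: Vec<Box<dyn Job>> = vec![Box::new(IndexerJob { path: ".".into() })];
}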
@@ -18,7 +18,7 @@ pub trait Job: Send + Sync + Debug + DynClone {
 pub struct Jobs {
     job_queue: Vec<Box<dyn Job>>,
     // workers are spawned when jobs are picked off the queue
-    running_workers: HashMap<String, Worker>,
+    running_workers: HashMap<String, Arc<Mutex<Worker>>>,
 }

 impl Jobs {
@@ -30,19 +30,26 @@ impl Jobs {
     }
     pub async fn ingest(&mut self, ctx: &CoreContext, job: Box<dyn Job>) {
         // create worker to process job
-        let mut worker = Worker::new(job);
+        let worker = Worker::new(job);
+        let id = worker.id();

         if self.running_workers.len() < MAX_WORKERS {
-            worker.spawn(ctx).await;
-            self.running_workers.insert(worker.id(), worker);
+            let wrapped_worker = Arc::new(Mutex::new(worker));
+
+            Worker::spawn(wrapped_worker.clone(), ctx).await;
+
+            self.running_workers.insert(id, wrapped_worker);
         }
     }
     pub async fn get_running(&self) -> Vec<JobReport> {
-        self.running_workers
-            .values()
-            .into_iter()
-            .map(|worker| worker.job_report.clone())
-            .collect()
+        let mut ret = vec![];
+
+        for worker in self.running_workers.values() {
+            let worker = worker.lock().await;
+            ret.push(worker.job_report.clone());
+        }
+
+        ret
     }
 }

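get_running changes from an iterator chain to a plain for loop because each worker now sits behind a `tokio::sync::Mutex`, and the async `lock().await` cannot be awaited inside a synchronous `map` closure. A small self-contained sketch of that pattern, taking a snapshot out of shared, async-locked values (the `Report` type is illustrative):

use std::{collections::HashMap, sync::Arc};
use tokio::sync::Mutex;

#[derive(Clone, Debug)]
struct Report {
    completed: usize,
}

#[tokio::main]
async fn main() {
    // shared ownership: the map keeps one handle, background tasks could hold others
    let mut workers: HashMap<String, Arc<Mutex<Report>>> = HashMap::new();
    workers.insert("a".into(), Arc::new(Mutex::new(Report { completed: 3 })));
    workers.insert("b".into(), Arc::new(Mutex::new(Report { completed: 7 })));

    // an async lock can't be awaited inside `.map(...)`, so use a plain loop
    let mut snapshot = Vec::new();
    for worker in workers.values() {
        let report = worker.lock().await;
        snapshot.push(report.clone());
    }

    println!("{:?}", snapshot);
}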
@@ -1,6 +1,10 @@
+use std::sync::Arc;
+
 use crate::{ClientQuery, CoreContext, CoreEvent, Job};
-use dyn_clone::clone_box;
-use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};
+use tokio::sync::{
+    mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender},
+    Mutex,
+};

 use super::jobs::{JobReport, JobReportUpdate, JobStatus};

@@ -11,6 +15,11 @@ pub enum WorkerEvent {
     Failed,
 }

+enum WorkerState {
+    Pending(Box<dyn Job>, UnboundedReceiver<WorkerEvent>),
+    Running,
+}
+
 #[derive(Clone)]
 pub struct WorkerContext {
     pub core_ctx: CoreContext,
@@ -20,31 +29,49 @@ pub struct WorkerContext {
 // a worker is a dedicated thread that runs a single job
 // once the job is complete the worker will exit
 pub struct Worker {
-    job: Box<dyn Job>,
     pub job_report: JobReport,
-    worker_channel: (UnboundedSender<WorkerEvent>, UnboundedReceiver<WorkerEvent>),
+    state: WorkerState,
+    worker_sender: UnboundedSender<WorkerEvent>,
 }

 impl Worker {
     pub fn new(job: Box<dyn Job>) -> Self {
+        let (worker_sender, worker_receiver) = unbounded_channel();
         let uuid = uuid::Uuid::new_v4().to_string();

         println!("worker uuid: {}", &uuid);

         Self {
-            job,
+            state: WorkerState::Pending(job, worker_receiver),
             job_report: JobReport::new(uuid),
-            worker_channel: unbounded_channel(),
+            worker_sender,
         }
     }
     // spawns a thread and extracts channel sender to communicate with it
-    pub async fn spawn(&mut self, ctx: &CoreContext) {
+    pub async fn spawn(worker: Arc<Mutex<Self>>, ctx: &CoreContext) {
         println!("spawning worker");
         // we capture the worker receiver channel so state can be updated from inside the worker
-        let worker_sender = self.worker_channel.0.clone();
+        let mut worker_mut = worker.lock().await;
+
+        let (job, worker_receiver) =
+            match std::mem::replace(&mut worker_mut.state, WorkerState::Running) {
+                WorkerState::Pending(job, worker_receiver) => {
+                    worker_mut.state = WorkerState::Running;
+                    (job, worker_receiver)
+                },
+                WorkerState::Running => unreachable!(),
+            };
+
+        let worker_sender = worker_mut.worker_sender.clone();
         let core_ctx = ctx.clone();

-        let job = clone_box(&*self.job);
+        worker_mut.job_report.status = JobStatus::Running;

-        self.track_progress(&ctx).await;
+        tokio::spawn(Worker::track_progress(
+            worker.clone(),
+            worker_receiver,
+            ctx.clone(),
+        ));

         tokio::spawn(async move {
             println!("new worker thread spawned");
@@ -61,44 +88,54 @@ impl Worker {
             }
         });
     }

     pub fn id(&self) -> String {
         self.job_report.id.to_owned()
     }
-    async fn track_progress(&mut self, ctx: &CoreContext) {
+
+    async fn track_progress(
+        worker: Arc<Mutex<Self>>,
+        mut channel: UnboundedReceiver<WorkerEvent>,
+        ctx: CoreContext,
+    ) {
         println!("tracking progress");
-        self.job_report.status = JobStatus::Running;
-        loop {
-            tokio::select! {
-                Some(command) = self.worker_channel.1.recv() => {
-                    match command {
-                        WorkerEvent::Progressed(changes) => {
-                            println!("worker event: progressed");
-                            for change in changes {
-                                match change {
-                                    JobReportUpdate::TaskCount(task_count) => {
-                                        self.job_report.task_count = task_count;
-                                    },
-                                    JobReportUpdate::CompletedTaskCount(completed_task_count) => {
-                                        self.job_report.completed_task_count = completed_task_count;
-                                    },
-                                    JobReportUpdate::Message(message) => {
-                                        self.job_report.message = message;
-                                    },
-                                }
-                            }
-                            ctx.emit(CoreEvent::InvalidateQuery(ClientQuery::JobGetRunning)).await;
-                        },
-                        WorkerEvent::Completed => {
-                            self.job_report.status = JobStatus::Completed;
-                            ctx.emit(CoreEvent::InvalidateQuery(ClientQuery::JobGetRunning)).await;
-                            ctx.emit(CoreEvent::InvalidateQuery(ClientQuery::JobGetHistory)).await;
-                        },
-                        WorkerEvent::Failed => {
-                            self.job_report.status = JobStatus::Failed;
-                            ctx.emit(CoreEvent::InvalidateQuery(ClientQuery::JobGetHistory)).await;
-                        },
-                    }
-                }
+        while let Some(command) = channel.recv().await {
+            let mut worker = worker.lock().await;
+
+            match command {
+                WorkerEvent::Progressed(changes) => {
+                    println!("worker event: progressed");
+                    for change in changes {
+                        match change {
+                            JobReportUpdate::TaskCount(task_count) => {
+                                worker.job_report.task_count = task_count;
+                            },
+                            JobReportUpdate::CompletedTaskCount(completed_task_count) => {
+                                worker.job_report.completed_task_count =
+                                    completed_task_count;
+                            },
+                            JobReportUpdate::Message(message) => {
+                                worker.job_report.message = message;
+                            },
+                        }
+                    }
+                    ctx.emit(CoreEvent::InvalidateQuery(ClientQuery::JobGetRunning))
+                        .await;
+                },
+                WorkerEvent::Completed => {
+                    worker.job_report.status = JobStatus::Completed;
+                    ctx.emit(CoreEvent::InvalidateQuery(ClientQuery::JobGetRunning))
+                        .await;
+                    ctx.emit(CoreEvent::InvalidateQuery(ClientQuery::JobGetHistory))
+                        .await;
+                    break;
+                },
+                WorkerEvent::Failed => {
+                    worker.job_report.status = JobStatus::Failed;
+                    ctx.emit(CoreEvent::InvalidateQuery(ClientQuery::JobGetHistory))
+                        .await;
+                    break;
+                },
+            }
         }
     }
 }
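track_progress is now a free-standing task: it owns the receiving half of the worker's channel, loops with `while let Some(..) = channel.recv().await`, locks the shared worker only long enough to apply each update, and breaks out once a Completed or Failed event arrives so the task ends with the job. A compact sketch of that consume-until-terminal-event loop (the `Event` enum and the progress counter are illustrative):

use std::sync::Arc;
use tokio::sync::{mpsc::unbounded_channel, Mutex};

#[derive(Debug)]
enum Event {
    Progressed(usize),
    Completed,
}

#[tokio::main]
async fn main() {
    let (tx, mut rx) = unbounded_channel::<Event>();
    let progress = Arc::new(Mutex::new(0usize));

    // producer side: a job reporting progress, then completion
    tx.send(Event::Progressed(5)).unwrap();
    tx.send(Event::Progressed(9)).unwrap();
    tx.send(Event::Completed).unwrap();

    // consumer side: lock shared state per event, stop on the terminal event
    let tracker = {
        let progress = progress.clone();
        tokio::spawn(async move {
            while let Some(event) = rx.recv().await {
                match event {
                    Event::Progressed(done) => *progress.lock().await = done,
                    Event::Completed => break,
                }
            }
        })
    };

    tracker.await.unwrap();
    println!("final progress: {}", *progress.lock().await);
}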