mirror of
https://github.com/spacedriveapp/spacedrive
synced 2024-07-04 08:43:27 +00:00
Trying to avoid data loss on actor stop
This commit is contained in:
parent
f72166fe70
commit
6cdaed47ec
|
@ -207,7 +207,9 @@ impl Actor {
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(tx) = event.wait_tx {
|
if let Some(tx) = event.wait_tx {
|
||||||
tx.send(()).ok();
|
if tx.send(()).is_err() {
|
||||||
|
warn!("Failed to send wait_tx signal");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if event.has_more {
|
if event.has_more {
|
||||||
|
|
|
@ -205,8 +205,7 @@ async fn no_update_after_delete() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
),
|
),
|
||||||
instance1.db.location().find_many(vec![]),
|
instance1.db.location().find_many(vec![]),
|
||||||
)
|
)
|
||||||
.await
|
.await?;
|
||||||
.ok();
|
|
||||||
|
|
||||||
// one spare update operation that actually gets ignored by instance 2
|
// one spare update operation that actually gets ignored by instance 2
|
||||||
assert_eq!(instance1.db.crdt_operation().count(vec![]).exec().await?, 5);
|
assert_eq!(instance1.db.crdt_operation().count(vec![]).exec().await?, 5);
|
||||||
|
|
|
@ -146,7 +146,7 @@ impl Instance {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ingest::Request::FinishedIngesting => {
|
ingest::Request::FinishedIngesting => {
|
||||||
right.sync.tx.send(SyncMessage::Ingested).ok();
|
right.sync.tx.send(SyncMessage::Ingested).unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,62 +1,93 @@
|
||||||
|
#![warn(
|
||||||
|
clippy::all,
|
||||||
|
clippy::pedantic,
|
||||||
|
clippy::correctness,
|
||||||
|
clippy::perf,
|
||||||
|
clippy::style,
|
||||||
|
clippy::suspicious,
|
||||||
|
clippy::complexity,
|
||||||
|
clippy::nursery,
|
||||||
|
clippy::unwrap_used,
|
||||||
|
unused_qualifications,
|
||||||
|
rust_2018_idioms,
|
||||||
|
trivial_casts,
|
||||||
|
trivial_numeric_casts,
|
||||||
|
unused_allocation,
|
||||||
|
clippy::unnecessary_cast,
|
||||||
|
clippy::cast_lossless,
|
||||||
|
clippy::cast_possible_truncation,
|
||||||
|
clippy::cast_possible_wrap,
|
||||||
|
clippy::cast_precision_loss,
|
||||||
|
clippy::cast_sign_loss,
|
||||||
|
clippy::dbg_macro,
|
||||||
|
clippy::deprecated_cfg_attr,
|
||||||
|
clippy::separated_literal_suffix,
|
||||||
|
deprecated
|
||||||
|
)]
|
||||||
|
#![forbid(deprecated_in_future)]
|
||||||
|
#![allow(clippy::missing_errors_doc, clippy::module_name_repetitions)]
|
||||||
|
|
||||||
use std::{
|
use std::{
|
||||||
collections::HashMap,
|
collections::HashMap,
|
||||||
future::{Future, IntoFuture},
|
future::{Future, IntoFuture},
|
||||||
|
panic::{panic_any, AssertUnwindSafe},
|
||||||
pin::Pin,
|
pin::Pin,
|
||||||
sync::Arc,
|
sync::{
|
||||||
|
atomic::{AtomicBool, Ordering},
|
||||||
|
Arc,
|
||||||
|
},
|
||||||
task::{Context, Poll},
|
task::{Context, Poll},
|
||||||
|
time::Duration,
|
||||||
};
|
};
|
||||||
|
|
||||||
use async_channel as chan;
|
use async_channel as chan;
|
||||||
|
use futures::FutureExt;
|
||||||
use tokio::{
|
use tokio::{
|
||||||
sync::{broadcast, oneshot, Mutex, RwLock},
|
spawn,
|
||||||
task::AbortHandle,
|
sync::{broadcast, RwLock},
|
||||||
|
task::JoinHandle,
|
||||||
|
time::timeout,
|
||||||
};
|
};
|
||||||
use tracing::{error, instrument, warn};
|
use tracing::{error, instrument, warn};
|
||||||
|
|
||||||
type ActorFn = dyn Fn(StopActor) -> Pin<Box<dyn Future<Output = ()> + Send>> + Send + Sync;
|
const ONE_MINUTE: Duration = Duration::from_secs(60);
|
||||||
type ActorsMap = HashMap<&'static str, (Arc<Actor>, ActorRunState)>;
|
|
||||||
|
type ActorFn = dyn Fn(Stopper) -> Pin<Box<dyn Future<Output = ()> + Send>> + Send + Sync;
|
||||||
|
|
||||||
pub struct Actor {
|
pub struct Actor {
|
||||||
abort_handle: Mutex<Option<AbortHandle>>,
|
|
||||||
spawn_fn: Arc<ActorFn>,
|
spawn_fn: Arc<ActorFn>,
|
||||||
|
maybe_handle: Option<JoinHandle<()>>,
|
||||||
|
is_running: Arc<AtomicBool>,
|
||||||
stop_tx: chan::Sender<()>,
|
stop_tx: chan::Sender<()>,
|
||||||
stop_rx: chan::Receiver<()>,
|
stop_rx: chan::Receiver<()>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
|
||||||
enum ActorRunState {
|
|
||||||
Running,
|
|
||||||
Stopped,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct Actors {
|
pub struct Actors {
|
||||||
pub invalidate_rx: broadcast::Receiver<()>,
|
pub invalidate_rx: broadcast::Receiver<()>,
|
||||||
invalidate_tx: broadcast::Sender<()>,
|
invalidate_tx: broadcast::Sender<()>,
|
||||||
actors: Arc<RwLock<ActorsMap>>,
|
actors: Arc<RwLock<HashMap<&'static str, Actor>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Actors {
|
impl Actors {
|
||||||
pub async fn declare<F: Future<Output = ()> + Send + 'static>(
|
pub async fn declare<Fut>(
|
||||||
self: &Arc<Self>,
|
self: &Arc<Self>,
|
||||||
name: &'static str,
|
name: &'static str,
|
||||||
actor_fn: impl FnOnce(StopActor) -> F + Send + Sync + Clone + 'static,
|
actor_fn: impl FnOnce(Stopper) -> Fut + Send + Sync + Clone + 'static,
|
||||||
autostart: bool,
|
autostart: bool,
|
||||||
) {
|
) where
|
||||||
|
Fut: Future<Output = ()> + Send + 'static,
|
||||||
|
{
|
||||||
let (stop_tx, stop_rx) = chan::bounded(1);
|
let (stop_tx, stop_rx) = chan::bounded(1);
|
||||||
|
|
||||||
self.actors.write().await.insert(
|
self.actors.write().await.insert(
|
||||||
name,
|
name,
|
||||||
(
|
Actor {
|
||||||
Arc::new(Actor {
|
spawn_fn: Arc::new(move |stop| Box::pin((actor_fn.clone())(stop))),
|
||||||
abort_handle: Default::default(),
|
maybe_handle: None,
|
||||||
spawn_fn: Arc::new(move |stop| {
|
is_running: Arc::new(AtomicBool::new(false)),
|
||||||
Box::pin((actor_fn.clone())(stop)) as Pin<Box<_>>
|
stop_tx,
|
||||||
}),
|
stop_rx,
|
||||||
stop_tx,
|
},
|
||||||
stop_rx,
|
|
||||||
}),
|
|
||||||
ActorRunState::Stopped,
|
|
||||||
),
|
|
||||||
);
|
);
|
||||||
|
|
||||||
if autostart {
|
if autostart {
|
||||||
|
@ -66,119 +97,105 @@ impl Actors {
|
||||||
|
|
||||||
#[instrument(skip(self))]
|
#[instrument(skip(self))]
|
||||||
pub async fn start(self: &Arc<Self>, name: &str) {
|
pub async fn start(self: &Arc<Self>, name: &str) {
|
||||||
let actor = {
|
if let Some(actor) = self.actors.write().await.get_mut(name) {
|
||||||
let mut actors = self.actors.write().await;
|
if actor.is_running.load(Ordering::Acquire) {
|
||||||
|
|
||||||
let Some((actor, run_state)) = actors.get_mut(name) else {
|
|
||||||
return;
|
|
||||||
};
|
|
||||||
|
|
||||||
if matches!(run_state, ActorRunState::Running) {
|
|
||||||
warn!("Actor already running!");
|
warn!("Actor already running!");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
*run_state = ActorRunState::Running;
|
let invalidate_tx = self.invalidate_tx.clone();
|
||||||
|
|
||||||
Arc::clone(actor)
|
let is_running = Arc::clone(&actor.is_running);
|
||||||
};
|
|
||||||
|
|
||||||
let mut abort_handle = actor.abort_handle.lock().await;
|
is_running.store(true, Ordering::Release);
|
||||||
if abort_handle.is_some() {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
let (tx, rx) = oneshot::channel();
|
if invalidate_tx.send(()).is_err() {
|
||||||
|
warn!("Failed to send invalidate signal");
|
||||||
let invalidate_tx = self.invalidate_tx.clone();
|
|
||||||
|
|
||||||
let spawn_fn = actor.spawn_fn.clone();
|
|
||||||
let stop_actor = StopActor {
|
|
||||||
rx: actor.stop_rx.clone(),
|
|
||||||
};
|
|
||||||
|
|
||||||
let task = tokio::spawn(async move {
|
|
||||||
(spawn_fn)(stop_actor).await;
|
|
||||||
|
|
||||||
tx.send(()).ok();
|
|
||||||
});
|
|
||||||
|
|
||||||
*abort_handle = Some(task.abort_handle());
|
|
||||||
invalidate_tx.send(()).ok();
|
|
||||||
|
|
||||||
tokio::spawn({
|
|
||||||
let actor = actor.clone();
|
|
||||||
async move {
|
|
||||||
#[allow(clippy::match_single_binding)]
|
|
||||||
match rx.await {
|
|
||||||
_ => {}
|
|
||||||
};
|
|
||||||
|
|
||||||
actor.abort_handle.lock().await.take();
|
|
||||||
invalidate_tx.send(()).ok();
|
|
||||||
}
|
}
|
||||||
});
|
|
||||||
|
if let Some(handle) = actor.maybe_handle.take() {
|
||||||
|
if handle.await.is_err() {
|
||||||
|
// This should never happen, as we're trying to catch the panic below with
|
||||||
|
// `catch_unwind`.
|
||||||
|
error!("Actor unexpectedly panicked");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
actor.maybe_handle = Some(spawn({
|
||||||
|
let spawn_fn = Arc::clone(&actor.spawn_fn);
|
||||||
|
|
||||||
|
let stop_actor = Stopper(actor.stop_rx.clone());
|
||||||
|
|
||||||
|
async move {
|
||||||
|
if (AssertUnwindSafe((spawn_fn)(stop_actor)))
|
||||||
|
.catch_unwind()
|
||||||
|
.await
|
||||||
|
.is_err()
|
||||||
|
{
|
||||||
|
error!("Actor unexpectedly panicked");
|
||||||
|
}
|
||||||
|
|
||||||
|
is_running.store(false, Ordering::Release);
|
||||||
|
|
||||||
|
if invalidate_tx.send(()).is_err() {
|
||||||
|
warn!("Failed to send invalidate signal");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[instrument(skip(self))]
|
#[instrument(skip(self))]
|
||||||
pub async fn stop(self: &Arc<Self>, name: &str) {
|
pub async fn stop(self: &Arc<Self>, name: &str) {
|
||||||
let actor = {
|
if let Some(actor) = self.actors.write().await.get_mut(name) {
|
||||||
let mut actors = self.actors.write().await;
|
if !actor.is_running.load(Ordering::Acquire) {
|
||||||
|
|
||||||
let Some((actor, run_state)) = actors.get_mut(name) else {
|
|
||||||
return;
|
|
||||||
};
|
|
||||||
|
|
||||||
if matches!(run_state, ActorRunState::Stopped) {
|
|
||||||
warn!("Actor already stopped!");
|
warn!("Actor already stopped!");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if actor.stop_tx.send(()).await.is_err() {
|
if actor.stop_tx.send(()).await.is_ok() {
|
||||||
error!("Failed to send stop signal to actor");
|
wait_stop_or_abort(actor.maybe_handle.take()).await;
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
!actor.is_running.load(Ordering::Acquire),
|
||||||
|
"actor handle finished without setting actor to stopped"
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
error!("Failed to send stop signal to actor, will check if it's already stopped or abort otherwise");
|
||||||
|
wait_stop_or_abort(actor.maybe_handle.take()).await;
|
||||||
}
|
}
|
||||||
|
|
||||||
*run_state = ActorRunState::Stopped;
|
|
||||||
|
|
||||||
Arc::clone(actor)
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut abort_handle = actor.abort_handle.lock().await;
|
|
||||||
|
|
||||||
if let Some(abort_handle) = abort_handle.take() {
|
|
||||||
abort_handle.abort();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn get_state(&self) -> HashMap<String, bool> {
|
pub async fn get_state(&self) -> HashMap<String, bool> {
|
||||||
let actors = self.actors.read().await;
|
self.actors
|
||||||
|
.read()
|
||||||
let mut state = HashMap::with_capacity(actors.len());
|
.await
|
||||||
|
.iter()
|
||||||
for (name, (actor, _)) in actors.iter() {
|
.map(|(&name, actor)| (name.to_string(), actor.is_running.load(Ordering::Relaxed)))
|
||||||
state.insert(name.to_string(), actor.abort_handle.lock().await.is_some());
|
.collect()
|
||||||
}
|
|
||||||
|
|
||||||
state
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for Actors {
|
impl Default for Actors {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
let actors = Default::default();
|
|
||||||
|
|
||||||
let (invalidate_tx, invalidate_rx) = broadcast::channel(1);
|
let (invalidate_tx, invalidate_rx) = broadcast::channel(1);
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
actors,
|
actors: Arc::default(),
|
||||||
invalidate_rx,
|
invalidate_rx,
|
||||||
invalidate_tx,
|
invalidate_tx,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct StopActor {
|
pub struct Stopper(chan::Receiver<()>);
|
||||||
rx: chan::Receiver<()>,
|
|
||||||
|
impl Stopper {
|
||||||
|
#[must_use]
|
||||||
|
pub fn check_stop(&self) -> bool {
|
||||||
|
self.0.try_recv().is_ok()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pin_project_lite::pin_project! {
|
pin_project_lite::pin_project! {
|
||||||
|
@ -206,13 +223,34 @@ impl Future for StopActorFuture<'_> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'recv> IntoFuture for &'recv StopActor {
|
impl<'recv> IntoFuture for &'recv Stopper {
|
||||||
type Output = ();
|
type Output = ();
|
||||||
type IntoFuture = StopActorFuture<'recv>;
|
type IntoFuture = StopActorFuture<'recv>;
|
||||||
|
|
||||||
fn into_future(self) -> Self::IntoFuture {
|
fn into_future(self) -> Self::IntoFuture {
|
||||||
StopActorFuture {
|
Self::IntoFuture { fut: self.0.recv() }
|
||||||
fut: self.rx.recv(),
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn wait_stop_or_abort(maybe_handle: Option<JoinHandle<()>>) {
|
||||||
|
if let Some(handle) = maybe_handle {
|
||||||
|
let abort_handle = handle.abort_handle();
|
||||||
|
|
||||||
|
match timeout(ONE_MINUTE, handle).await {
|
||||||
|
Ok(Ok(())) => { /* Everything is Awesome! */ }
|
||||||
|
Ok(Err(e)) => {
|
||||||
|
// This should never happen, as we're trying to catch the panic with
|
||||||
|
// `catch_unwind`.
|
||||||
|
if e.is_panic() {
|
||||||
|
let p = e.into_panic();
|
||||||
|
error!("Actor unexpectedly panicked, we will pop up the panic!");
|
||||||
|
panic_any(p);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
error!("Actor failed to gracefully stop in the allotted time, will force abortion");
|
||||||
|
abort_handle.abort();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue