@@ -176,6 +176,7 @@ pub struct Server<C = HttpConnector, A = AddrIncoming, D = HttpsConnector<HttpCo
176176 pub storage_server_address : String ,
177177 pub connector : Option < D > ,
178178 pub migrate_bottomless : bool ,
179+ pub enable_deadlock_monitor : bool ,
179180}
180181
181182impl < C , A , D > Default for Server < C , A , D > {
@@ -201,6 +202,7 @@ impl<C, A, D> Default for Server<C, A, D> {
201202 storage_server_address : Default :: default ( ) ,
202203 connector : None ,
203204 migrate_bottomless : false ,
205+ enable_deadlock_monitor : false ,
204206 }
205207 }
206208}
@@ -410,6 +412,57 @@ fn init_version_file(db_path: &Path) -> anyhow::Result<()> {
410412 Ok ( ( ) )
411413}
412414
415+ /// The deadlock watcher monitors the main tokio runtime for deadlock by sending Ping to a task
416+ /// within it, and waiting for pongs. If the runtime fails to respond in due time, the watcher
417+ /// exits the process.
418+ fn install_deadlock_monitor ( ) {
419+ // this is a very generous deadline for the main runtime to respond
420+ const PONG_DEADLINE : Duration = Duration :: from_secs ( 5 ) ;
421+
422+ struct Ping ;
423+ struct Pong ;
424+
425+ let ( sender, mut receiver) = tokio:: sync:: mpsc:: channel ( 1 ) ;
426+
427+ std:: thread:: spawn ( move || {
428+ let rt = tokio:: runtime:: Builder :: new_current_thread ( )
429+ . enable_time ( )
430+ . build ( )
431+ . unwrap ( ) ;
432+ rt. block_on ( async move {
433+ loop {
434+ let ( snd, ret) = tokio:: sync:: oneshot:: channel ( ) ;
435+ sender. try_send ( ( snd, Ping ) ) . unwrap ( ) ;
436+ match tokio:: time:: timeout ( PONG_DEADLINE , ret) . await {
437+ Ok ( Ok ( Pong ) ) => ( ) ,
438+ Err ( _) => {
439+ tracing:: error!(
440+ "main runtime failed to respond within deadlines, deadlock detected"
441+ ) ;
442+ // std::process::exit(1);
443+ }
444+ _ => ( ) ,
445+ }
446+
447+ tokio:: time:: sleep ( Duration :: from_secs ( 1 ) ) . await ;
448+ }
449+ } )
450+ } ) ;
451+
452+ tokio:: spawn ( async move {
453+ loop {
454+ match receiver. recv ( ) . await {
455+ Some ( ( ret, Ping ) ) => {
456+ let _ = ret. send ( Pong ) ;
457+ }
458+ None => break ,
459+ }
460+ }
461+
462+ tracing:: warn!( "deadlock monitor exited" )
463+ } ) ;
464+ }
465+
413466impl < C , A , D > Server < C , A , D >
414467where
415468 C : Connector ,
@@ -501,6 +554,11 @@ where
501554 static INIT : std:: sync:: Once = std:: sync:: Once :: new ( ) ;
502555 let mut task_manager = TaskManager :: new ( ) ;
503556
557+ if self . enable_deadlock_monitor {
558+ install_deadlock_monitor ( ) ;
559+ tracing:: info!( "deadlock monitor installed" ) ;
560+ }
561+
504562 if std:: env:: var ( "LIBSQL_SQLITE_MIMALLOC" ) . is_ok ( ) {
505563 setup_sqlite_alloc ( ) ;
506564 }
0 commit comments