Skip to content

Commit f896974

Browse files
authored
[TS] Implement near-heap-limit termination (#4777)
# Description of Changes Uses `Isolate::add_near_heap_limit_callback` to prevent unbounded heap growth. Upon nearing the heap limit, we will now: 1. Call `Isolate::terminate_execution`. 2. Request that v8 double the heap limit. Then, upon finishing a function call, we lower the heap limit back down. This should hopefully fix the issue where v8 hits the heap limit and crashes the whole process. Also improves the way termination requests are checked for and processed. # Expected complexity level and risk 2 # Testing - [ ] Manual testing with memory-leaky modules
1 parent a7729f7 commit f896974

9 files changed

Lines changed: 331 additions & 165 deletions

File tree

crates/core/src/config.rs

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -190,8 +190,12 @@ pub struct V8HeapPolicyConfig {
190190
pub heap_gc_trigger_fraction: f64,
191191
#[serde(default = "def_retire", deserialize_with = "de_fraction")]
192192
pub heap_retire_fraction: f64,
193-
#[serde(default, rename = "heap-limit-mb", deserialize_with = "de_limit_mb")]
194-
pub heap_limit_bytes: Option<usize>,
193+
#[serde(
194+
default = "def_heap_limit",
195+
rename = "heap-limit-mb",
196+
deserialize_with = "de_limit_mb"
197+
)]
198+
pub heap_limit_bytes: usize,
195199
}
196200

197201
impl Default for V8HeapPolicyConfig {
@@ -201,7 +205,7 @@ impl Default for V8HeapPolicyConfig {
201205
heap_check_time_interval: def_time_interval(),
202206
heap_gc_trigger_fraction: def_gc_trigger(),
203207
heap_retire_fraction: def_retire(),
204-
heap_limit_bytes: None,
208+
heap_limit_bytes: def_heap_limit(),
205209
}
206210
}
207211
}
@@ -242,6 +246,12 @@ fn def_retire() -> f64 {
242246
0.75
243247
}
244248

249+
/// Default heap limit, in bytes
250+
fn def_heap_limit() -> usize {
251+
// 1 GiB
252+
1024 * 1024 * 1024
253+
}
254+
245255
fn de_nz_u64<'de, D>(deserializer: D) -> Result<Option<u64>, D::Error>
246256
where
247257
D: serde::Deserializer<'de>,
@@ -294,22 +304,20 @@ where
294304
}
295305
}
296306

297-
fn de_limit_mb<'de, D>(deserializer: D) -> Result<Option<usize>, D::Error>
307+
fn de_limit_mb<'de, D>(deserializer: D) -> Result<usize, D::Error>
298308
where
299309
D: serde::Deserializer<'de>,
300310
{
301311
let value = u64::deserialize(deserializer)?;
302312
if value == 0 {
303-
return Ok(None);
313+
return Ok(def_heap_limit());
304314
}
305315

306316
let bytes = value
307317
.checked_mul(1024 * 1024)
308318
.ok_or_else(|| serde::de::Error::custom("heap-limit-mb is too large"))?;
309319

310-
usize::try_from(bytes)
311-
.map(Some)
312-
.map_err(|_| serde::de::Error::custom("heap-limit-mb does not fit in usize"))
320+
usize::try_from(bytes).map_err(|_| serde::de::Error::custom("heap-limit-mb does not fit in usize"))
313321
}
314322

315323
#[cfg(test)]
@@ -441,7 +449,7 @@ mod tests {
441449
);
442450
assert_eq!(config.v8_heap_policy.heap_gc_trigger_fraction, 0.67);
443451
assert_eq!(config.v8_heap_policy.heap_retire_fraction, 0.75);
444-
assert_eq!(config.v8_heap_policy.heap_limit_bytes, None);
452+
assert_eq!(config.v8_heap_policy.heap_limit_bytes, 1024 * 1024 * 1024);
445453
}
446454

447455
#[test]
@@ -464,6 +472,6 @@ mod tests {
464472
);
465473
assert_eq!(config.v8_heap_policy.heap_gc_trigger_fraction, 0.6);
466474
assert_eq!(config.v8_heap_policy.heap_retire_fraction, 0.8);
467-
assert_eq!(config.v8_heap_policy.heap_limit_bytes, Some(256 * 1024 * 1024));
475+
assert_eq!(config.v8_heap_policy.heap_limit_bytes, 256 * 1024 * 1024);
468476
}
469477
}

crates/core/src/host/v8/budget.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ pub(super) extern "C" fn cb_noop(_: v8::UnsafeRawIsolatePtr, _: *mut c_void) {}
6969
///
7070
/// Every `callback_every` ticks, `callback` is called.
7171
fn run_timeout_and_cb_every(
72+
// TODO: use RemoteTerminator here once we actually call this function, and make RemoteTerminator thread-safe.
7273
handle: IsolateHandle,
7374
callback_every: u64,
7475
callback: InterruptCallback,

crates/core/src/host/v8/error.rs

Lines changed: 128 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@ use core::fmt;
1313
use spacetimedb_data_structures::map::IntMap;
1414
use spacetimedb_primitives::errno;
1515
use spacetimedb_sats::Serialize;
16+
use std::cell::Cell;
1617
use std::num::NonZero;
18+
use std::rc::Rc;
1719
use v8::{tc_scope, Exception, HandleScope, Local, PinScope, PinnedRef, StackFrame, StackTrace, TryCatch, Value};
1820

1921
/// The result of trying to convert a [`Value`] in scope `'scope` to some type `T`.
@@ -107,24 +109,6 @@ impl ArrayTooLongError {
107109
}
108110
}
109111

110-
/// A catchable termination error thrown in callbacks to indicate a host error.
111-
#[derive(Serialize)]
112-
pub(super) struct TerminationError {
113-
__terminated__: String,
114-
}
115-
116-
impl TerminationError {
117-
/// Converts [`anyhow::Error`] to a termination error.
118-
pub(super) fn from_error<'scope>(
119-
scope: &PinScope<'scope, '_>,
120-
error: &anyhow::Error,
121-
) -> ExcResult<ExceptionValue<'scope>> {
122-
let __terminated__ = format!("{error}");
123-
let error = Self { __terminated__ };
124-
serialize_to_js(scope, &error).map(ExceptionValue)
125-
}
126-
}
127-
128112
/// Collapses `res` where the `Ok(x)` where `x` is throwable.
129113
pub(super) fn collapse_exc_thrown<'scope>(
130114
scope: &PinScope<'scope, '_>,
@@ -159,38 +143,96 @@ pub type SysCallResult<T> = Result<T, SysCallError>;
159143
/// A flag set in [`throw_nodes_error`].
160144
/// The flag should be checked in every module -> host ABI.
161145
/// If the flag is set, the call is prevented.
162-
struct TerminationFlag;
146+
#[derive(Default, Clone)]
147+
pub(super) struct TerminationFlag(Rc<TerminationFlagInner>);
148+
149+
#[derive(Default)]
150+
struct TerminationFlagInner {
151+
flag: Cell<bool>,
152+
reason: Cell<Option<anyhow::Error>>,
153+
}
154+
155+
impl TerminationFlag {
156+
/// Set the terminate-execution flag due to the given host error.
157+
pub(super) fn set(&self, err: anyhow::Error) {
158+
let already_set = self.0.flag.replace(true);
159+
if !already_set {
160+
self.0.reason.set(Some(err));
161+
}
162+
}
163+
164+
pub(super) fn is_set(&self) -> bool {
165+
self.0.flag.get()
166+
}
167+
168+
pub(super) fn clear(&self) -> Option<anyhow::Error> {
169+
let flag = self.0.flag.take();
170+
let reason = self.0.reason.take();
171+
flag.then_some(()).and(reason)
172+
}
173+
}
163174

164175
/// Terminate execution immediately due to the given host error.
165176
pub(super) fn terminate_execution<'scope>(
166177
scope: &mut PinScope<'scope, '_>,
167-
err: &anyhow::Error,
178+
err: anyhow::Error,
168179
) -> ExcResult<ExceptionValue<'scope>> {
169-
// Terminate execution ASAP and throw a catchable exception (`TerminationError`).
170-
// Unfortunately, JS execution won't be terminated once the callback returns,
171-
// so we set a slot that all callbacks immediately check
172-
// to ensure that the module won't be able to do anything to the host
173-
// while it's being terminated (eventually).
174-
scope.terminate_execution();
175-
scope.set_slot(TerminationFlag);
176-
TerminationError::from_error(scope, err)
180+
get_or_insert_slot(scope, TerminationFlag::default).set(err);
181+
Err(terminate_execution_now(scope))
177182
}
178183

179-
/// Checks the termination flag and throws a `TerminationError` if set.
180-
///
181-
/// Returns whether the flag was set.
182-
pub(super) fn throw_if_terminated(scope: &PinScope<'_, '_>) -> bool {
184+
/// A handle to terminate execution of a v8 isolate.
185+
pub(super) struct RemoteTerminator {
186+
pub(super) flag: TerminationFlag,
187+
pub(super) handle: v8::IsolateHandle,
188+
}
189+
190+
impl RemoteTerminator {
191+
pub(super) fn new(isolate: &mut v8::Isolate) -> Self {
192+
let flag = get_or_insert_slot(isolate, TerminationFlag::default).clone();
193+
let handle = isolate.thread_safe_handle();
194+
Self { flag, handle }
195+
}
196+
197+
/// Request that v8 terminate execution due to the given host error.
198+
pub(super) fn terminate_execution(&self, err: anyhow::Error) {
199+
self.flag.set(err);
200+
self.handle.terminate_execution();
201+
}
202+
}
203+
204+
/// Checks the termination flag and terminates execution immediately if it was set.
205+
pub(super) fn check_termination(scope: &PinScope<'_, '_>) -> ExcResult<()> {
183206
// If the flag was set in `throw_nodes_error`,
184207
// we need to block all module -> host ABI calls.
185-
let set = scope.get_slot::<TerminationFlag>().is_some();
186-
if set {
187-
let err = anyhow::anyhow!("execution is being terminated");
188-
if let Ok(exception) = TerminationError::from_error(scope, &err) {
189-
exception.throw(scope);
190-
}
208+
if let Some(flag) = scope.get_slot::<TerminationFlag>()
209+
&& flag.is_set()
210+
{
211+
return Err(terminate_execution_now(scope));
191212
}
192213

193-
set
214+
Ok(())
215+
}
216+
217+
/// Terminate execution of the v8 isolate, not as a request, but right now.
218+
fn terminate_execution_now(scope: &PinScope<'_, '_>) -> ExceptionThrown {
219+
if !scope.is_execution_terminating() {
220+
scope.terminate_execution();
221+
handle_interrupts(scope);
222+
assert!(scope.is_execution_terminating());
223+
}
224+
exception_already_thrown()
225+
}
226+
227+
/// Check for interrupts, such as an execution termination request, and handle then.
228+
fn handle_interrupts(scope: &PinScope<'_, '_>) {
229+
// This is stupid. `Isolate::TerminateExecution` requests a termination interrupt, meaning that
230+
// `isolate.terminate_execution(); isolate.is_execution_terminating()` evaluates to false.
231+
// v8 exposes neither the internal, immediate version `i::Isolate::TerminateExecution`, nor
232+
// `Isolate::HandleInterrupts`, so we have to call `HandleInterrupts` in a roundabout way:
233+
// via `JSON::Stringify`, which checks at the start of the call for any interrupts. Yippee.
234+
let obj = v8::Integer::new(scope, 0);
235+
let _ = v8::json::stringify(scope, obj.into());
194236
}
195237

196238
/// A catchable error code thrown in callbacks
@@ -218,7 +260,7 @@ pub(crate) struct ExceptionThrown {
218260

219261
impl ExceptionThrown {
220262
/// Turns a caught JS exception in `scope` into a [`JSError`].
221-
pub(crate) fn into_error(self, scope: &mut PinTryCatch) -> JsError {
263+
pub(crate) fn into_error(self, scope: &mut PinTryCatch) -> Result<JsError, UnknownJsError> {
222264
JsError::from_caught(scope)
223265
}
224266
}
@@ -255,12 +297,15 @@ pub(super) enum ErrorOrException<Exc> {
255297
Exception(Exc),
256298
}
257299

258-
impl<Exc> ErrorOrException<Exc> {
259-
pub(super) fn map_exception<Exc2>(self, f: impl FnOnce(Exc) -> Exc2) -> ErrorOrException<Exc2> {
260-
match self {
300+
impl ErrorOrException<ExceptionThrown> {
301+
pub(super) fn exc_into_error(
302+
self,
303+
scope: &mut PinTryCatch<'_, '_, '_, '_>,
304+
) -> Result<ErrorOrException<JsError>, UnknownJsError> {
305+
Ok(match self {
261306
ErrorOrException::Err(e) => ErrorOrException::Err(e),
262-
ErrorOrException::Exception(exc) => ErrorOrException::Exception(f(exc)),
263-
}
307+
ErrorOrException::Exception(exc) => ErrorOrException::Exception(exc.into_error(scope)?),
308+
})
264309
}
265310
}
266311

@@ -276,6 +321,12 @@ impl From<ExceptionThrown> for ErrorOrException<ExceptionThrown> {
276321
}
277322
}
278323

324+
impl From<JsError> for ErrorOrException<JsError> {
325+
fn from(e: JsError) -> Self {
326+
Self::Exception(e)
327+
}
328+
}
329+
279330
impl From<ErrorOrException<JsError>> for anyhow::Error {
280331
fn from(err: ErrorOrException<JsError>) -> Self {
281332
match err {
@@ -508,23 +559,41 @@ fn get_or_insert_slot<T: 'static>(isolate: &mut v8::Isolate, default: impl FnOnc
508559

509560
impl JsError {
510561
/// Turns a caught JS exception in `scope` into a [`JSError`].
511-
fn from_caught(scope: &mut PinTryCatch<'_, '_, '_, '_>) -> Self {
512-
match scope.message() {
513-
Some(message) => Self {
514-
trace: message
515-
.get_stack_trace(scope)
516-
.map(|trace| JsStackTrace::from_trace(scope, trace))
517-
.unwrap_or_default(),
518-
msg: message.get(scope).to_rust_string_lossy(scope),
519-
},
520-
None => Self {
521-
trace: JsStackTrace::default(),
522-
msg: "unknown error".to_owned(),
523-
},
562+
fn from_caught(scope: &mut PinTryCatch<'_, '_, '_, '_>) -> Result<Self, UnknownJsError> {
563+
let message = scope.message().ok_or(UnknownJsError)?;
564+
Ok(Self {
565+
trace: message
566+
.get_stack_trace(scope)
567+
.map(|trace| JsStackTrace::from_trace(scope, trace))
568+
.unwrap_or_default(),
569+
msg: message.get(scope).to_rust_string_lossy(scope),
570+
})
571+
}
572+
}
573+
574+
pub(super) struct UnknownJsError;
575+
576+
impl From<UnknownJsError> for JsError {
577+
fn from(_: UnknownJsError) -> Self {
578+
Self {
579+
trace: JsStackTrace::default(),
580+
msg: "unknown error".to_owned(),
524581
}
525582
}
526583
}
527584

585+
impl From<UnknownJsError> for ErrorOrException<JsError> {
586+
fn from(e: UnknownJsError) -> Self {
587+
Self::Exception(e.into())
588+
}
589+
}
590+
591+
impl From<UnknownJsError> for anyhow::Error {
592+
fn from(e: UnknownJsError) -> Self {
593+
JsError::from(e).into()
594+
}
595+
}
596+
528597
pub(super) fn log_traceback(replica_ctx: &ReplicaContext, func_type: &str, func: &str, e: &anyhow::Error) {
529598
// no need to log `JsError` separately; it'll be displayed if it exists in the error.
530599
log::info!("{func_type} \"{func}\" raised a runtime error: {e:#}");
@@ -553,7 +622,7 @@ pub(super) fn catch_exception<'scope, T>(
553622
body: impl FnOnce(&mut PinTryCatch<'scope, '_, '_, '_>) -> Result<T, ErrorOrException<ExceptionThrown>>,
554623
) -> Result<T, ErrorOrException<JsError>> {
555624
tc_scope!(scope, scope);
556-
body(scope).map_err(|e| e.map_exception(|exc| exc.into_error(scope)))
625+
body(scope).map_err(|e| e.exc_into_error(scope).unwrap_or_else(Into::into))
557626
}
558627

559628
pub(super) type PinTryCatch<'scope, 'iso, 'x, 's> = PinnedRef<'x, TryCatch<'s, 'scope, HandleScope<'iso>>>;

0 commit comments

Comments
 (0)