2020#include " src/request_body_processor/json_backend.h"
2121
2222#include < algorithm>
23+ #include < cassert>
2324#include < chrono>
2425#include < cstdint>
2526#include < memory>
@@ -89,6 +90,14 @@ JsonParseResult fromSimdjsonError(simdjson::error_code error) {
8990 }
9091}
9192
93+ /*
94+ * The ondemand parser is reused per thread because simdjson benefits from
95+ * keeping its internal buffers warm across parses. thread_local storage keeps
96+ * the parser isolated to the calling thread, so no parser state is shared
97+ * across transactions running on different threads. The parse and full
98+ * document traversal both complete inside parseDocumentWithSimdjson(), so no
99+ * parser-backed state escapes that function.
100+ */
92101simdjson::ondemand::parser &getReusableSimdjsonParser () {
93102 thread_local std::unique_ptr<simdjson::ondemand::parser> parser;
94103 if (parser == nullptr ) {
@@ -413,15 +422,30 @@ JsonParseResult parseDocumentWithSimdjson(const std::string &input,
413422 JsonSinkStatus::InternalError, " JSON event sink is null." );
414423 }
415424
425+ const char *const input_data = input.data ();
426+ const std::size_t input_size = input.size ();
427+
416428 simdjson::ondemand::parser &parser = getReusableSimdjsonParser ();
417- if (auto error = prepareParser (&parser, input.size (), options); error) {
429+ // This only prepares parser capacity and max-depth bookkeeping. It does
430+ // not make the caller-provided string safe for zero-copy parsing.
431+ if (auto error = prepareParser (&parser, input_size, options); error) {
418432 return fromSimdjsonError (error);
419433 }
420434
435+ // TODO: Revisit zero-copy only when the caller can guarantee a stable
436+ // buffer whose allocation is at least len + SIMDJSON_PADDING bytes.
437+ //
438+ // We intentionally keep the padded_string copy here. The current input is
439+ // a const std::string built from the request-body snapshot/append path, so
440+ // it does not provide guaranteed padding for simdjson's direct iterate()
441+ // overloads. In practice large request bodies often end up with
442+ // size() == capacity(), making any direct path allocator- and stdlib-
443+ // dependent. padded_string keeps this backend deterministic until the
444+ // caller can provide guaranteed lifetime and padding.
421445#ifdef MSC_JSON_AUDIT_INSTRUMENTATION
422446 const auto padded_start = std::chrono::steady_clock::now ();
423447 simdjson::padded_string padded (input);
424- recordSimdjsonPaddedCopy (input. size () , static_cast <std::uint64_t >(
448+ recordSimdjsonPaddedCopy (input_size , static_cast <std::uint64_t >(
425449 std::chrono::duration_cast<std::chrono::nanoseconds>(
426450 std::chrono::steady_clock::now () - padded_start).count ()));
427451#else
@@ -447,7 +471,12 @@ JsonParseResult parseDocumentWithSimdjson(const std::string &input,
447471#endif
448472
449473 JsonBackendWalker walker (sink);
450- return walker.walk (&document);
474+ assert (input.data () == input_data);
475+ assert (input.size () == input_size);
476+ JsonParseResult walk_result = walker.walk (&document);
477+ assert (input.data () == input_data);
478+ assert (input.size () == input_size);
479+ return walk_result;
451480}
452481
453482} // namespace RequestBodyProcessor
0 commit comments