Skip to content

Commit 8fe000c

Browse files
committed
Complete implementation simdjson
1 parent 15328a1 commit 8fe000c

1 file changed

Lines changed: 32 additions & 3 deletions

File tree

src/request_body_processor/json_backend_simdjson.cc

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "src/request_body_processor/json_backend.h"
2121

2222
#include <algorithm>
23+
#include <cassert>
2324
#include <chrono>
2425
#include <cstdint>
2526
#include <memory>
@@ -89,6 +90,14 @@ JsonParseResult fromSimdjsonError(simdjson::error_code error) {
8990
}
9091
}
9192

93+
/*
94+
* The ondemand parser is reused per thread because simdjson benefits from
95+
* keeping its internal buffers warm across parses. thread_local storage keeps
96+
* the parser isolated to the calling thread, so no parser state is shared
97+
* across transactions running on different threads. The parse and full
98+
* document traversal both complete inside parseDocumentWithSimdjson(), so no
99+
* parser-backed state escapes that function.
100+
*/
92101
simdjson::ondemand::parser &getReusableSimdjsonParser() {
93102
thread_local std::unique_ptr<simdjson::ondemand::parser> parser;
94103
if (parser == nullptr) {
@@ -413,15 +422,30 @@ JsonParseResult parseDocumentWithSimdjson(const std::string &input,
413422
JsonSinkStatus::InternalError, "JSON event sink is null.");
414423
}
415424

425+
const char *const input_data = input.data();
426+
const std::size_t input_size = input.size();
427+
416428
simdjson::ondemand::parser &parser = getReusableSimdjsonParser();
417-
if (auto error = prepareParser(&parser, input.size(), options); error) {
429+
// This only prepares parser capacity and max-depth bookkeeping. It does
430+
// not make the caller-provided string safe for zero-copy parsing.
431+
if (auto error = prepareParser(&parser, input_size, options); error) {
418432
return fromSimdjsonError(error);
419433
}
420434

435+
// TODO: Revisit zero-copy only when the caller can guarantee a stable
436+
// buffer whose allocation is at least len + SIMDJSON_PADDING bytes.
437+
//
438+
// We intentionally keep the padded_string copy here. The current input is
439+
// a const std::string built from the request-body snapshot/append path, so
440+
// it does not provide guaranteed padding for simdjson's direct iterate()
441+
// overloads. In practice large request bodies often end up with
442+
// size() == capacity(), making any direct path allocator- and stdlib-
443+
// dependent. padded_string keeps this backend deterministic until the
444+
// caller can provide guaranteed lifetime and padding.
421445
#ifdef MSC_JSON_AUDIT_INSTRUMENTATION
422446
const auto padded_start = std::chrono::steady_clock::now();
423447
simdjson::padded_string padded(input);
424-
recordSimdjsonPaddedCopy(input.size(), static_cast<std::uint64_t>(
448+
recordSimdjsonPaddedCopy(input_size, static_cast<std::uint64_t>(
425449
std::chrono::duration_cast<std::chrono::nanoseconds>(
426450
std::chrono::steady_clock::now() - padded_start).count()));
427451
#else
@@ -447,7 +471,12 @@ JsonParseResult parseDocumentWithSimdjson(const std::string &input,
447471
#endif
448472

449473
JsonBackendWalker walker(sink);
450-
return walker.walk(&document);
474+
assert(input.data() == input_data);
475+
assert(input.size() == input_size);
476+
JsonParseResult walk_result = walker.walk(&document);
477+
assert(input.data() == input_data);
478+
assert(input.size() == input_size);
479+
return walk_result;
451480
}
452481

453482
} // namespace RequestBodyProcessor

0 commit comments

Comments
 (0)