Skip to content

Commit 959d5b3

Browse files
gh-144264: Speed up Base64 decoding of data containing ignored characters
Try the fast path again after decoding a quad in the slow path. Use a bitmap cache for the ignorechars argument.
1 parent 3e9a5b0 commit 959d5b3

2 files changed

Lines changed: 29 additions & 11 deletions

File tree

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Speed up Base64 decoding of data containing ignored characters (both in
2+
non-strict mode and with an explicit *ignorechars* argument).
3+
It is now up to 2 times faster for multiline Base64 data.

Modules/binascii.c

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -471,10 +471,19 @@ binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick)
471471

472472

473473
static int
474-
ignorechar(unsigned char c, Py_buffer *ignorechars)
474+
ignorechar(unsigned char c, Py_buffer *ignorechars, char ignorecache[32])
475475
{
476-
return (ignorechars->buf != NULL &&
477-
memchr(ignorechars->buf, c, ignorechars->len));
476+
if (ignorechars->buf == NULL) {
477+
return 0;
478+
}
479+
if (ignorecache[c >> 8] & (1 << (c & 7))) {
480+
return 1;
481+
}
482+
if (memchr(ignorechars->buf, c, ignorechars->len)) {
483+
ignorecache[c >> 8] |= 1 << (c & 7);
484+
return 1;
485+
}
486+
return 0;
478487
}
479488

480489
/*[clinic input]
@@ -508,6 +517,10 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
508517
if (strict_mode == -1) {
509518
strict_mode = (ignorechars->buf != NULL);
510519
}
520+
char ignorecache[32];
521+
if (strict_mode && ignorechars->buf != NULL) {
522+
memset(ignorecache, 0, sizeof(ignorecache));
523+
}
511524

512525
/* Allocate the buffer */
513526
Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
@@ -517,8 +530,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
517530
}
518531
unsigned char *bin_data = PyBytesWriter_GetData(writer);
519532

520-
size_t i = 0; /* Current position in input */
521-
533+
fastpath:
522534
/* Fast path: use optimized decoder for complete quads.
523535
* This works for both strict and non-strict mode for valid input.
524536
* The fast path stops at padding, invalid chars, or incomplete groups.
@@ -527,7 +539,8 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
527539
Py_ssize_t fast_chars = base64_decode_fast(ascii_data, (Py_ssize_t)ascii_len,
528540
bin_data, table_a2b_base64);
529541
if (fast_chars > 0) {
530-
i = (size_t)fast_chars;
542+
ascii_data += fast_chars;
543+
ascii_len -= fast_chars;
531544
bin_data += (fast_chars / 4) * 3;
532545
}
533546
}
@@ -536,8 +549,8 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
536549
int quad_pos = 0;
537550
unsigned char leftchar = 0;
538551
int pads = 0;
539-
for (; i < ascii_len; i++) {
540-
unsigned char this_ch = ascii_data[i];
552+
for (; ascii_len; ascii_data++, ascii_len--) {
553+
unsigned char this_ch = *ascii_data;
541554

542555
/* Check for pad sequences and ignore
543556
** the invalid ones.
@@ -549,7 +562,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
549562
if (quad_pos == 0) {
550563
state = get_binascii_state(module);
551564
if (state) {
552-
PyErr_SetString(state->Error, (i == 0)
565+
PyErr_SetString(state->Error, (ascii_data == data->buf)
553566
? "Leading padding not allowed"
554567
: "Excess padding not allowed");
555568
}
@@ -580,7 +593,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
580593

581594
unsigned char v = table_a2b_base64[this_ch];
582595
if (v >= 64) {
583-
if (strict_mode && !ignorechar(this_ch, ignorechars)) {
596+
if (strict_mode && !ignorechar(this_ch, ignorechars, ignorecache)) {
584597
state = get_binascii_state(module);
585598
if (state) {
586599
PyErr_SetString(state->Error, "Only base64 data is allowed");
@@ -621,7 +634,9 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
621634
quad_pos = 0;
622635
*bin_data++ = (leftchar << 6) | (v);
623636
leftchar = 0;
624-
break;
637+
ascii_data++;
638+
ascii_len--;
639+
goto fastpath;
625640
}
626641
}
627642

0 commit comments

Comments
 (0)