Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Speed up Base64 decoding of data containing ignored characters (both in
non-strict mode and with an explicit *ignorechars* argument).
It is now up to 2 times faster for multiline Base64 data.
37 changes: 26 additions & 11 deletions Modules/binascii.c
Original file line number Diff line number Diff line change
Expand Up @@ -471,10 +471,19 @@


/* Return non-zero if byte `c` is one of the caller-supplied ignorable bytes.
 *
 * ignorechars: buffer listing the bytes to ignore.  A NULL `buf` means no
 *              ignore set was supplied, so nothing is ignored.
 * ignorecache: 256-bit set (32 bytes, one bit per possible byte value) that
 *              the caller must zero before the first call.  A bit is set
 *              once memchr() has confirmed the byte is in `ignorechars`, so
 *              later lookups of the same byte skip the linear scan.  Only
 *              positive results are cached; bytes that are not in the set
 *              are re-scanned on every call.
 */
static int
ignorechar(unsigned char c, Py_buffer *ignorechars, char ignorecache[32])
{
    if (ignorechars->buf == NULL) {
        return 0;
    }

    /* Byte index is c / 8 and bit index is c % 8.  The index must be
     * `c >> 3`: shifting an 8-bit value right by 8 always yields 0, which
     * would collapse the whole cache into ignorecache[0] and make bytes
     * sharing the same low three bits alias each other. */
    const unsigned int slot = c >> 3;
    const int mask = 1 << (c & 7);

    if (ignorecache[slot] & mask) {
        return 1;
    }
    if (memchr(ignorechars->buf, c, ignorechars->len)) {
        ignorecache[slot] |= mask;
        return 1;
    }
    return 0;
}

/*[clinic input]
Expand Down Expand Up @@ -508,6 +517,10 @@
if (strict_mode == -1) {
strict_mode = (ignorechars->buf != NULL);
}
char ignorecache[32];
if (strict_mode && ignorechars->buf != NULL) {
memset(ignorecache, 0, sizeof(ignorecache));
}

/* Allocate the buffer */
Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
Expand All @@ -517,8 +530,7 @@
}
unsigned char *bin_data = PyBytesWriter_GetData(writer);

size_t i = 0; /* Current position in input */

fastpath:
/* Fast path: use optimized decoder for complete quads.
* This works for both strict and non-strict mode for valid input.
* The fast path stops at padding, invalid chars, or incomplete groups.
Expand All @@ -527,7 +539,8 @@
Py_ssize_t fast_chars = base64_decode_fast(ascii_data, (Py_ssize_t)ascii_len,
bin_data, table_a2b_base64);
if (fast_chars > 0) {
i = (size_t)fast_chars;
ascii_data += fast_chars;
ascii_len -= fast_chars;
bin_data += (fast_chars / 4) * 3;
}
}
Expand All @@ -536,8 +549,8 @@
int quad_pos = 0;
unsigned char leftchar = 0;
int pads = 0;
for (; i < ascii_len; i++) {
unsigned char this_ch = ascii_data[i];
for (; ascii_len; ascii_data++, ascii_len--) {
unsigned char this_ch = *ascii_data;

/* Check for pad sequences and ignore
** the invalid ones.
Expand All @@ -549,7 +562,7 @@
if (quad_pos == 0) {
state = get_binascii_state(module);
if (state) {
PyErr_SetString(state->Error, (i == 0)
PyErr_SetString(state->Error, (ascii_data == data->buf)
? "Leading padding not allowed"
: "Excess padding not allowed");
}
Expand Down Expand Up @@ -580,7 +593,7 @@

unsigned char v = table_a2b_base64[this_ch];
if (v >= 64) {
if (strict_mode && !ignorechar(this_ch, ignorechars)) {
if (strict_mode && !ignorechar(this_ch, ignorechars, ignorecache)) {
state = get_binascii_state(module);
if (state) {
PyErr_SetString(state->Error, "Only base64 data is allowed");
Expand Down Expand Up @@ -621,7 +634,9 @@
quad_pos = 0;
*bin_data++ = (leftchar << 6) | (v);
leftchar = 0;
break;
ascii_data++;
ascii_len--;
goto fastpath;
}
}

Expand Down
Loading