From 7aae38c4c2356398986642ca0fe293859be771ce Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Wed, 17 Sep 2025 17:47:04 +0100 Subject: [PATCH 01/16] Issue #gh-139122: Reimplement base UUID type, uuid4(), and uuid7() in C The C implementation considerably boosts the performance of the key UUID operations: ------------------------------------ Operation Speedup ------------------------------------ uuid4() generation 15.01x uuid7() generation 29.64x UUID from string 6.76x UUID from bytes 5.16x str(uuid) conversion 6.66x ------------------------------------ Summary of changes: * The UUID type is reimplemented in C in its entirety. * The pure-Python is kept around and is used of the C implementation isn't available for some reason. * Both implementations are tested extensively; additional tests are added to ensure that the C implementation of the type follows the pure Python implementation fully. * The Python implementation stores UUID values as int objects. The C implementation stores them as `uint8_t[16]` array. * The C implementation has faster hash() implementation but also caches the computed hash value to speedup cases when UUIDs are used as set/dict keys. * The C implementation has a freelist to make new UUID object instantiation as fast as possible. * uuid4() and uuid7() are now implmented in C. The most performance boost (10x) comes from overfetching entropy to decrease the number of _PyOS_URandom() calls. On its own it's a safe optimization with the edge case that Unix fork needs to be explicitly handled. We do that by comparing the current PID to the PID of when the random buffer was populated. * Portions of code are coming from my implementation of faster UUID in gel-python [1]. I did use AI during the development, but basically had to rewrite the code it generated to be more idiomatic and efficient. * The benchmark can be found here [2]. * This PR makes Python UUID operations as fast as they are in NodeJS and Bun runtimes. 
[1] https://github.com/MagicStack/py-pgproto/blob/b8109fb311a59f30f9947567a13508da9a776564/uuid.pyx [2] https://gist.github.com/1st1/f03e816f34a61e4d46c78ff98baf4818 --- Include/internal/pycore_pylifecycle.h | 4 +- Lib/test/test_uuid.py | 120 +- Lib/uuid.py | 53 +- ...-09-18-14-13-00.gh-issue-139122.m3lp66.rst | 57 + Modules/_uuidmodule.c | 1601 ++++++++++++++++- Modules/clinic/_uuidmodule.c.h | 255 +++ 6 files changed, 2052 insertions(+), 38 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-09-18-14-13-00.gh-issue-139122.m3lp66.rst create mode 100644 Modules/clinic/_uuidmodule.c.h diff --git a/Include/internal/pycore_pylifecycle.h b/Include/internal/pycore_pylifecycle.h index 8faf7a4d403f84..95e49ec316e7aa 100644 --- a/Include/internal/pycore_pylifecycle.h +++ b/Include/internal/pycore_pylifecycle.h @@ -98,8 +98,8 @@ extern const char* _Py_gitversion(void); // Export for '_asyncio' shared extension PyAPI_FUNC(int) _Py_IsInterpreterFinalizing(PyInterpreterState *interp); -/* Random */ -extern int _PyOS_URandom(void *buffer, Py_ssize_t size); +// Export for '_uuid' shared extension +PyAPI_FUNC(int) _PyOS_URandom(void *buffer, Py_ssize_t size); // Export for '_random' shared extension PyAPI_FUNC(int) _PyOS_URandomNonblock(void *buffer, Py_ssize_t size); diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index 33045a78721aac..a315268c3d3cb7 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -35,6 +35,7 @@ def get_command_stdout(command, args): class BaseTestUUID: uuid = None + is_c_uuid = False def test_nil_uuid(self): nil_uuid = self.uuid.NIL @@ -282,14 +283,16 @@ def test_exceptions(self): badvalue(lambda: self.uuid.UUID('123456781234567812345678z2345678')) # Badly formed bytes. 
- badvalue(lambda: self.uuid.UUID(bytes='abc')) - badvalue(lambda: self.uuid.UUID(bytes='\0'*15)) - badvalue(lambda: self.uuid.UUID(bytes='\0'*17)) + badtype(lambda: self.uuid.UUID(bytes='abc')) + badvalue(lambda: self.uuid.UUID(bytes=b'abc')) + badvalue(lambda: self.uuid.UUID(bytes=b'\0'*15)) + badvalue(lambda: self.uuid.UUID(bytes=b'\0'*17)) # Badly formed bytes_le. - badvalue(lambda: self.uuid.UUID(bytes_le='abc')) - badvalue(lambda: self.uuid.UUID(bytes_le='\0'*15)) - badvalue(lambda: self.uuid.UUID(bytes_le='\0'*17)) + badtype(lambda: self.uuid.UUID(bytes_le='abc')) + badvalue(lambda: self.uuid.UUID(bytes_le=b'abc')) + badvalue(lambda: self.uuid.UUID(bytes_le=b'\0'*15)) + badvalue(lambda: self.uuid.UUID(bytes_le=b'\0'*17)) # Badly formed fields. badvalue(lambda: self.uuid.UUID(fields=(1,))) @@ -877,12 +880,18 @@ def test_uuid6_test_vectors(self): equal((u.int >> 80) & 0xffff, 0x232a) equal((u.int >> 96) & 0xffff_ffff, 0x1ec9_414c) - def test_uuid7(self): + def test_uuid7_functional(self): equal = self.assertEqual u = self.uuid.uuid7() equal(u.variant, self.uuid.RFC_4122) equal(u.version, 7) + def test_uuid7_mock(self): + if self.is_c_uuid: + self.skipTest("C implementation of uuid7 cannot be tested with mocks") + + equal = self.assertEqual + # 1 Jan 2023 12:34:56.123_456_789 timestamp_ns = 1672533296_123_456_789 # ns precision timestamp_ms, _ = divmod(timestamp_ns, 1_000_000) @@ -940,7 +949,15 @@ def test_uuid7_uniqueness(self): versions = {u.version for u in uuids} self.assertSetEqual(versions, {7}) - def test_uuid7_monotonicity(self): + def test_uuid7_monotonicity_functional(self): + equal = self.assertEqual + us = [self.uuid.uuid7() for _ in range(10_000)] + equal(us, sorted(us)) + + def test_uuid7_monotonicity_mock(self): + if self.is_c_uuid: + self.skipTest("C implementation of uuid7 cannot be tested with mocks") + equal = self.assertEqual us = [self.uuid.uuid7() for _ in range(10_000)] @@ -1003,7 +1020,10 @@ def test_uuid7_monotonicity(self): 
self.assertLess(u1, u2) - def test_uuid7_timestamp_backwards(self): + def test_uuid7_timestamp_backwards_mock(self): + if self.is_c_uuid: + self.skipTest("C implementation of uuid7 cannot be tested with mocks") + equal = self.assertEqual # 1 Jan 2023 12:34:56.123_456_789 timestamp_ns = 1672533296_123_456_789 # ns precision @@ -1043,7 +1063,10 @@ def test_uuid7_timestamp_backwards(self): equal((u.int >> 32) & 0x3fff_ffff, counter_lo + 1) equal(u.int & 0xffff_ffff, tail) - def test_uuid7_overflow_counter(self): + def test_uuid7_overflow_counter_mock(self): + if self.is_c_uuid: + self.skipTest("C implementation of uuid7 cannot be tested with mocks") + equal = self.assertEqual # 1 Jan 2023 12:34:56.123_456_789 timestamp_ns = 1672533296_123_456_789 # ns precision @@ -1149,6 +1172,7 @@ def test_uuid_weakref(self): class CommandLineTestCases: uuid = None # to be defined in subclasses + is_c_uuid = False def do_test_standalone_uuid(self, version): stdout = io.StringIO() @@ -1257,6 +1281,7 @@ class TestUUIDWithoutExtModule(CommandLineTestCases, BaseTestUUID, unittest.Test @unittest.skipUnless(c_uuid, 'requires the C _uuid module') class TestUUIDWithExtModule(CommandLineTestCases, BaseTestUUID, unittest.TestCase): uuid = c_uuid + is_c_uuid = True def check_has_stable_libuuid_extractable_node(self): if not self.uuid._has_stable_extractable_node: @@ -1287,6 +1312,7 @@ def test_windows_getnode_from_libuuid(self): class BaseTestInternals: _uuid = py_uuid + is_c_uuid = False def check_parse_mac(self, aix): if not aix: @@ -1480,6 +1506,7 @@ class TestInternalsWithoutExtModule(BaseTestInternals, unittest.TestCase): @unittest.skipUnless(c_uuid, 'requires the C _uuid module') class TestInternalsWithExtModule(BaseTestInternals, unittest.TestCase): uuid = c_uuid + is_c_uuid = True @unittest.skipUnless(os.name == 'posix', 'requires Posix') def test_unix_getnode(self): @@ -1497,5 +1524,78 @@ def test_windll_getnode(self): self.check_node(node) +@unittest.skipUnless(c_uuid, "requires the 
C _uuid module") +class TestCImplementationCompat(unittest.TestCase): + def test_compatibility(self): + import uuid + + PU = uuid._py_UUID + CU = uuid._c_UUID + N = 1000 + + uuids = [ + "00000000-0000-0000-0000-000000000000", + "ffffffff-ffff-ffff-ffff-ffffffffffff", + "c0bec4fd-e4e3-050c-a362-da3f734ffd56", # regression + *(str(uuid.uuid4()) for _ in range(N)), + *(str(uuid.uuid7()) for _ in range(N)), + *(str(uuid.uuid1()) for _ in range(N)), + *(str(uuid.UUID(bytes=os.urandom(16))) for _ in range(N)), + ] + + def full_test(p, u): + self.assertEqual(p, u) + self.assertEqual(p.hex, u.hex) + self.assertEqual(p.int, u.int) + self.assertEqual(p.variant, u.variant) + self.assertEqual(p.version, u.version) + self.assertEqual(p.is_safe, u.is_safe) + self.assertEqual(p.bytes, u.bytes) + self.assertEqual(p.bytes_le, u.bytes_le) + self.assertEqual(p.fields, u.fields) + self.assertEqual(p.time_low, u.time_low) + self.assertEqual(p.time_mid, u.time_mid) + self.assertEqual(p.time_hi_version, u.time_hi_version) + self.assertEqual(p.clock_seq_hi_variant, u.clock_seq_hi_variant) + self.assertEqual(p.clock_seq_low, u.clock_seq_low) + self.assertEqual(p.node, u.node) + + all_ps = set() + all_us = set() + for uuid_str in uuids: + with self.subTest(uuid=uuid_str): + p = PU(uuid_str) + u = CU(uuid_str) + full_test(p, u) + + u2 = CU(bytes_le=p.bytes_le) + full_test(p, u2) + + u3 = CU(fields=p.fields) + full_test(p, u3) + + u4 = CU(int=p.int) + full_test(p, u4) + + u5 = CU( + hex=p.hex, + is_safe=uuid.SafeUUID.safe, + ) + full_test( + PU( + uuid_str, + is_safe=uuid.SafeUUID.safe, + ), + u5, + ) + + all_ps.add(p) + all_us.add(u) + + self.assertEqual(len(all_ps), len(all_us)) + self.assertEqual(len(all_ps), len(uuids)) + + + if __name__ == '__main__': unittest.main() diff --git a/Lib/uuid.py b/Lib/uuid.py index c0150a59d7cb9a..03206dd28faf61 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -99,6 +99,7 @@ class SafeUUID: _UINT_128_MAX = (1 << 128) - 1 + # 128-bit mask to clear the variant 
and version bits of a UUID integral value _RFC_4122_CLEARFLAGS_MASK = ~((0xf000 << 64) | (0xc000 << 48)) # RFC 4122 variant bits and version bits to activate on a UUID integral value. @@ -111,6 +112,19 @@ class SafeUUID: _RFC_4122_VERSION_8_FLAGS = ((8 << 76) | (0x8000 << 48)) +# Import optional C extension at toplevel, to help disabling it when testing +try: + import _uuid + _generate_time_safe = getattr(_uuid, "generate_time_safe", None) + _has_stable_extractable_node = _uuid.has_stable_extractable_node + _UuidCreate = getattr(_uuid, "UuidCreate", None) +except ImportError: + _uuid = None + _generate_time_safe = None + _has_stable_extractable_node = False + _UuidCreate = None + + class UUID: """Instances of the UUID class represent UUIDs as specified in RFC 4122. UUID objects are immutable, hashable, and usable as dictionary keys. @@ -219,6 +233,10 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, raise ValueError('badly formed hexadecimal UUID string') int = int_(hex, 16) elif bytes_le is not None: + if not isinstance(bytes_le, bytes_): + raise TypeError( + f'a bytes-like object is required, not {type(bytes_le).__name__!r}' + ) if len(bytes_le) != 16: raise ValueError('bytes_le is not a 16-char string') assert isinstance(bytes_le, bytes_), repr(bytes_le) @@ -226,6 +244,10 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, bytes_le[8-1:6-1:-1] + bytes_le[8:]) int = int_.from_bytes(bytes) # big endian elif bytes is not None: + if not isinstance(bytes, bytes_): + raise TypeError( + f'a bytes-like object is required, not {type(bytes).__name__!r}' + ) if len(bytes) != 16: raise ValueError('bytes is not a 16-char string') assert isinstance(bytes, bytes_), repr(bytes) @@ -234,7 +256,7 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, if len(fields) != 6: raise ValueError('fields is not a 6-tuple') (time_low, time_mid, time_hi_version, - clock_seq_hi_variant, clock_seq_low, node) = fields + 
clock_seq_hi_variant, clock_seq_low, node) = fields if not 0 <= time_low < (1 << 32): raise ValueError('field 1 out of range (need a 32-bit value)') if not 0 <= time_mid < (1 << 16): @@ -249,7 +271,7 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, raise ValueError('field 6 out of range (need a 48-bit value)') clock_seq = (clock_seq_hi_variant << 8) | clock_seq_low int = ((time_low << 96) | (time_mid << 80) | - (time_hi_version << 64) | (clock_seq << 48) | node) + (time_hi_version << 64) | (clock_seq << 48) | node) if not 0 <= int <= _UINT_128_MAX: raise ValueError('int is out of range (need a 128-bit value)') if version is not None: @@ -629,19 +651,6 @@ def _netstat_getnode(): return _find_mac_under_heading('netstat', '-ian', b'Address') -# Import optional C extension at toplevel, to help disabling it when testing -try: - import _uuid - _generate_time_safe = getattr(_uuid, "generate_time_safe", None) - _has_stable_extractable_node = _uuid.has_stable_extractable_node - _UuidCreate = getattr(_uuid, "UuidCreate", None) -except ImportError: - _uuid = None - _generate_time_safe = None - _has_stable_extractable_node = False - _UuidCreate = None - - def _unix_getnode(): """Get the hardware address on Unix using the _uuid extension module.""" if _generate_time_safe and _has_stable_extractable_node: @@ -932,6 +941,20 @@ def uuid8(a=None, b=None, c=None): return UUID._from_int(int_uuid_8) +_py_uuid4 = uuid4 +_py_uuid7 = uuid7 +_py_UUID = UUID +try: + from _uuid import UUID, uuid4, uuid7 +except ImportError: + _c_UUID = None + _c_uuid4 = None + _c_uuid7 = None +else: + _c_UUID = UUID + _c_uuid4 = uuid4 + _c_uuid7 = uuid7 + def main(): """Run the uuid command line interface.""" uuid_funcs = { diff --git a/Misc/NEWS.d/next/Library/2025-09-18-14-13-00.gh-issue-139122.m3lp66.rst b/Misc/NEWS.d/next/Library/2025-09-18-14-13-00.gh-issue-139122.m3lp66.rst new file mode 100644 index 00000000000000..72eb12fecf9853 --- /dev/null +++ 
b/Misc/NEWS.d/next/Library/2025-09-18-14-13-00.gh-issue-139122.m3lp66.rst @@ -0,0 +1,57 @@ +Reimplement base UUID type, uuid4(), and uuid7() in C + +The C implementation considerably boosts the performance of the key UUID +operations: + +------------------------------------ +Operation Speedup +------------------------------------ +uuid4() generation 15.01x +uuid7() generation 29.64x +UUID from string 6.76x +UUID from bytes 5.16x +str(uuid) conversion 6.66x +------------------------------------ + +Summary of changes: + +* The UUID type is reimplemented in C in its entirety. + +* The pure-Python implementation is kept around and is used if the C + implementation isn't available for some reason. + +* Both implementations are tested extensively; additional tests are + added to ensure that the C implementation of the type follows the pure + Python implementation fully. + +* The Python implementation stores UUID values as int objects. The C + implementation stores them as a `uint8_t[16]` array. + +* The C implementation has a faster hash() implementation but also caches + the computed hash value to speed up cases when UUIDs are used as + set/dict keys. + +* The C implementation has a freelist to make new UUID object + instantiation as fast as possible. + +* uuid4() and uuid7() are now implemented in C. Most of the performance + boost (10x) comes from overfetching entropy to decrease the number of + _PyOS_URandom() calls. On its own it's a safe optimization with the + edge case that Unix fork needs to be explicitly handled. We do that by + comparing the current PID to the PID of when the random buffer was + populated. + +* Portions of code are coming from my implementation of faster UUID + in gel-python [1]. I did use AI during the development, but basically + had to rewrite the code it generated to be more idiomatic and + efficient. + +* The benchmark can be found here [2]. + +* This PR makes Python UUID operations as fast as they are in NodeJS and + Bun runtimes. 
+ +[1] +https://github.com/MagicStack/py-pgproto/blob/b8109fb311a59f30f9947567a13508da9a776564/uuid.pyx + +[2] https://gist.github.com/1st1/f03e816f34a61e4d46c78ff98baf4818 diff --git a/Modules/_uuidmodule.c b/Modules/_uuidmodule.c index c31a7e8fea5608..b7bdb02381db64 100644 --- a/Modules/_uuidmodule.c +++ b/Modules/_uuidmodule.c @@ -1,15 +1,18 @@ -/* - * Python UUID module that wraps libuuid or Windows rpcrt4.dll. - * DCE compatible Universally Unique Identifier library. - */ +// UUID accelerator base type. -// Need limited C API version 3.13 for Py_mod_gil -#include "pyconfig.h" // Py_GIL_DISABLED -#ifndef Py_GIL_DISABLED -# define Py_LIMITED_API 0x030d0000 +#ifndef Py_BUILD_CORE_BUILTIN +# define Py_BUILD_CORE_MODULE 1 #endif +#include "pyconfig.h" // Py_GIL_DISABLED #include "Python.h" +#include // for strncasecmp +#include "structmember.h" // for PyMemberDef + +#include "pycore_long.h" // _PyLong_FromByteArray, _PyLong_AsByteArray +#include "pycore_pylifecycle.h" // _PyOS_URandom() +#include "pycore_time.h" // PyTime_Time + #if defined(HAVE_UUID_H) // AIX, FreeBSD, libuuid with pkgconf #include @@ -18,12 +21,29 @@ #include #endif +#ifdef HAVE_UNISTD_H +# include // getpid() +#endif +#ifdef HAVE_PROCESS_H +# include // getpid() +#endif +#ifdef MS_WINDOWS +# include // GetCurrentProcessId() +#endif + #ifdef MS_WINDOWS #include #endif #ifndef MS_WINDOWS + +/*[clinic input] +module _uuid +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7cbed123a45a3859]*/ + + static PyObject * py_uuid_generate_time_safe(PyObject *Py_UNUSED(context), PyObject *Py_UNUSED(ignored)) @@ -91,13 +111,1493 @@ py_windows_has_stable_node(void) #endif /* MS_WINDOWS */ +typedef struct uuidobject { + PyObject_HEAD + uint8_t bytes[16]; + Py_hash_t cached_hash; + PyObject *is_safe; + PyObject *weakreflist; +} uuidobject; + + +// UUID Structure per RFC 9562: +// +// A UUID is 128 bits (16 bytes) represented as: +// +// String: xx xx xx xx - xx xx - Mx 
xx - Nx xx - xx xx xx xx xx xx +// Byte pos: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +// ^^^^^^^^^^^ ^^^^^ ^^^^^ ^^^^^ ^^^^^^^^^^^^^^^^^ +// time_low mid hi seq node +// +// Byte Layout (big-endian): +// +// Bytes 0-3: time_low (32 bits) +// Bytes 4-5: time_mid (16 bits) +// Bytes 6-7: time_hi_and_version (16 bits) +// Bytes 8-9: clock_seq_and_variant (16 bits) +// Bytes 10-15: node (48 bits) +// +// Version field is located in byte 6; most significant 4 bits: +// +// Variant field is located in byte 8; most significant variable bits: +// 0xxx: Reserved for NCS compatibility +// 10xx: RFC 4122/9562 (standard) +// 110x: Reserved for Microsoft compatibility +// 111x: Reserved for future definition + +#define RANDOM_BUF_SIZE 256 +#define MAX_FREE_LIST_SIZE 32 + +/* State of the _uuid module */ +typedef struct { + PyTypeObject *UuidType; + + PyObject *safe_uuid; + PyObject *safe_uuid_safe; + PyObject *safe_uuid_unsafe; + PyObject *safe_uuid_unknown; + + PyObject *uint128_max; + + PyObject *reserved_ncs; + PyObject *rfc_4122; + PyObject *reserved_microsoft; + PyObject *reserved_future; + + // UUID v7 state + uint64_t last_timestamp_v7; + uint64_t last_counter_v7; + + // We overfetch entropy to speed up successive uuid generations; + // this enables 10x peformance boost. + uint8_t random_buf[RANDOM_BUF_SIZE]; + uint64_t random_idx; + uint64_t random_last_pid; + + // A freelist for uuid objects -- 15-20% performance boost. 
+ uuidobject *freelist; + uint64_t freelist_size; +} uuid_state; + +#include "clinic/_uuidmodule.c.h" + +/*[clinic input] +class uuid.UUID "uuidobject *" "&UuidType" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=84ae6e2089cffd3f]*/ + +// Forward declarations +static int from_hex(uuidobject *self, PyObject *hex); +static int from_bytes_le(uuidobject *self, Py_buffer *bytes_le); +static int from_int(uuidobject *self, PyObject *int_value, int validate); +static int from_fields(uuidobject *self, PyObject *fields); + +static uint64_t +uuid_getpid(void) { + #if !defined(MS_WINDOWS) || defined(MS_WINDOWS_DESKTOP) || defined(MS_WINDOWS_SYSTEM) + return (uint64_t)getpid(); +#else + return (uint64_t)GetCurrentProcessId(); +#endif +} + +static inline uuid_state * +get_uuid_state(PyObject *mod) +{ + uuid_state *state = PyModule_GetState(mod); + assert(state != NULL); + return state; +} + +static inline uuid_state * +get_uuid_state_by_cls(PyTypeObject *cls) +{ + uuid_state *state = (uuid_state *)PyType_GetModuleState(cls); + assert(state != NULL); + return state; +} + +// Forward declaration +static PyObject *uuid_from_bytes_array(PyTypeObject *type, uint8_t bytes[16]); + +static int +gen_random(uuid_state *state, uint8_t *bytes, Py_ssize_t size) +{ + // Overfetching & caching entropy improves the performance 10x. + // There's a precedent with NodeJS doing exact same thing for + // improving performance of their UUID implementation. + + // IMPORTANT: callers should have a critical section or a lock + // around this function. + + uint64_t pid = uuid_getpid(); + if (pid != state->random_last_pid) { + // The main concern to take core of with caching entropy is handling + // fork -- we don't want the child process to share any entropy with + // us. Luckily getpid() is fast. 
+ state->random_last_pid = pid; + state->random_idx = RANDOM_BUF_SIZE; + } + + if (state->random_idx + size <= RANDOM_BUF_SIZE) { + memcpy(bytes, state->random_buf + state->random_idx, size); + state->random_idx += size; + } + else { + // Pure Python implementation uses os.urandom() which + // wraps _PyOS_URandom + if (_PyOS_URandom(state->random_buf, RANDOM_BUF_SIZE) < 0) { + return -1; + } + memcpy(bytes, state->random_buf, size); + state->random_idx = size; + } + return 0; +} + +/*[clinic input] +@critical_section +_uuid.uuid4 + +Generate a random UUID (version 4). +[clinic start generated code]*/ + +static PyObject * +_uuid_uuid4_impl(PyObject *module) +/*[clinic end generated code: output=b835af30d9d6efc5 input=9cfb3a3b71c25cdf]*/ +{ + uuid_state *state = get_uuid_state(module); + uint8_t bytes[16]; + + if (gen_random(state, bytes, 16) < 0) { + return NULL; + } + + // Set version (4) and variant + bytes[6] = (bytes[6] & 0x0f) | 0x40; + bytes[8] = (bytes[8] & 0x3f) | 0x80; + + return uuid_from_bytes_array(state->UuidType, bytes); +} + +static inline int +uuid7_get_counter_and_tail(uuid_state *state, uint64_t *counter, uint32_t *tail) +{ + uint8_t rand_bytes[10]; + if (gen_random(state, rand_bytes, 10) < 0) { + return -1; + } + + uint16_t high = ((uint16_t)rand_bytes[0] << 8) | rand_bytes[1]; + uint64_t low = ((uint64_t)rand_bytes[2] << 56) | + ((uint64_t)rand_bytes[3] << 48) | + ((uint64_t)rand_bytes[4] << 40) | + ((uint64_t)rand_bytes[5] << 32) | + ((uint64_t)rand_bytes[6] << 24) | + ((uint64_t)rand_bytes[7] << 16) | + ((uint64_t)rand_bytes[8] << 8) | + ((uint64_t)rand_bytes[9]); + + *counter = (((uint64_t)(high & 0x1FF) << 32) | (low >> 32)) & 0x1FFFFFFFFFF; + *tail = (uint32_t)low; + return 0; +} + + +// There's code that modifies the module state (emulating global variables +// used in the pure Python implementation.) So we're slapping a critical +// section here to make it easier to reason about the C port of this code. 
+/*[clinic input] +@critical_section +_uuid.uuid7 + +Generate a UUID from a Unix timestamp in milliseconds and random bits. + +UUIDv7 objects feature monotonicity within a millisecond. +[clinic start generated code]*/ + +static PyObject * +_uuid_uuid7_impl(PyObject *module) +/*[clinic end generated code: output=f301accc11162c91 input=88514d61dc785108]*/ +{ + uuid_state *state = get_uuid_state(module); + uint8_t bytes[16]; + uint64_t timestamp_ms, counter; + uint32_t tail; + + PyTime_t pytime; + if (PyTime_Time(&pytime) < 0) { + return NULL; + } + timestamp_ms = (uint64_t)(pytime / 1000000); + + if (state->last_timestamp_v7 == 0 || timestamp_ms > state->last_timestamp_v7) { + if (uuid7_get_counter_and_tail(state, &counter, &tail) < 0) { + return NULL; + } + } else { + if (timestamp_ms < state->last_timestamp_v7) { + timestamp_ms = state->last_timestamp_v7 + 1; + } + // advance the 42-bit counter + counter = state->last_counter_v7 + 1; + if (counter > 0x3FFFFFFFFFF) { + // advance the 48-bit timestamp + timestamp_ms += 1; + if (uuid7_get_counter_and_tail(state, &counter, &tail) < 0) { + return NULL; + } + } else { + // This is the common fast path, we only need 4 bytes of entropy + // 32-bit random data + if (gen_random(state, (uint8_t *)&tail, 4) < 0) { + return NULL; + } + } + } + + timestamp_ms &= 0xFFFFFFFFFFFF; + bytes[0] = (timestamp_ms >> 40); + bytes[1] = (timestamp_ms >> 32); + bytes[2] = (timestamp_ms >> 24); + bytes[3] = (timestamp_ms >> 16); + bytes[4] = (timestamp_ms >> 8); + bytes[5] = timestamp_ms; + + uint16_t counter_hi = (counter >> 30) & 0x0FFF; + bytes[6] = 0x70 | ((counter_hi >> 8)); // Version 7 = 0111 + bytes[7] = counter_hi; + + uint16_t counter_mid = (counter >> 16) & 0x3FFF; + bytes[8] = 0x80 | (counter_mid >> 8); // Variant = 10 + bytes[9] = counter_mid; + + uint16_t counter_lo = counter & 0xFFFF; + bytes[10] = counter_lo >> 8; + bytes[11] = counter_lo; + + bytes[12] = tail >> 24; + bytes[13] = tail >> 16; + bytes[14] = tail >> 8; + 
bytes[15] = tail; + + state->last_timestamp_v7 = timestamp_ms; + state->last_counter_v7 = counter; + + return uuid_from_bytes_array(state->UuidType, bytes); +} + +/*[clinic input] +uuid.UUID.__init__ + + hex: 'U' = NULL + bytes: 'y*' = None + bytes_le: 'y*' = None + fields: object = NULL + int: object = NULL + version: object = NULL + * + is_safe: object = NULL + +UUID is a fast base implementation type for uuid.UUID. +[clinic start generated code]*/ + +static int +_uuid_UUID___init___impl(uuidobject *self, PyObject *hex, Py_buffer *bytes, + Py_buffer *bytes_le, PyObject *fields, + PyObject *int_value, PyObject *version, + PyObject *is_safe) +/*[clinic end generated code: output=93a6881c8f79bf9b input=b9c79672fbd76a99]*/ + +{ + uuid_state *state = get_uuid_state_by_cls(Py_TYPE(self)); + + int passed = 0; + if (hex != NULL) passed++; + if (bytes->obj != NULL) passed++; + if (bytes_le->obj != NULL) passed++; + if (fields != NULL) passed++; + if (int_value != NULL) passed++; + if (passed != 1) { + PyErr_SetString( + PyExc_TypeError, + "one of the hex, bytes, bytes_le, fields, or int arguments must be given" + ); + return -1; + } + + if (hex != NULL) { + if (from_hex(self, hex) < 0) { + return -1; + } + } + else if (bytes->obj != NULL) { + if (bytes->len != 16) { + PyErr_SetString( + PyExc_ValueError, + "bytes is not a 16-char string" + ); + return -1; + } + memcpy(self->bytes, bytes->buf, 16); + } + else if (bytes_le->obj != NULL) { + if (from_bytes_le(self, bytes_le) < 0) { + return -1; + } + } + else if (fields != NULL) { + if (from_fields(self, fields) < 0) { + return -1; + } + } + else if (int_value != NULL) { + if (from_int(self, int_value, 1) < 0) { + return -1; + } + } + else { + Py_UNREACHABLE(); + } + + if (version != NULL && version != Py_None) { + long version_num = PyLong_AsLong(version); + if (version_num == -1 && PyErr_Occurred()) { + return -1; + } + if (version_num < 1 || version_num > 8) { + PyErr_SetString(PyExc_ValueError, "illegal version number"); 
+ return -1; + } + + // Clear variant bits (keep only lower 6 bits of byte 8) + self->bytes[8] &= 0x3f; // 0011 1111 + // Clear version bits (keep only lower 4 bits of byte 6) + self->bytes[6] &= 0x0f; // 0000 1111 + // Set the variant to RFC 4122/9562 (binary 10xx xxxx) + self->bytes[8] |= 0x80; // 1000 0000 + // Set the version number (upper 4 bits of byte 6) + self->bytes[6] |= (version_num << 4); + } + + if (is_safe != NULL) { + // Validate by calling SafeUUID(is_safe) to ensure it's a valid enum member + PyObject *validated = PyObject_CallOneArg(state->safe_uuid, is_safe); + if (validated == NULL) { + return -1; + } + Py_CLEAR(self->is_safe); + self->is_safe = validated; // reuse reference + } + + return 0; +} + + +static const uint8_t INT_TO_HEX[] = "0123456789abcdef"; + +// Lookup table for hex character to value conversion +// -1 for invalid characters +static const int8_t HEX_TO_INT[256] = { + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1, 0,1,2,3,4,5,6,7,8,9,-1,-1,-1,-1,-1,-1,-1,10,11,12,13,14,15,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 +}; + +static inline void +byte_to_hex(uint8_t byte, char *hex) +{ + hex[0] = INT_TO_HEX[(byte >> 4) & 0xf]; + hex[1] = INT_TO_HEX[byte & 0xf]; +} + +static int +from_hex(uuidobject *self, PyObject *hex) +{ + Py_ssize_t size; + const char *start = PyUnicode_AsUTF8AndSize(hex, &size); + if 
(start == NULL) { + return -1; + } + + uint8_t ch; + uint8_t acc, acc_set; + int8_t part; + int i, j; + + // Reimplement `hex = hex.replace('urn:', '').replace('uuid:', '')` + if (size > 0 && start[0] == 'u') { + if (size >= 9 && strncmp(start, "urn:uuid:", 9) == 0) { + start += 9; + size -= 9; + } + else if (size >= 4 && strncmp(start, "urn:", 4) == 0) { + start += 4; + size -= 4; + } + else if (size >= 5 && strncmp(start, "uuid:", 5) == 0) { + start += 5; + size -= 5; + } + } + + // Reimplement `hex = hex.strip('{}')` + if (size >= 1 && start[0] == '{') { + start++; + size -= 1; + } + if (size >= 1 && start[size - 1] == '}') { + size -= 1; + } + + if (size < 32) { + PyErr_SetString( + PyExc_ValueError, + "badly formed hexadecimal UUID string" + ); + return -1; + } + + acc_set = 0; + j = 0; + + for (i = 0; i < size; i++) { + ch = (uint8_t)start[i]; + + if (ch == '-') { + continue; + } + + part = HEX_TO_INT[ch]; + if (part == -1) { + PyErr_SetString( + PyExc_ValueError, + "badly formed hexadecimal UUID string" + ); + return -1; + } + + if (acc_set) { + acc |= (uint8_t)part; + self->bytes[j] = acc; + acc_set = 0; + j++; + } + else { + acc = (uint8_t)part << 4; + acc_set = 1; + } + + if (j > 16 || (j == 16 && acc_set)) { + PyErr_Format(PyExc_ValueError, + "invalid UUID '%s': decodes to more than 16 bytes", + hex); + return -1; + } + } + + if (j != 16) { + PyErr_Format(PyExc_ValueError, + "invalid UUID '%s': decodes to less than 16 bytes", + hex); + return -1; + } + + return 0; +} + +static int +from_bytes_le(uuidobject *self, Py_buffer *bytes_le) +{ + if (bytes_le->len != 16) { + PyErr_SetString(PyExc_ValueError, + "bytes_le is not a 16-char string"); + return -1; + } + + // Convert from little-endian to big-endian UUID format + // UUID fields in little-endian order need to be byte-swapped: + // - time_low (4 bytes) + // - time_mid (2 bytes) + // - time_hi_version (2 bytes) + // - clock_seq_hi_variant (1 byte) - no swap needed + // - clock_seq_low (1 byte) - no swap 
needed + // - node (6 bytes) - no swap needed + + unsigned char *src = (unsigned char *)bytes_le->buf; + unsigned char *dst = (unsigned char *)self->bytes; + + // Swap time_low (bytes 0-3) + dst[0] = src[3]; + dst[1] = src[2]; + dst[2] = src[1]; + dst[3] = src[0]; + + // Swap time_mid (bytes 4-5) + dst[4] = src[5]; + dst[5] = src[4]; + + // Swap time_hi_version (bytes 6-7) + dst[6] = src[7]; + dst[7] = src[6]; + + // Copy clock_seq and node as-is (bytes 8-15) + memcpy(dst + 8, src + 8, 8); + + return 0; +} + +static int +validate_int(uuid_state *state, PyObject *int_value) +{ + if (!PyLong_Check(int_value)) { + PyErr_SetString(PyExc_TypeError, "value must be an integer"); + return -1; + } + + int cmp = PyLong_IsNegative(int_value); + if (cmp < 0) { + return -1; + } + if (cmp == 1) { + PyErr_SetString(PyExc_ValueError, + "int is out of range (need a 128-bit value)"); + return -1; + } + + // Check if it's greater than max (2^128 - 1) + cmp = PyObject_RichCompareBool(int_value, state->uint128_max, Py_GT); + if (cmp < 0) { + return -1; + } + if (cmp == 1) { + PyErr_SetString(PyExc_ValueError, + "int is out of range (need a 128-bit value)"); + return -1; + } + + return 0; +} + +static int +from_int(uuidobject *self, PyObject *int_value, int validate) +{ + // Convert a 128-bit integer to UUID bytes (big-endian) + + uuid_state *state = get_uuid_state_by_cls(Py_TYPE(self)); + + if (validate && validate_int(state, int_value) < 0) { + return -1; + } + + if (_PyLong_AsByteArray( + (PyLongObject *)int_value, + (unsigned char *)self->bytes, + 16, + 0, // big-endian + 0, // unsigned + 1 // with_exceptions + ) < 0) + { + return -1; + } + + return 0; +} + +static int +extract_field( + PyObject *fields, + int field_num, + uint64_t max_value, + const char *error_msg, + uint64_t *result +) { + PyObject *field = PySequence_GetItem(fields, field_num); + if (field == NULL) { + return -1; + } + + if (!PyLong_Check(field)) { + PyErr_Format(PyExc_TypeError, "field %d must be an integer", 
field_num); + goto fail; + } + + int overflow; + uint64_t value = PyLong_AsLongLongAndOverflow(field, &overflow); + if (overflow || (value == (uint64_t)-1 && PyErr_Occurred())) { + PyErr_Format(PyExc_ValueError, "%s", error_msg); + goto fail; + } + + if (value > max_value) { + PyErr_Format(PyExc_ValueError, "%s", error_msg); + goto fail; + } + + *result = value; + Py_DECREF(field); + return 0; + +fail: + Py_DECREF(field); + return -1; +} + +static int +from_fields(uuidobject *self, PyObject *fields) +{ + // Validate that fields is a sequence with exactly 6 elements + if (!PySequence_Check(fields)) { + PyErr_SetString(PyExc_TypeError, "fields must be a sequence"); + return -1; + } + + Py_ssize_t len = PySequence_Size(fields); + if (len != 6) { + PyErr_SetString(PyExc_ValueError, "fields is not a 6-tuple"); + return -1; + } + + #define EXTRACT_FIELD(field_num, max_value, error_msg, type, name) \ + type name; \ + uint64_t name##_extracted; \ + if (extract_field(fields, field_num, max_value, error_msg, \ + &(name##_extracted)) < 0) { \ + return -1; \ + } \ + name = (type)name##_extracted; + + EXTRACT_FIELD( + 0, (1ULL << 32) - 1, "field 1 out of range (need a 32-bit value)", + uint32_t, time_low + ); + EXTRACT_FIELD( + 1, (1ULL << 16) - 1, "field 2 out of range (need a 16-bit value)", + uint16_t, time_mid + ); + EXTRACT_FIELD( + 2, (1ULL << 16) - 1, "field 3 out of range (need a 16-bit value)", + uint16_t, time_hi_version + ); + EXTRACT_FIELD( + 3, (1ULL << 8) - 1, "field 4 out of range (need an 8-bit value)", + uint8_t, clock_seq_hi_variant + ); + EXTRACT_FIELD( + 4, (1ULL << 8) - 1, "field 5 out of range (need an 8-bit value)", + uint8_t, clock_seq_low + ); + EXTRACT_FIELD( + 5, (1ULL << 48) - 1, "field 6 out of range (need a 48-bit value)", + uint64_t, node + ); + + self->bytes[0] = time_low >> 24; + self->bytes[1] = time_low >> 16; + self->bytes[2] = time_low >> 8; + self->bytes[3] = time_low; + + self->bytes[4] = time_mid >> 8; + self->bytes[5] = time_mid; + + 
self->bytes[6] = time_hi_version >> 8; + self->bytes[7] = time_hi_version; + + self->bytes[8] = clock_seq_hi_variant; + + self->bytes[9] = clock_seq_low; + + self->bytes[10] = node >> 40; + self->bytes[11] = node >> 32; + self->bytes[12] = node >> 24; + self->bytes[13] = node >> 16; + self->bytes[14] = node >> 8; + self->bytes[15] = node; + + return 0; +} + +static PyObject * +get_int(uuidobject *self) +{ + return _PyLong_FromByteArray((unsigned char *)self->bytes, 16, 0, 0); +} + +static uuidobject * +make_uuid(PyTypeObject *type) +{ + uuidobject *self = NULL; + uuid_state *state = get_uuid_state_by_cls(type); + + Py_BEGIN_CRITICAL_SECTION(type); + if (state->freelist_size > 0) { + self = state->freelist; + state->freelist = (uuidobject *)self->weakreflist; + state->freelist_size--; + } + Py_END_CRITICAL_SECTION(); + + if (self != NULL) { + // Reinitialize the object from freelist + _Py_NewReference((PyObject *)self); + } + else { + self = PyObject_New(uuidobject, type); + if (self == NULL) { + return NULL; + } + } + + // During module initialization, safe_uuid_unknown might not be set yet + if (state->safe_uuid_unknown != NULL) { + self->is_safe = Py_NewRef(state->safe_uuid_unknown); + } else { + self->is_safe = Py_NewRef(Py_None); + } + + self->weakreflist = NULL; + self->cached_hash = -1; + + return self; +} + +static PyObject * +Uuid_alloc(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + uuidobject *self = make_uuid(type); + if (self == NULL) { + return NULL; + } + memset(self->bytes, 0, 16); + return (PyObject *)self; +} + +static void +Uuid_dealloc(PyObject *obj) +{ + PyTypeObject *type = Py_TYPE(obj); + uuid_state *state = get_uuid_state_by_cls(type); + + uuidobject *uuid = (uuidobject *)obj; + if (uuid->weakreflist != NULL) { + PyObject_ClearWeakRefs(obj); + } + Py_CLEAR(uuid->is_safe); + + int added_to_freelist = 0; + Py_BEGIN_CRITICAL_SECTION(type); + if (state->freelist_size < MAX_FREE_LIST_SIZE) { + uuidobject *head = state->freelist; + 
state->freelist = uuid; + uuid->weakreflist = (PyObject *)head; + state->freelist_size++; + added_to_freelist = 1; + } + Py_END_CRITICAL_SECTION(); + + if (!added_to_freelist) { + type->tp_free(uuid); + // UUID is a heap allocated type so we have to decref the type ref + Py_DECREF(type); + } +} + + +static PyObject * +Uuid_get_int(uuidobject *self, void *closure) +{ + return get_int(self); +} + +static PyObject * +Uuid_get_is_safe(uuidobject *self, void *closure) +{ + if (self->is_safe == NULL) { + Py_RETURN_NONE; + } + return Py_NewRef(self->is_safe); +} + +static PyObject * +Uuid_get_hex(uuidobject *self, void *closure) +{ + char hex[32]; + for (int i = 0; i < 16; i++) { + byte_to_hex(self->bytes[i], &hex[i * 2]); + } + return PyUnicode_FromStringAndSize(hex, 32); +} + +static PyObject * +Uuid_get_variant(uuidobject *self, void *closure) +{ + uuid_state *state = get_uuid_state_by_cls(Py_TYPE(self)); + + uint8_t variant_byte = self->bytes[8]; + + // xxx - three high bits of variant_byte are unknown + if (!(variant_byte & 0x80)) { // & 0b1000_0000 + // 0xx - RESERVED_NCS + return Py_NewRef(state->reserved_ncs); + } + + // 1xx -- we know that high bit must be 1 + if (!(variant_byte & 0x40)) { // & 0b0100_0000 + // 10x - RFC_4122 + return Py_NewRef(state->rfc_4122); + } + + // 11x -- we know that two high bits are 1 + if (!(variant_byte & 0x20)) { // & 0b0010_0000 + // 110 - RESERVED_MICROSOFT + return Py_NewRef(state->reserved_microsoft); + } + + // 111 -- we know that all three high bits are 1 - RESERVED_FUTURE + return Py_NewRef(state->reserved_future); +} + +static int +is_rfc_4122(uuidobject *self) +{ + return (self->bytes[8] & 0xc0) == 0x80; +} + +static long +get_version(uuidobject *self) +{ + // RFC_4122 is when bit 7 is set (0x80) and bit 6 is not set (0x40) + // 0xc0 = 0b11000000 + // 0x80 = 0b10000000 + if (!is_rfc_4122(self)) { + return 0; + } + return (self->bytes[6] >> 4) & 0xf; +} + +static PyObject * +Uuid_get_version(uuidobject *self, void *closure) 
+{ + if (!is_rfc_4122(self)) { + Py_RETURN_NONE; + } + return PyLong_FromLong(get_version(self)); +} + +static inline uint32_t +get_time_low(uuidobject *self) +{ + return ((uint32_t)self->bytes[0] << 24) | + ((uint32_t)self->bytes[1] << 16) | + ((uint32_t)self->bytes[2] << 8) | + ((uint32_t)self->bytes[3]); +} + +static inline uint16_t +get_time_mid(uuidobject *self) +{ + return ((uint16_t)self->bytes[4] << 8) | + ((uint16_t)self->bytes[5]); +} + +static inline uint16_t +get_time_hi_version(uuidobject *self) +{ + return ((uint16_t)self->bytes[6] << 8) | + ((uint16_t)self->bytes[7]); +} + +static inline uint8_t +get_clock_seq_hi_variant(uuidobject *self) +{ + return self->bytes[8]; +} + +static inline uint8_t +get_clock_seq_low(uuidobject *self) +{ + return self->bytes[9]; +} + +static inline uint64_t +get_node(uuidobject *self) +{ + return ((uint64_t)self->bytes[10] << 40) | + ((uint64_t)self->bytes[11] << 32) | + ((uint64_t)self->bytes[12] << 24) | + ((uint64_t)self->bytes[13] << 16) | + ((uint64_t)self->bytes[14] << 8) | + ((uint64_t)self->bytes[15]); +} + +static PyObject * +Uuid_get_time_low(uuidobject *self, void *closure) +{ + return PyLong_FromUnsignedLong(get_time_low(self)); +} + +static PyObject * +Uuid_get_time_mid(uuidobject *self, void *closure) +{ + return PyLong_FromUnsignedLong(get_time_mid(self)); +} + +static PyObject * +Uuid_get_time_hi_version(uuidobject *self, void *closure) +{ + return PyLong_FromUnsignedLong(get_time_hi_version(self)); +} + +static PyObject * +Uuid_get_clock_seq_hi_variant(uuidobject *self, void *closure) +{ + return PyLong_FromUnsignedLong(get_clock_seq_hi_variant(self)); +} + +static PyObject * +Uuid_get_clock_seq_low(uuidobject *self, void *closure) +{ + return PyLong_FromUnsignedLong(get_clock_seq_low(self)); +} + +static PyObject * +Uuid_get_time(uuidobject *self, void *closure) +{ + long version = get_version(self); + + if (version == 6) { + // UUID v6: time_hi (32) | time_mid (16) | ver (4) | time_lo (12) | ... 
(64) + uint32_t time_hi = get_time_low(self); + uint16_t time_mid = get_time_mid(self); + uint16_t time_lo = ((uint16_t)(self->bytes[6] & 0x0f) << 8) | + ((uint16_t)self->bytes[7]); + + uint64_t time = ((uint64_t)time_hi << 28) | + ((uint64_t)time_mid << 12) | + (uint64_t)time_lo; + return PyLong_FromUnsignedLongLong(time); + } + else if (version == 7) { + // UUID v7: unix_ts_ms (48) | ... (80) + // First 6 bytes are the 48-bit timestamp + uint64_t unix_ts_ms = ((uint64_t)self->bytes[0] << 40) | + ((uint64_t)self->bytes[1] << 32) | + ((uint64_t)self->bytes[2] << 24) | + ((uint64_t)self->bytes[3] << 16) | + ((uint64_t)self->bytes[4] << 8) | + ((uint64_t)self->bytes[5]); + return PyLong_FromUnsignedLongLong(unix_ts_ms); + } + else { + // UUID v1 and others: time_lo (32) | time_mid (16) | ver (4) + // | time_hi (12) | ... (64) + uint32_t time_lo = get_time_low(self); + uint16_t time_mid = get_time_mid(self); + uint16_t time_hi = ((uint16_t)(self->bytes[6] & 0x0f) << 8) | + ((uint16_t)self->bytes[7]); + + uint64_t time = ((uint64_t)time_hi << 48) | + ((uint64_t)time_mid << 32) | + (uint64_t)time_lo; + return PyLong_FromUnsignedLongLong(time); + } +} + +static PyObject * +Uuid_get_clock_seq(uuidobject *self, void *closure) +{ + // clock_seq_hi_variant (byte 8) & 0x3f, then clock_seq_low (byte 9) + uint16_t clock_seq = ((uint16_t)(get_clock_seq_hi_variant(self) & 0x3f) << 8) | + ((uint16_t)get_clock_seq_low(self)); + return PyLong_FromUnsignedLong(clock_seq); +} + +static PyObject * +Uuid_get_node(uuidobject *self, void *closure) +{ + return PyLong_FromUnsignedLongLong(get_node(self)); +} + +static PyObject * +Uuid_get_bytes(uuidobject *self, void *closure) +{ + return PyBytes_FromStringAndSize((const char *)self->bytes, 16); +} + +static PyObject * +Uuid_get_bytes_le(uuidobject *self, void *closure) +{ + // UUID fields in little-endian order need to be byte-swapped: + // - time_low (4 bytes) - reversed + // - time_mid (2 bytes) - reversed + // - time_hi_version (2 
bytes) - reversed + // - clock_seq and node (8 bytes) - unchanged + + unsigned char bytes_le[16]; + + // Reverse time_low (bytes 0-3) + bytes_le[0] = self->bytes[3]; + bytes_le[1] = self->bytes[2]; + bytes_le[2] = self->bytes[1]; + bytes_le[3] = self->bytes[0]; + + // Reverse time_mid (bytes 4-5) + bytes_le[4] = self->bytes[5]; + bytes_le[5] = self->bytes[4]; + + // Reverse time_hi_version (bytes 6-7) + bytes_le[6] = self->bytes[7]; + bytes_le[7] = self->bytes[6]; + + // Copy clock_seq and node as-is (bytes 8-15) + memcpy(bytes_le + 8, self->bytes + 8, 8); + + return PyBytes_FromStringAndSize((const char *)bytes_le, 16); +} + +static PyObject * +Uuid_get_fields(uuidobject *self, void *closure) +{ + uint32_t time_low = get_time_low(self); + uint16_t time_mid = get_time_mid(self); + uint16_t time_hi_version = get_time_hi_version(self); + uint8_t clock_seq_hi_variant = get_clock_seq_hi_variant(self); + uint8_t clock_seq_low = get_clock_seq_low(self); + uint64_t node = get_node(self); + + // Build and return the tuple + return Py_BuildValue( + "(kHHBBK)", + (unsigned long)time_low, + (unsigned short)time_mid, + (unsigned short)time_hi_version, + (unsigned char)clock_seq_hi_variant, + (unsigned char)clock_seq_low, + (unsigned long long)node + ); +} + +static PyObject * +uuid_from_bytes_array(PyTypeObject *type, uint8_t bytes[16]) +{ + uuidobject *self = make_uuid(type); + if (self == NULL) { + return NULL; + } + memcpy(self->bytes, bytes, 16); + return (PyObject *)self; +} + +static PyObject * +Uuid_nb_int(PyObject *self) +{ + return get_int((uuidobject *)self); +} + +static PyObject * +Uuid_richcompare(PyObject *self, PyObject *other, int op) +{ + uuid_state *state = get_uuid_state_by_cls(Py_TYPE(self)); + + if (!PyObject_TypeCheck(other, state->UuidType)) { + Py_RETURN_NOTIMPLEMENTED; + } + + uuidobject *uuid_self = (uuidobject *)self; + uuidobject *uuid_other = (uuidobject *)other; + + int cmp = memcmp(uuid_self->bytes, uuid_other->bytes, 16); + + int result; + 
switch (op) { + case Py_EQ: + result = (cmp == 0); + break; + case Py_NE: + result = (cmp != 0); + break; + case Py_LT: + result = (cmp < 0); + break; + case Py_LE: + result = (cmp <= 0); + break; + case Py_GT: + result = (cmp > 0); + break; + case Py_GE: + result = (cmp >= 0); + break; + default: + Py_RETURN_NOTIMPLEMENTED; + } + + if (result) { + Py_RETURN_TRUE; + } else { + Py_RETURN_FALSE; + } +} + +static PyObject * +Uuid_str(PyObject *self) +{ + uuidobject *uuid = (uuidobject *)self; + + // UUID string format: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx (36 chars) + char str[36]; + + for (int i = 0; i < 4; i++) { + byte_to_hex(uuid->bytes[i], &str[i * 2]); + } + str[8] = '-'; + + for (int i = 4; i < 6; i++) { + byte_to_hex(uuid->bytes[i], &str[9 + (i - 4) * 2]); + } + str[13] = '-'; + + for (int i = 6; i < 8; i++) { + byte_to_hex(uuid->bytes[i], &str[14 + (i - 6) * 2]); + } + str[18] = '-'; + + for (int i = 8; i < 10; i++) { + byte_to_hex(uuid->bytes[i], &str[19 + (i - 8) * 2]); + } + str[23] = '-'; + + for (int i = 10; i < 16; i++) { + byte_to_hex(uuid->bytes[i], &str[24 + (i - 10) * 2]); + } + + return PyUnicode_FromStringAndSize(str, 36); +} + +static PyObject * +Uuid_repr(PyObject *self) +{ + PyObject *str_obj = Uuid_str(self); + if (str_obj == NULL) { + return NULL; + } + + // Get the class name (sadly can't use tp_name -- we don't need the full name) + PyObject *cls_name = PyObject_GetAttrString((PyObject *)Py_TYPE(self), "__name__"); + if (cls_name == NULL) { + Py_DECREF(str_obj); + return NULL; + } + + PyObject *repr = PyUnicode_FromFormat("%U('%U')", cls_name, str_obj); + Py_DECREF(str_obj); + Py_DECREF(cls_name); + return repr; +} + +static int +Uuid_setattr(PyObject *self, PyObject *name, PyObject *value) +{ + PyErr_SetString(PyExc_TypeError, "UUID objects are immutable"); + return -1; +} + +static PyObject * +Uuid_get_urn(uuidobject *self, void *closure) +{ + PyObject *str_obj = Uuid_str((PyObject *)self); + if (str_obj == NULL) { + return NULL; + } + + 
PyObject *urn = PyUnicode_FromFormat("urn:uuid:%U", str_obj); + Py_DECREF(str_obj); + return urn; +} + +static Py_hash_t +Uuid_hash(PyObject *self) +{ + uuidobject *uuid = (uuidobject *)self; + if (uuid->cached_hash != -1) { + // UUIDs are very often used in dicts/sets, so it makes + // sense to cache the computed hash (like we do for str) + return uuid->cached_hash; + } + + Py_hash_t hash = Py_HashBuffer(uuid->bytes, 16); + uuid->cached_hash = hash; + return hash; +} + + +/*[clinic input] +@classmethod +uuid.UUID._from_int + + value: object + / + +Create a UUID from an integer value. Internal use only. +[clinic start generated code]*/ + +static PyObject * +_uuid_UUID__from_int_impl(PyTypeObject *type, PyObject *value) +/*[clinic end generated code: output=05af0cfa4805fcae input=3f472ebfd07bbf50]*/ +{ + uuid_state *state = get_uuid_state_by_cls(type); + + if (validate_int(state, value) < 0) { + return NULL; + } + + uuidobject *self = make_uuid(type); + if (self == NULL) { + return NULL; + } + + if (from_int(self, value, 0) < 0) { + // We validated before creating an instance, so now we don't need to + // validate again + return NULL; + } + + return (PyObject *)self; +} + +static PyGetSetDef Uuid_getset[] = { + {"int", (getter)Uuid_get_int, NULL, "UUID as a 128-bit integer", NULL}, + {"is_safe", (getter)Uuid_get_is_safe, NULL, "UUID safety status", NULL}, + {"fields", (getter)Uuid_get_fields, NULL, "UUID as a 6-tuple", NULL}, + {"hex", (getter)Uuid_get_hex, NULL, "UUID as a 32-character hex string", NULL}, + {"urn", (getter)Uuid_get_urn, NULL, "UUID as a URN", NULL}, + {"variant", (getter)Uuid_get_variant, NULL, "UUID variant", NULL}, + {"version", (getter)Uuid_get_version, NULL, "UUID version", NULL}, + {"time_low", (getter)Uuid_get_time_low, NULL, "Time low field (32 bits)", NULL}, + {"time_mid", (getter)Uuid_get_time_mid, NULL, "Time mid field (16 bits)", NULL}, + {"bytes", (getter)Uuid_get_bytes, NULL, "UUID as a 16-byte string", NULL}, + {"bytes_le", 
(getter)Uuid_get_bytes_le, NULL, + "UUID as a 16-byte string in little-endian byte order", NULL}, + {"time_hi_version", (getter)Uuid_get_time_hi_version, NULL, + "Time high and version field (16 bits)", NULL}, + {"clock_seq_hi_variant", (getter)Uuid_get_clock_seq_hi_variant, NULL, + "Clock sequence high and variant field (8 bits)", NULL}, + {"clock_seq_low", (getter)Uuid_get_clock_seq_low, NULL, + "Clock sequence low field (8 bits)", NULL}, + {"time", (getter)Uuid_get_time, NULL, + "Time field (60 bits for v1/v6, 48 bits for v7)", NULL}, + {"clock_seq", (getter)Uuid_get_clock_seq, NULL, + "Clock sequence field (14 bits)", NULL}, + {"node", (getter)Uuid_get_node, NULL, + "Node field (48 bits)", NULL}, + {NULL} +}; + +/*[clinic input] +uuid.UUID.__getstate__ + +Return the UUID's state for pickling. +[clinic start generated code]*/ + +static PyObject * +_uuid_UUID___getstate___impl(uuidobject *self) +/*[clinic end generated code: output=f9278a4d28ccac91 input=4b471ae24b705e8e]*/ +{ + PyObject *dict = PyDict_New(); + if (dict == NULL) { + return NULL; + } + + // Always add 'int' key + PyObject *int_value = get_int(self); + if (int_value == NULL) { + Py_DECREF(dict); + return NULL; + } + if (PyDict_SetItemString(dict, "int", int_value) < 0) { + Py_DECREF(int_value); + Py_DECREF(dict); + return NULL; + } + Py_DECREF(int_value); + + if (PyDict_SetItemString(dict, "is_safe", self->is_safe) < 0) { + Py_DECREF(dict); + return NULL; + } + + return dict; +} + +/*[clinic input] +uuid.UUID.__setstate__ + + state: object + / + +Restore the UUID's state from pickling. + +Expects a dictionary with 'int' and optionally 'is_safe' keys. 
+[clinic start generated code]*/ + +static PyObject * +_uuid_UUID___setstate___impl(uuidobject *self, PyObject *state) +/*[clinic end generated code: output=cdf6bd4a2a680b3f input=b1ec0744788a73a0]*/ +{ + uuid_state *module_state = get_uuid_state_by_cls(Py_TYPE(self)); + + if (!PyDict_Check(state)) { + PyErr_SetString(PyExc_TypeError, "state must be a dictionary"); + return NULL; + } + + // Get and set the 'int' value + PyObject *int_value = PyDict_GetItemString(state, "int"); + if (int_value == NULL) { + PyErr_SetString(PyExc_ValueError, "state must have 'int' key"); + return NULL; + } + + if (from_int(self, int_value, 1) < 0) { + return NULL; + } + + // Get and set 'is_safe' if present + PyObject *is_safe = PyDict_GetItemString(state, "is_safe"); + if (is_safe != NULL) { + // is_safe is the integer value, we need to call SafeUUID(value) + PyObject *safe_uuid_member = PyObject_CallOneArg(module_state->safe_uuid, is_safe); + if (safe_uuid_member == NULL) { + return NULL; + } + Py_XDECREF(self->is_safe); + self->is_safe = safe_uuid_member; + } else { + // No is_safe in state, set to SafeUUID.unknown + Py_XDECREF(self->is_safe); + self->is_safe = Py_NewRef(module_state->safe_uuid_unknown); + } + + Py_RETURN_NONE; +} + +static PyMethodDef Uuid_methods[] = { + _UUID_UUID__FROM_INT_METHODDEF + _UUID_UUID___GETSTATE___METHODDEF + _UUID_UUID___SETSTATE___METHODDEF + {NULL, NULL} +}; + +static PyMemberDef Uuid_members[] = { + {"__weaklistoffset__", Py_T_PYSSIZET, offsetof(uuidobject, weakreflist), Py_READONLY}, + {NULL} +}; + +static PyType_Slot Uuid_slots[] = { + {Py_tp_alloc, Uuid_alloc}, + {Py_tp_dealloc, Uuid_dealloc}, + {Py_tp_getattro, PyObject_GenericGetAttr}, + {Py_tp_setattro, Uuid_setattr}, + {Py_tp_getset, Uuid_getset}, + {Py_tp_members, Uuid_members}, + {Py_tp_init, _uuid_UUID___init__}, + {Py_tp_doc, (void *)_uuid_UUID___init____doc__}, + {Py_tp_str, Uuid_str}, + {Py_tp_repr, Uuid_repr}, + {Py_tp_hash, Uuid_hash}, + {Py_tp_richcompare, Uuid_richcompare}, + 
{Py_nb_int, Uuid_nb_int}, + {Py_tp_methods, Uuid_methods}, + {0, NULL}, +}; + + +static PyType_Spec Uuid_spec = { + .name = "uuid.UUID", + .basicsize = sizeof(uuidobject), + .flags = ( + Py_TPFLAGS_DEFAULT + | Py_TPFLAGS_BASETYPE + | Py_TPFLAGS_HEAPTYPE + | Py_TPFLAGS_IMMUTABLETYPE + ), + .slots = Uuid_slots, +}; + + +static int +module_traverse(PyObject *mod, visitproc visit, void *arg) +{ + uuid_state *state = get_uuid_state(mod); + Py_VISIT(state->UuidType); + Py_VISIT(state->safe_uuid); + Py_VISIT(state->safe_uuid_safe); + Py_VISIT(state->safe_uuid_unsafe); + Py_VISIT(state->safe_uuid_unknown); + Py_VISIT(state->uint128_max); + Py_VISIT(state->reserved_ncs); + Py_VISIT(state->rfc_4122); + Py_VISIT(state->reserved_microsoft); + Py_VISIT(state->reserved_future); + return 0; +} + +static int +module_clear(PyObject *mod) +{ + uuid_state *state = get_uuid_state(mod); + + Py_CLEAR(state->UuidType); + Py_CLEAR(state->safe_uuid); + Py_CLEAR(state->safe_uuid_safe); + Py_CLEAR(state->safe_uuid_unsafe); + Py_CLEAR(state->safe_uuid_unknown); + Py_CLEAR(state->uint128_max); + Py_CLEAR(state->reserved_ncs); + Py_CLEAR(state->rfc_4122); + Py_CLEAR(state->reserved_microsoft); + Py_CLEAR(state->reserved_future); + + if (state->freelist != NULL) { + while (state->freelist != NULL) { + uuidobject *cur = state->freelist; + state->freelist = (uuidobject *)cur->weakreflist; + PyObject_Free(cur); + } + state->freelist = NULL; + state->freelist_size = 0; + } + + return 0; +} + +static void +module_free(void *mod) +{ + (void)module_clear((PyObject *)mod); +} + + static int uuid_exec(PyObject *module) { + uuid_state *state = get_uuid_state(module); + PyObject *uuid_mod = NULL; + PyObject *safe_uuid = NULL; + #define ADD_INT(NAME, VALUE) \ do { \ if (PyModule_AddIntConstant(module, (NAME), (VALUE)) < 0) { \ - return -1; \ + goto fail; \ } \ } while (0) @@ -119,17 +1619,93 @@ uuid_exec(PyObject *module) #endif #undef ADD_INT + + state->UuidType = (PyTypeObject *)PyType_FromMetaclass( + 
NULL, + module, + &Uuid_spec, + NULL + ); + if (state->UuidType == NULL) { + goto fail; + } + if (PyModule_AddType(module, state->UuidType) < 0) { + goto fail; + } + + uuid_mod = PyImport_ImportModule("uuid"); + if (uuid_mod == NULL) { + goto fail; + } + safe_uuid = state->safe_uuid = PyObject_GetAttrString(uuid_mod, "SafeUUID"); + if (safe_uuid == NULL) { + goto fail; + } + state->safe_uuid_safe = PyObject_GetAttrString(safe_uuid, "safe"); + if (state->safe_uuid_safe == NULL) { + goto fail; + } + state->safe_uuid_unsafe = PyObject_GetAttrString(safe_uuid, "unsafe"); + if (state->safe_uuid_unsafe == NULL) { + goto fail; + } + state->safe_uuid_unknown = PyObject_GetAttrString(safe_uuid, "unknown"); + if (state->safe_uuid_unknown == NULL) { + goto fail; + } + + state->uint128_max = PyObject_GetAttrString(uuid_mod, "_UINT_128_MAX"); + if (state->uint128_max == NULL) { + goto fail; + } + + state->reserved_ncs = PyObject_GetAttrString(uuid_mod, "RESERVED_NCS"); + if (state->reserved_ncs == NULL) { + goto fail; + } + + state->rfc_4122 = PyObject_GetAttrString(uuid_mod, "RFC_4122"); + if (state->rfc_4122 == NULL) { + goto fail; + } + + state->reserved_microsoft = PyObject_GetAttrString(uuid_mod, "RESERVED_MICROSOFT"); + if (state->reserved_microsoft == NULL) { + goto fail; + } + + state->reserved_future = PyObject_GetAttrString(uuid_mod, "RESERVED_FUTURE"); + if (state->reserved_future == NULL) { + goto fail; + } + + state->last_timestamp_v7 = 0; + state->last_counter_v7 = 0; + state->random_last_pid = uuid_getpid(); + + state->freelist = NULL; + state->freelist_size = 0; + + state->random_idx = RANDOM_BUF_SIZE; + + Py_CLEAR(uuid_mod); return 0; + +fail: + Py_CLEAR(uuid_mod); + return -1; } static PyMethodDef uuid_methods[] = { + _UUID_UUID4_METHODDEF + _UUID_UUID7_METHODDEF #if defined(HAVE_UUID_UUID_H) || defined(HAVE_UUID_H) {"generate_time_safe", py_uuid_generate_time_safe, METH_NOARGS, NULL}, #endif #if defined(MS_WINDOWS) {"UuidCreate", py_UuidCreate, METH_NOARGS, 
NULL}, #endif - {NULL, NULL, 0, NULL} /* sentinel */ + {NULL, NULL, 0, NULL} }; static PyModuleDef_Slot uuid_slots[] = { @@ -142,9 +1718,12 @@ static PyModuleDef_Slot uuid_slots[] = { static struct PyModuleDef uuidmodule = { PyModuleDef_HEAD_INIT, .m_name = "_uuid", - .m_size = 0, + .m_size = sizeof(uuid_state), .m_methods = uuid_methods, + .m_traverse = module_traverse, + .m_clear = module_clear, .m_slots = uuid_slots, + .m_free = module_free, }; PyMODINIT_FUNC diff --git a/Modules/clinic/_uuidmodule.c.h b/Modules/clinic/_uuidmodule.c.h new file mode 100644 index 00000000000000..3d9c374d83087d --- /dev/null +++ b/Modules/clinic/_uuidmodule.c.h @@ -0,0 +1,255 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif +#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() +#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() + +PyDoc_STRVAR(_uuid_uuid4__doc__, +"uuid4($module, /)\n" +"--\n" +"\n" +"Generate a random UUID (version 4)."); + +#define _UUID_UUID4_METHODDEF \ + {"uuid4", (PyCFunction)_uuid_uuid4, METH_NOARGS, _uuid_uuid4__doc__}, + +static PyObject * +_uuid_uuid4_impl(PyObject *module); + +static PyObject * +_uuid_uuid4(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(module); + return_value = _uuid_uuid4_impl(module); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +PyDoc_STRVAR(_uuid_uuid7__doc__, +"uuid7($module, /)\n" +"--\n" +"\n" +"Generate a UUID from a Unix timestamp in milliseconds and random bits.\n" +"\n" +"UUIDv7 objects feature monotonicity within a millisecond."); + +#define _UUID_UUID7_METHODDEF \ + {"uuid7", (PyCFunction)_uuid_uuid7, METH_NOARGS, _uuid_uuid7__doc__}, + +static PyObject * +_uuid_uuid7_impl(PyObject *module); + +static PyObject * +_uuid_uuid7(PyObject *module, PyObject 
*Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(module); + return_value = _uuid_uuid7_impl(module); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +PyDoc_STRVAR(_uuid_UUID___init____doc__, +"UUID(hex=, bytes=None, bytes_le=None,\n" +" fields=, int=,\n" +" version=, *, is_safe=)\n" +"--\n" +"\n" +"UUID is a fast base implementation type for uuid.UUID."); + +static int +_uuid_UUID___init___impl(uuidobject *self, PyObject *hex, Py_buffer *bytes, + Py_buffer *bytes_le, PyObject *fields, + PyObject *int_value, PyObject *version, + PyObject *is_safe); + +static int +_uuid_UUID___init__(PyObject *self, PyObject *args, PyObject *kwargs) +{ + int return_value = -1; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 7 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(hex), &_Py_ID(bytes), &_Py_ID(bytes_le), &_Py_ID(fields), &_Py_ID(int), &_Py_ID(version), &_Py_ID(is_safe), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"hex", "bytes", "bytes_le", "fields", "int", "version", "is_safe", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "UUID", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[7]; + PyObject * const *fastargs; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + Py_ssize_t noptargs = nargs + (kwargs ? 
PyDict_GET_SIZE(kwargs) : 0) - 0; + PyObject *hex = NULL; + Py_buffer bytes = {NULL, NULL}; + Py_buffer bytes_le = {NULL, NULL}; + PyObject *fields = NULL; + PyObject *int_value = NULL; + PyObject *version = NULL; + PyObject *is_safe = NULL; + + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, + /*minpos*/ 0, /*maxpos*/ 6, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!fastargs) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + if (fastargs[0]) { + if (!PyUnicode_Check(fastargs[0])) { + _PyArg_BadArgument("UUID", "argument 'hex'", "str", fastargs[0]); + goto exit; + } + hex = fastargs[0]; + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (fastargs[1]) { + if (PyObject_GetBuffer(fastargs[1], &bytes, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (fastargs[2]) { + if (PyObject_GetBuffer(fastargs[2], &bytes_le, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (fastargs[3]) { + fields = fastargs[3]; + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (fastargs[4]) { + int_value = fastargs[4]; + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (fastargs[5]) { + version = fastargs[5]; + if (!--noptargs) { + goto skip_optional_pos; + } + } +skip_optional_pos: + if (!noptargs) { + goto skip_optional_kwonly; + } + is_safe = fastargs[6]; +skip_optional_kwonly: + return_value = _uuid_UUID___init___impl((uuidobject *)self, hex, &bytes, &bytes_le, fields, int_value, version, is_safe); + +exit: + /* Cleanup for bytes */ + if (bytes.obj) { + PyBuffer_Release(&bytes); + } + /* Cleanup for bytes_le */ + if (bytes_le.obj) { + PyBuffer_Release(&bytes_le); + } + + return return_value; +} + +PyDoc_STRVAR(_uuid_UUID__from_int__doc__, +"_from_int($type, value, /)\n" +"--\n" +"\n" +"Create a UUID from an integer value. 
Internal use only."); + +#define _UUID_UUID__FROM_INT_METHODDEF \ + {"_from_int", (PyCFunction)_uuid_UUID__from_int, METH_O|METH_CLASS, _uuid_UUID__from_int__doc__}, + +static PyObject * +_uuid_UUID__from_int_impl(PyTypeObject *type, PyObject *value); + +static PyObject * +_uuid_UUID__from_int(PyObject *type, PyObject *value) +{ + PyObject *return_value = NULL; + + return_value = _uuid_UUID__from_int_impl((PyTypeObject *)type, value); + + return return_value; +} + +PyDoc_STRVAR(_uuid_UUID___getstate____doc__, +"__getstate__($self, /)\n" +"--\n" +"\n" +"Return the UUID\'s state for pickling."); + +#define _UUID_UUID___GETSTATE___METHODDEF \ + {"__getstate__", (PyCFunction)_uuid_UUID___getstate__, METH_NOARGS, _uuid_UUID___getstate____doc__}, + +static PyObject * +_uuid_UUID___getstate___impl(uuidobject *self); + +static PyObject * +_uuid_UUID___getstate__(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + return _uuid_UUID___getstate___impl((uuidobject *)self); +} + +PyDoc_STRVAR(_uuid_UUID___setstate____doc__, +"__setstate__($self, state, /)\n" +"--\n" +"\n" +"Restore the UUID\'s state from pickling.\n" +"\n" +"Expects a dictionary with \'int\' and optionally \'is_safe\' keys."); + +#define _UUID_UUID___SETSTATE___METHODDEF \ + {"__setstate__", (PyCFunction)_uuid_UUID___setstate__, METH_O, _uuid_UUID___setstate____doc__}, + +static PyObject * +_uuid_UUID___setstate___impl(uuidobject *self, PyObject *state); + +static PyObject * +_uuid_UUID___setstate__(PyObject *self, PyObject *state) +{ + PyObject *return_value = NULL; + + return_value = _uuid_UUID___setstate___impl((uuidobject *)self, state); + + return return_value; +} +/*[clinic end generated code: output=ec50cafa0e028d2b input=a9049054013a1b77]*/ From 5ff81f3ee8cbf06010c77b7d655133787d53740b Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Fri, 19 Sep 2025 12:19:58 +0100 Subject: [PATCH 02/16] Address @picnixz's review --- Modules/_uuidmodule.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 
insertions(+), 12 deletions(-) diff --git a/Modules/_uuidmodule.c b/Modules/_uuidmodule.c index b7bdb02381db64..f569d0df46b181 100644 --- a/Modules/_uuidmodule.c +++ b/Modules/_uuidmodule.c @@ -1,4 +1,13 @@ -// UUID accelerator base type. +/* + * Python UUID module: + * - wraps libuuid or Windows rpcrt4.dll. + * - implements fast version of the uuid.py:UUID class. + * - re-implements uuid4() and uuid7() functions with improved performance + * by virtue of them being implemented in C and better entropy fetching + * strategy. + * + * DCE compatible Universally Unique Identifier library. + */ #ifndef Py_BUILD_CORE_BUILTIN # define Py_BUILD_CORE_MODULE 1 @@ -137,6 +146,8 @@ typedef struct uuidobject { // Bytes 8-9: clock_seq_and_variant (16 bits) // Bytes 10-15: node (48 bits) // +// Note that the time attributes are only relevant to versions 1, 6 and 7. +// // Version field is located in byte 6; most significant 4 bits: // // Variant field is located in byte 8; most significant variable bits: @@ -486,8 +497,7 @@ _uuid_UUID___init___impl(uuidobject *self, PyObject *hex, Py_buffer *bytes, if (validated == NULL) { return -1; } - Py_CLEAR(self->is_safe); - self->is_safe = validated; // reuse reference + Py_XSETREF(self->is_safe, validated); } return 0; @@ -734,12 +744,12 @@ extract_field( int overflow; uint64_t value = PyLong_AsLongLongAndOverflow(field, &overflow); if (overflow || (value == (uint64_t)-1 && PyErr_Occurred())) { - PyErr_Format(PyExc_ValueError, "%s", error_msg); + PyErr_SetString(PyExc_ValueError, error_msg); goto fail; } if (value > max_value) { - PyErr_Format(PyExc_ValueError, "%s", error_msg); + PyErr_SetString(PyExc_ValueError, error_msg); goto fail; } @@ -767,14 +777,16 @@ from_fields(uuidobject *self, PyObject *fields) return -1; } - #define EXTRACT_FIELD(field_num, max_value, error_msg, type, name) \ +# define EXTRACT_FIELD(field_num, max_value, error_msg, type, name) \ type name; \ - uint64_t name##_extracted; \ - if (extract_field(fields, 
field_num, max_value, error_msg, \ - &(name##_extracted)) < 0) { \ - return -1; \ - } \ - name = (type)name##_extracted; + do { \ + uint64_t name##_extracted; \ + if (extract_field(fields, field_num, max_value, error_msg, \ + &(name##_extracted)) < 0) { \ + return -1; \ + } \ + name = (type)name##_extracted; \ + } while(0) EXTRACT_FIELD( 0, (1ULL << 32) - 1, "field 1 out of range (need a 32-bit value)", From 2b8508ec238d97993fd985e3b2f9a515fa793472 Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Fri, 19 Sep 2025 12:27:11 +0100 Subject: [PATCH 03/16] Regen the clinic files --- Modules/_uuidmodule.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Modules/_uuidmodule.c b/Modules/_uuidmodule.c index f569d0df46b181..4d66f9a6ff3b60 100644 --- a/Modules/_uuidmodule.c +++ b/Modules/_uuidmodule.c @@ -193,7 +193,7 @@ typedef struct { #include "clinic/_uuidmodule.c.h" /*[clinic input] -class uuid.UUID "uuidobject *" "&UuidType" +class _uuid.UUID "uuidobject *" "&UuidType" [clinic start generated code]*/ /*[clinic end generated code: output=da39a3ee5e6b4b0d input=84ae6e2089cffd3f]*/ @@ -399,7 +399,7 @@ _uuid_uuid7_impl(PyObject *module) } /*[clinic input] -uuid.UUID.__init__ +_uuid.UUID.__init__ hex: 'U' = NULL bytes: 'y*' = None @@ -1354,7 +1354,7 @@ Uuid_hash(PyObject *self) /*[clinic input] @classmethod -uuid.UUID._from_int +_uuid.UUID._from_int value: object / @@ -1415,7 +1415,7 @@ static PyGetSetDef Uuid_getset[] = { }; /*[clinic input] -uuid.UUID.__getstate__ +_uuid.UUID.__getstate__ Return the UUID's state for pickling. 
[clinic start generated code]*/ @@ -1451,7 +1451,7 @@ _uuid_UUID___getstate___impl(uuidobject *self) } /*[clinic input] -uuid.UUID.__setstate__ +_uuid.UUID.__setstate__ state: object / From 9261f1c66c04e53088f5502e2ec39c26ed1de726 Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Fri, 19 Sep 2025 12:29:18 +0100 Subject: [PATCH 04/16] Clarify the type name --- Modules/_uuidmodule.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Modules/_uuidmodule.c b/Modules/_uuidmodule.c index 4d66f9a6ff3b60..7df542b4d98e2e 100644 --- a/Modules/_uuidmodule.c +++ b/Modules/_uuidmodule.c @@ -1534,7 +1534,13 @@ static PyType_Slot Uuid_slots[] = { static PyType_Spec Uuid_spec = { + // We use "uuid.UUID" here and not "_uuid.UUID" to have full + // compatibility with old pickled UUIDs. There's no workaround + // if we want both to produce compatible pickles that can be read + // by older Pythons (using ancient pickle protocol verions) and + // restore from pickles produced by old Python versions. .name = "uuid.UUID", + .basicsize = sizeof(uuidobject), .flags = ( Py_TPFLAGS_DEFAULT From 7f2b904018d99c5c9b0cd8e064c270fb7295ad55 Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Fri, 19 Sep 2025 13:05:05 +0100 Subject: [PATCH 05/16] Fix news --- .../next/Library/2025-09-18-14-13-00.gh-issue-139122.m3lp66.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2025-09-18-14-13-00.gh-issue-139122.m3lp66.rst b/Misc/NEWS.d/next/Library/2025-09-18-14-13-00.gh-issue-139122.m3lp66.rst index 72eb12fecf9853..243d91112f4077 100644 --- a/Misc/NEWS.d/next/Library/2025-09-18-14-13-00.gh-issue-139122.m3lp66.rst +++ b/Misc/NEWS.d/next/Library/2025-09-18-14-13-00.gh-issue-139122.m3lp66.rst @@ -25,7 +25,7 @@ Summary of changes: Python implementation fully. * The Python implementation stores UUID values as int objects. The C - implementation stores them as `uint8_t[16]` array. + implementation stores them as ``uint8_t[16]`` array. 
* The C implementation has faster hash() implementation but also caches the computed hash value to speedup cases when UUIDs are used as From 21ca384bcf1bd276f3fc07909f5f129f93bc98cd Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Fri, 19 Sep 2025 13:54:21 +0100 Subject: [PATCH 06/16] Trim the NEWS file down --- ...-09-18-14-13-00.gh-issue-139122.m3lp66.rst | 55 ------------------- 1 file changed, 55 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2025-09-18-14-13-00.gh-issue-139122.m3lp66.rst b/Misc/NEWS.d/next/Library/2025-09-18-14-13-00.gh-issue-139122.m3lp66.rst index 243d91112f4077..bc3fd369dd5566 100644 --- a/Misc/NEWS.d/next/Library/2025-09-18-14-13-00.gh-issue-139122.m3lp66.rst +++ b/Misc/NEWS.d/next/Library/2025-09-18-14-13-00.gh-issue-139122.m3lp66.rst @@ -1,57 +1,2 @@ Reimplement base UUID type, uuid4(), and uuid7() in C -The C implementation considerably boosts the performance of the key UUID -operations: - ------------------------------------- -Operation Speedup ------------------------------------- -uuid4() generation 15.01x -uuid7() generation 29.64x -UUID from string 6.76x -UUID from bytes 5.16x -str(uuid) conversion 6.66x ------------------------------------- - -Summary of changes: - -* The UUID type is reimplemented in C in its entirety. - -* The pure-Python is kept around and is used of the C implementation - isn't available for some reason. - -* Both implementations are tested extensively; additional tests are - added to ensure that the C implementation of the type follows the pure - Python implementation fully. - -* The Python implementation stores UUID values as int objects. The C - implementation stores them as ``uint8_t[16]`` array. - -* The C implementation has faster hash() implementation but also caches - the computed hash value to speedup cases when UUIDs are used as - set/dict keys. - -* The C implementation has a freelist to make new UUID object - instantiation as fast as possible. 
- -* uuid4() and uuid7() are now implmented in C. The most performance - boost (10x) comes from overfetching entropy to decrease the number of - _PyOS_URandom() calls. On its own it's a safe optimization with the - edge case that Unix fork needs to be explicitly handled. We do that by - comparing the current PID to the PID of when the random buffer was - populated. - -* Portions of code are coming from my implementation of faster UUID - in gel-python [1]. I did use AI during the development, but basically - had to rewrite the code it generated to be more idiomatic and - efficient. - -* The benchmark can be found here [2]. - -* This PR makes Python UUID operations as fast as they are in NodeJS and - Bun runtimes. - -[1] -https://github.com/MagicStack/py-pgproto/blob/b8109fb311a59f30f9947567a13508da9a776564/uuid.pyx - -[2] https://gist.github.com/1st1/f03e816f34a61e4d46c78ff98baf4818 From 8bceabd0ed6a8d34e0c831335e90cda5d4b3a950 Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Fri, 19 Sep 2025 14:03:15 +0100 Subject: [PATCH 07/16] Use PyObject* in getters --- Modules/_uuidmodule.c | 93 +++++++++++++++++++++++++------------------ 1 file changed, 54 insertions(+), 39 deletions(-) diff --git a/Modules/_uuidmodule.c b/Modules/_uuidmodule.c index 7df542b4d98e2e..1c8695dcbcad5e 100644 --- a/Modules/_uuidmodule.c +++ b/Modules/_uuidmodule.c @@ -925,14 +925,15 @@ Uuid_dealloc(PyObject *obj) static PyObject * -Uuid_get_int(uuidobject *self, void *closure) +Uuid_get_int(PyObject *o, void *closure) { - return get_int(self); + return get_int((uuidobject *)o); } static PyObject * -Uuid_get_is_safe(uuidobject *self, void *closure) +Uuid_get_is_safe(PyObject *o, void *closure) { + uuidobject *self = (uuidobject *)o; if (self->is_safe == NULL) { Py_RETURN_NONE; } @@ -940,8 +941,9 @@ Uuid_get_is_safe(uuidobject *self, void *closure) } static PyObject * -Uuid_get_hex(uuidobject *self, void *closure) +Uuid_get_hex(PyObject *o, void *closure) { + uuidobject *self = (uuidobject *)o; 
char hex[32]; for (int i = 0; i < 16; i++) { byte_to_hex(self->bytes[i], &hex[i * 2]); @@ -950,8 +952,9 @@ Uuid_get_hex(uuidobject *self, void *closure) } static PyObject * -Uuid_get_variant(uuidobject *self, void *closure) +Uuid_get_variant(PyObject *o, void *closure) { + uuidobject *self = (uuidobject *)o; uuid_state *state = get_uuid_state_by_cls(Py_TYPE(self)); uint8_t variant_byte = self->bytes[8]; @@ -997,8 +1000,9 @@ get_version(uuidobject *self) } static PyObject * -Uuid_get_version(uuidobject *self, void *closure) +Uuid_get_version(PyObject *o, void *closure) { + uuidobject *self = (uuidobject *)o; if (!is_rfc_4122(self)) { Py_RETURN_NONE; } @@ -1052,38 +1056,45 @@ get_node(uuidobject *self) } static PyObject * -Uuid_get_time_low(uuidobject *self, void *closure) +Uuid_get_time_low(PyObject *o, void *closure) { + uuidobject *self = (uuidobject *)o; return PyLong_FromUnsignedLong(get_time_low(self)); } static PyObject * -Uuid_get_time_mid(uuidobject *self, void *closure) +Uuid_get_time_mid(PyObject *o, void *closure) { + uuidobject *self = (uuidobject *)o; return PyLong_FromUnsignedLong(get_time_mid(self)); } static PyObject * -Uuid_get_time_hi_version(uuidobject *self, void *closure) +Uuid_get_time_hi_version(PyObject *o, void *closure) { + uuidobject *self = (uuidobject *)o; return PyLong_FromUnsignedLong(get_time_hi_version(self)); } static PyObject * -Uuid_get_clock_seq_hi_variant(uuidobject *self, void *closure) +Uuid_get_clock_seq_hi_variant(PyObject *o, void *closure) { + uuidobject *self = (uuidobject *)o; return PyLong_FromUnsignedLong(get_clock_seq_hi_variant(self)); } static PyObject * -Uuid_get_clock_seq_low(uuidobject *self, void *closure) +Uuid_get_clock_seq_low(PyObject *o, void *closure) { + uuidobject *self = (uuidobject *)o; return PyLong_FromUnsignedLong(get_clock_seq_low(self)); } static PyObject * -Uuid_get_time(uuidobject *self, void *closure) +Uuid_get_time(PyObject *o, void *closure) { + uuidobject *self = (uuidobject *)o; + long 
version = get_version(self); if (version == 6) { @@ -1125,8 +1136,9 @@ Uuid_get_time(uuidobject *self, void *closure) } static PyObject * -Uuid_get_clock_seq(uuidobject *self, void *closure) +Uuid_get_clock_seq(PyObject *o, void *closure) { + uuidobject *self = (uuidobject *)o; // clock_seq_hi_variant (byte 8) & 0x3f, then clock_seq_low (byte 9) uint16_t clock_seq = ((uint16_t)(get_clock_seq_hi_variant(self) & 0x3f) << 8) | ((uint16_t)get_clock_seq_low(self)); @@ -1134,20 +1146,23 @@ Uuid_get_clock_seq(uuidobject *self, void *closure) } static PyObject * -Uuid_get_node(uuidobject *self, void *closure) +Uuid_get_node(PyObject *o, void *closure) { + uuidobject *self = (uuidobject *)o; return PyLong_FromUnsignedLongLong(get_node(self)); } static PyObject * -Uuid_get_bytes(uuidobject *self, void *closure) +Uuid_get_bytes(PyObject *o, void *closure) { + uuidobject *self = (uuidobject *)o; return PyBytes_FromStringAndSize((const char *)self->bytes, 16); } static PyObject * -Uuid_get_bytes_le(uuidobject *self, void *closure) +Uuid_get_bytes_le(PyObject *o, void *closure) { + uuidobject *self = (uuidobject *)o; // UUID fields in little-endian order need to be byte-swapped: // - time_low (4 bytes) - reversed // - time_mid (2 bytes) - reversed @@ -1177,8 +1192,9 @@ Uuid_get_bytes_le(uuidobject *self, void *closure) } static PyObject * -Uuid_get_fields(uuidobject *self, void *closure) +Uuid_get_fields(PyObject *o, void *closure) { + uuidobject *self = (uuidobject *)o; uint32_t time_low = get_time_low(self); uint16_t time_mid = get_time_mid(self); uint16_t time_hi_version = get_time_hi_version(self); @@ -1324,8 +1340,9 @@ Uuid_setattr(PyObject *self, PyObject *name, PyObject *value) } static PyObject * -Uuid_get_urn(uuidobject *self, void *closure) +Uuid_get_urn(PyObject *o, void *closure) { + uuidobject *self = (uuidobject *)o; PyObject *str_obj = Uuid_str((PyObject *)self); if (str_obj == NULL) { return NULL; @@ -1337,9 +1354,9 @@ Uuid_get_urn(uuidobject *self, void 
*closure) } static Py_hash_t -Uuid_hash(PyObject *self) +Uuid_hash(PyObject *o) { - uuidobject *uuid = (uuidobject *)self; + uuidobject *uuid = (uuidobject *)o; if (uuid->cached_hash != -1) { // UUIDs are very often used in dicts/sets, so it makes // sense to cache the computed hash (like we do for str) @@ -1387,30 +1404,28 @@ _uuid_UUID__from_int_impl(PyTypeObject *type, PyObject *value) } static PyGetSetDef Uuid_getset[] = { - {"int", (getter)Uuid_get_int, NULL, "UUID as a 128-bit integer", NULL}, - {"is_safe", (getter)Uuid_get_is_safe, NULL, "UUID safety status", NULL}, - {"fields", (getter)Uuid_get_fields, NULL, "UUID as a 6-tuple", NULL}, - {"hex", (getter)Uuid_get_hex, NULL, "UUID as a 32-character hex string", NULL}, - {"urn", (getter)Uuid_get_urn, NULL, "UUID as a URN", NULL}, - {"variant", (getter)Uuid_get_variant, NULL, "UUID variant", NULL}, - {"version", (getter)Uuid_get_version, NULL, "UUID version", NULL}, - {"time_low", (getter)Uuid_get_time_low, NULL, "Time low field (32 bits)", NULL}, - {"time_mid", (getter)Uuid_get_time_mid, NULL, "Time mid field (16 bits)", NULL}, - {"bytes", (getter)Uuid_get_bytes, NULL, "UUID as a 16-byte string", NULL}, - {"bytes_le", (getter)Uuid_get_bytes_le, NULL, + {"int", Uuid_get_int, NULL, "UUID as a 128-bit integer", NULL}, + {"is_safe", Uuid_get_is_safe, NULL, "UUID safety status", NULL}, + {"fields", Uuid_get_fields, NULL, "UUID as a 6-tuple", NULL}, + {"hex", Uuid_get_hex, NULL, "UUID as a 32-character hex string", NULL}, + {"urn", Uuid_get_urn, NULL, "UUID as a URN", NULL}, + {"variant", Uuid_get_variant, NULL, "UUID variant", NULL}, + {"version", Uuid_get_version, NULL, "UUID version", NULL}, + {"time_low", Uuid_get_time_low, NULL, "Time low field (32 bits)", NULL}, + {"time_mid", Uuid_get_time_mid, NULL, "Time mid field (16 bits)", NULL}, + {"bytes", Uuid_get_bytes, NULL, "UUID as a 16-byte string", NULL}, + {"bytes_le", Uuid_get_bytes_le, NULL, "UUID as a 16-byte string in little-endian byte order", NULL}, - 
{"time_hi_version", (getter)Uuid_get_time_hi_version, NULL, + {"time_hi_version", Uuid_get_time_hi_version, NULL, "Time high and version field (16 bits)", NULL}, - {"clock_seq_hi_variant", (getter)Uuid_get_clock_seq_hi_variant, NULL, + {"clock_seq_hi_variant", Uuid_get_clock_seq_hi_variant, NULL, "Clock sequence high and variant field (8 bits)", NULL}, - {"clock_seq_low", (getter)Uuid_get_clock_seq_low, NULL, + {"clock_seq_low", Uuid_get_clock_seq_low, NULL, "Clock sequence low field (8 bits)", NULL}, - {"time", (getter)Uuid_get_time, NULL, + {"time", Uuid_get_time, NULL, "Time field (60 bits for v1/v6, 48 bits for v7)", NULL}, - {"clock_seq", (getter)Uuid_get_clock_seq, NULL, - "Clock sequence field (14 bits)", NULL}, - {"node", (getter)Uuid_get_node, NULL, - "Node field (48 bits)", NULL}, + {"clock_seq", Uuid_get_clock_seq, NULL, "Clock sequence field (14 bits)", NULL}, + {"node", Uuid_get_node, NULL, "Node field (48 bits)", NULL}, {NULL} }; From d25862a4673c83bd5ed2fdca59208447f5ebdc57 Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Fri, 19 Sep 2025 15:29:57 +0100 Subject: [PATCH 08/16] Codegen string literals and use them --- .../pycore_global_objects_fini_generated.h | 4 ++++ Include/internal/pycore_global_strings.h | 4 ++++ Include/internal/pycore_runtime_init_generated.h | 4 ++++ .../internal/pycore_unicodeobject_generated.h | 16 ++++++++++++++++ Modules/_uuidmodule.c | 9 +++++---- 5 files changed, 33 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index a598af4f37c123..e99af5a60ab2c1 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -833,6 +833,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(byte_size)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(byteorder)); _PyStaticObject_CheckRefcnt((PyObject 
*)&_Py_ID(bytes)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(bytes_le)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(bytes_per_sep)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(c_call)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(c_exception)); @@ -1013,6 +1014,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(hash_name)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(header)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(headers)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(hex)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(hi)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(hook)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(hour)); @@ -1044,6 +1046,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(insert_comments)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(insert_pis)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(instructions)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(int)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(intern)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(intersection)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(interval)); @@ -1051,6 +1054,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(is_compress)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(is_raw)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(is_running)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(is_safe)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(is_struct)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(isatty)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(isinstance)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 6959343947c1f4..a3e86da7c50304 100644 --- a/Include/internal/pycore_global_strings.h +++ 
b/Include/internal/pycore_global_strings.h @@ -324,6 +324,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(byte_size) STRUCT_FOR_ID(byteorder) STRUCT_FOR_ID(bytes) + STRUCT_FOR_ID(bytes_le) STRUCT_FOR_ID(bytes_per_sep) STRUCT_FOR_ID(c_call) STRUCT_FOR_ID(c_exception) @@ -504,6 +505,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(hash_name) STRUCT_FOR_ID(header) STRUCT_FOR_ID(headers) + STRUCT_FOR_ID(hex) STRUCT_FOR_ID(hi) STRUCT_FOR_ID(hook) STRUCT_FOR_ID(hour) @@ -535,6 +537,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(insert_comments) STRUCT_FOR_ID(insert_pis) STRUCT_FOR_ID(instructions) + STRUCT_FOR_ID(int) STRUCT_FOR_ID(intern) STRUCT_FOR_ID(intersection) STRUCT_FOR_ID(interval) @@ -542,6 +545,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(is_compress) STRUCT_FOR_ID(is_raw) STRUCT_FOR_ID(is_running) + STRUCT_FOR_ID(is_safe) STRUCT_FOR_ID(is_struct) STRUCT_FOR_ID(isatty) STRUCT_FOR_ID(isinstance) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 314837c5b3f288..8a1fda294aa65c 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -831,6 +831,7 @@ extern "C" { INIT_ID(byte_size), \ INIT_ID(byteorder), \ INIT_ID(bytes), \ + INIT_ID(bytes_le), \ INIT_ID(bytes_per_sep), \ INIT_ID(c_call), \ INIT_ID(c_exception), \ @@ -1011,6 +1012,7 @@ extern "C" { INIT_ID(hash_name), \ INIT_ID(header), \ INIT_ID(headers), \ + INIT_ID(hex), \ INIT_ID(hi), \ INIT_ID(hook), \ INIT_ID(hour), \ @@ -1042,6 +1044,7 @@ extern "C" { INIT_ID(insert_comments), \ INIT_ID(insert_pis), \ INIT_ID(instructions), \ + INIT_ID(int), \ INIT_ID(intern), \ INIT_ID(intersection), \ INIT_ID(interval), \ @@ -1049,6 +1052,7 @@ extern "C" { INIT_ID(is_compress), \ INIT_ID(is_raw), \ INIT_ID(is_running), \ + INIT_ID(is_safe), \ INIT_ID(is_struct), \ INIT_ID(isatty), \ INIT_ID(isinstance), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h 
b/Include/internal/pycore_unicodeobject_generated.h index 45b00a20a07dda..643a05fd906519 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -1084,6 +1084,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(bytes_le); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(bytes_per_sep); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -1804,6 +1808,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(hex); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(hi); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -1928,6 +1936,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(int); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(intern); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -1956,6 +1968,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(is_safe); + _PyUnicode_InternStatic(interp, &string); + 
assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(is_struct); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Modules/_uuidmodule.c b/Modules/_uuidmodule.c index 1c8695dcbcad5e..c06192ee476831 100644 --- a/Modules/_uuidmodule.c +++ b/Modules/_uuidmodule.c @@ -21,6 +21,7 @@ #include "pycore_long.h" // _PyLong_FromByteArray, _PyLong_AsByteArray #include "pycore_pylifecycle.h" // _PyOS_URandom() #include "pycore_time.h" // PyTime_Time +#include "pycore_runtime_init.h" // _Py_ID() #if defined(HAVE_UUID_H) // AIX, FreeBSD, libuuid with pkgconf @@ -1450,14 +1451,14 @@ _uuid_UUID___getstate___impl(uuidobject *self) Py_DECREF(dict); return NULL; } - if (PyDict_SetItemString(dict, "int", int_value) < 0) { + if (PyDict_SetItem(dict, &_Py_ID(int), int_value) < 0) { Py_DECREF(int_value); Py_DECREF(dict); return NULL; } Py_DECREF(int_value); - if (PyDict_SetItemString(dict, "is_safe", self->is_safe) < 0) { + if (PyDict_SetItem(dict, &_Py_ID(is_safe), self->is_safe) < 0) { Py_DECREF(dict); return NULL; } @@ -1488,7 +1489,7 @@ _uuid_UUID___setstate___impl(uuidobject *self, PyObject *state) } // Get and set the 'int' value - PyObject *int_value = PyDict_GetItemString(state, "int"); + PyObject *int_value = PyDict_GetItem(state, &_Py_ID(int)); if (int_value == NULL) { PyErr_SetString(PyExc_ValueError, "state must have 'int' key"); return NULL; @@ -1499,7 +1500,7 @@ _uuid_UUID___setstate___impl(uuidobject *self, PyObject *state) } // Get and set 'is_safe' if present - PyObject *is_safe = PyDict_GetItemString(state, "is_safe"); + PyObject *is_safe = PyDict_GetItem(state, &_Py_ID(is_safe)); if (is_safe != NULL) { // is_safe is the integer value, we need to call SafeUUID(value) PyObject *safe_uuid_member = PyObject_CallOneArg(module_state->safe_uuid, is_safe); From 4ba14a84b372db7a4b4dbe1e8094f9f2deec2d8f Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Sun, 21 Sep 2025 
19:51:37 +0100 Subject: [PATCH 09/16] Ensure total compatibility of C/Python implementations of uuid4 / uuid7 A side effect of some compatibility fixes is the new code, with which the new C uuid7() is now 35x faster than pure Python (used to be 30x). --- Lib/test/test_uuid.py | 67 ++++++++++++ Lib/uuid.py | 37 ++++++- Modules/_uuidmodule.c | 183 +++++++++++++++++++++++++++------ Modules/clinic/_uuidmodule.c.h | 64 +++++++++++- 4 files changed, 312 insertions(+), 39 deletions(-) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index a315268c3d3cb7..5648dc68946efe 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -1524,6 +1524,24 @@ def test_windll_getnode(self): self.check_node(node) +class UuidHooks: + + def __init__(self, *, start_at=0, inc_by=0, seed=0): + self._time = start_at + self._inc_by = inc_by + self._rnd = random.Random(seed) + + def random_func (self, size) : + ret = b'' + for _ in range(size) : + ret += self._rnd.getrandbits(8).to_bytes(1, 'big') + return ret + + def time_func(self): + self._time += self._inc_by + return self._time + + @unittest.skipUnless(c_uuid, "requires the C _uuid module") class TestCImplementationCompat(unittest.TestCase): def test_compatibility(self): @@ -1595,6 +1613,55 @@ def full_test(p, u): self.assertEqual(len(all_ps), len(all_us)) self.assertEqual(len(all_ps), len(uuids)) + def _install_hooks(self, uuid_mod, *, start_at=0, inc_by=0, seed=0): + py_hooks = UuidHooks(start_at=start_at, inc_by=inc_by, seed=seed) + uuid_mod._install_py_hooks( + random_func=py_hooks.random_func, + time_func=py_hooks.time_func + ) + + c_hooks = UuidHooks(start_at=start_at, inc_by=inc_by, seed=seed) + uuid_mod._install_c_hooks( + random_func=c_hooks.random_func, + time_func=c_hooks.time_func + ) + + def _reset_hooks(self, uuid_mod): + uuid_mod._install_c_hooks(random_func=None, time_func=None) + uuid_mod._install_py_hooks(random_func=None, time_func=None) + + def test_exact_same_algo_uuid4(self): + import uuid + +
self._install_hooks(uuid) + try: + for seq_number in range(100): + with self.subTest(seq_number=seq_number): + self.assertEqual( + uuid._py_uuid4().hex, + uuid._c_uuid4().hex, + ) + finally: + self._reset_hooks(uuid) + + def test_exact_same_algo_uuid7(self): + import uuid + + try: + for start_at, inc_by in [ + (0, 0), (0, 10_000_000), (10_000_000 + 142, 1131827398127397) + ]: + self._install_hooks(uuid, start_at=start_at, inc_by=inc_by) + for seq_number in range(100): + with self.subTest( + seq_number=seq_number, start_at=start_at, inc_by=inc_by, + ): + self.assertEqual( + uuid._py_uuid7().hex, + uuid._c_uuid7().hex, + ) + finally: + self._reset_hooks(uuid) if __name__ == '__main__': diff --git a/Lib/uuid.py b/Lib/uuid.py index 03206dd28faf61..e72129f6d604eb 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -111,6 +111,33 @@ class SafeUUID: _RFC_4122_VERSION_7_FLAGS = ((7 << 76) | (0x8000 << 48)) _RFC_4122_VERSION_8_FLAGS = ((8 << 76) | (0x8000 << 48)) +_random_hook = None +_time_hook = None + + +def _gen_random(size): + if _random_hook is None: + return os.urandom(size) + else: + return _random_hook(size) + + +def _gen_time(): + if _time_hook is None: + return time.time_ns() + else: + return _time_hook() + + +def _install_py_hooks(*, random_func, time_func): + global _random_hook + global _time_hook + global _last_timestamp_v7 + _random_hook = random_func + _time_hook = time_func + # Reset _last_timestamp_v7 for repeatability of tests + _last_timestamp_v7 = None + # Import optional C extension at toplevel, to help disabling it when testing try: @@ -783,7 +810,7 @@ def uuid3(namespace, name): def uuid4(): """Generate a random UUID.""" - int_uuid_4 = int.from_bytes(os.urandom(16)) + int_uuid_4 = int.from_bytes(_gen_random(16)) int_uuid_4 &= _RFC_4122_CLEARFLAGS_MASK int_uuid_4 |= _RFC_4122_VERSION_4_FLAGS return UUID._from_int(int_uuid_4) @@ -843,7 +870,8 @@ def uuid6(node=None, clock_seq=None): _last_counter_v7 = 0 # 42-bit counter def _uuid7_get_counter_and_tail(): - 
rand = int.from_bytes(os.urandom(10)) + rand = int.from_bytes(_gen_random(10), 'big') + # 42-bit counter with MSB set to 0 counter = (rand >> 32) & 0x1ff_ffff_ffff # 32-bit random data @@ -872,7 +900,7 @@ def uuid7(): global _last_timestamp_v7 global _last_counter_v7 - nanoseconds = time.time_ns() + nanoseconds = _gen_time() timestamp_ms = nanoseconds // 1_000_000 if _last_timestamp_v7 is None or timestamp_ms > _last_timestamp_v7: @@ -888,7 +916,7 @@ def uuid7(): counter, tail = _uuid7_get_counter_and_tail() else: # 32-bit random data - tail = int.from_bytes(os.urandom(4)) + tail = int.from_bytes(_gen_random(4)) unix_ts_ms = timestamp_ms & 0xffff_ffff_ffff counter_msbs = counter >> 30 @@ -946,6 +974,7 @@ def uuid8(a=None, b=None, c=None): _py_UUID = UUID try: from _uuid import UUID, uuid4, uuid7 + from _uuid import _install_c_hooks except ImportError: _c_UUID = None _c_uuid4 = None diff --git a/Modules/_uuidmodule.c b/Modules/_uuidmodule.c index c06192ee476831..fa882c3209570b 100644 --- a/Modules/_uuidmodule.c +++ b/Modules/_uuidmodule.c @@ -176,9 +176,13 @@ typedef struct { PyObject *reserved_microsoft; PyObject *reserved_future; - // UUID v7 state - uint64_t last_timestamp_v7; - uint64_t last_counter_v7; + PyObject *random_func; + PyObject *random_size_int; + PyObject *time_func; + + // A freelist for uuid objects -- 15-20% performance boost. + uuidobject *freelist; + uint64_t freelist_size; // We overfetch entropy to speed up successive uuid generations; // this enables 10x peformance boost. @@ -186,9 +190,10 @@ typedef struct { uint64_t random_idx; uint64_t random_last_pid; - // A freelist for uuid objects -- 15-20% performance boost. 
- uuidobject *freelist; - uint64_t freelist_size; + // UUID v7 state + uint64_t last_timestamp_v7; + uint64_t last_counter_v7; + uint8_t last_timestamp_v7_init; } uuid_state; #include "clinic/_uuidmodule.c.h" @@ -232,6 +237,27 @@ get_uuid_state_by_cls(PyTypeObject *cls) // Forward declaration static PyObject *uuid_from_bytes_array(PyTypeObject *type, uint8_t bytes[16]); +static int +gen_time(uuid_state *state, PyTime_t* time) +{ + if (state->time_func == NULL) { + return PyTime_Time(time); + } + + PyObject *ret = PyObject_CallNoArgs(state->time_func); + if (ret == NULL) { + return -1; + } + + if (!PyLong_CheckExact(ret)) { + PyErr_SetString(PyExc_ValueError, "random_time must return int"); + } + + int res = PyLong_AsInt64(ret, time); + Py_DECREF(ret); + return res; +} + static int gen_random(uuid_state *state, uint8_t *bytes, Py_ssize_t size) { @@ -251,19 +277,62 @@ gen_random(uuid_state *state, uint8_t *bytes, Py_ssize_t size) state->random_idx = RANDOM_BUF_SIZE; } - if (state->random_idx + size <= RANDOM_BUF_SIZE) { + if (state->random_idx + size < RANDOM_BUF_SIZE) { memcpy(bytes, state->random_buf + state->random_idx, size); state->random_idx += size; } else { - // Pure Python implementation uses os.urandom() which - // wraps _PyOS_URandom - if (_PyOS_URandom(state->random_buf, RANDOM_BUF_SIZE) < 0) { - return -1; + if (state->random_idx < RANDOM_BUF_SIZE) { + // We exhaustively consume cached entropy. We do that + // because we have tests that compare Python and C + // implementations and it's important that they see incoming + // entropy as a continuous stream. + // The overhead here must be negligible, but we want the same + // code to be run in production and in tests. 
+ Py_ssize_t partial = RANDOM_BUF_SIZE - state->random_idx; + memcpy(bytes, state->random_buf + state->random_idx, partial); + bytes += partial; + size -= partial; + } + + if (state->random_func != NULL) { + PyObject *buf = PyObject_CallOneArg( + state->random_func, state->random_size_int); + if (buf == NULL) { + return -1; + } + + if (!PyBytes_CheckExact(buf)) { + PyErr_SetString(PyExc_ValueError, "random_func must return bytes"); + Py_DECREF(buf); + return -1; + } + + if (PyBytes_Size(buf) != (Py_ssize_t)RANDOM_BUF_SIZE) { + PyErr_Format( + PyExc_ValueError, + "random_func must return bytes of length %zd exactly", + (Py_ssize_t)RANDOM_BUF_SIZE + ); + Py_DECREF(buf); + return -1; + } + + memcpy(state->random_buf, PyBytes_AsString(buf), RANDOM_BUF_SIZE); + Py_DECREF(buf); + } + else { + // Pure Python implementation uses os.urandom() which + // wraps _PyOS_URandom + if (_PyOS_URandom(state->random_buf, RANDOM_BUF_SIZE) < 0) { + return -1; + } } + memcpy(bytes, state->random_buf, size); state->random_idx = size; } + return 0; } @@ -293,25 +362,21 @@ _uuid_uuid4_impl(PyObject *module) } static inline int -uuid7_get_counter_and_tail(uuid_state *state, uint64_t *counter, uint32_t *tail) +uuid7_get_counter_and_tail(uuid_state *state, uint64_t *counter, uint8_t *tail) { uint8_t rand_bytes[10]; if (gen_random(state, rand_bytes, 10) < 0) { return -1; } - uint16_t high = ((uint16_t)rand_bytes[0] << 8) | rand_bytes[1]; - uint64_t low = ((uint64_t)rand_bytes[2] << 56) | - ((uint64_t)rand_bytes[3] << 48) | - ((uint64_t)rand_bytes[4] << 40) | - ((uint64_t)rand_bytes[5] << 32) | - ((uint64_t)rand_bytes[6] << 24) | - ((uint64_t)rand_bytes[7] << 16) | - ((uint64_t)rand_bytes[8] << 8) | - ((uint64_t)rand_bytes[9]); + *counter = (((uint64_t)rand_bytes[0] & 0x01) << 40) | + ((uint64_t)rand_bytes[1] << 32) | + ((uint64_t)rand_bytes[2] << 24) | + ((uint64_t)rand_bytes[3] << 16) | + ((uint64_t)rand_bytes[4] << 8) | + ((uint64_t)rand_bytes[5]); - *counter = (((uint64_t)(high & 0x1FF) << 
32) | (low >> 32)) & 0x1FFFFFFFFFF; - *tail = (uint32_t)low; + memcpy(tail, rand_bytes + 6, 4); return 0; } @@ -335,16 +400,15 @@ _uuid_uuid7_impl(PyObject *module) uuid_state *state = get_uuid_state(module); uint8_t bytes[16]; uint64_t timestamp_ms, counter; - uint32_t tail; PyTime_t pytime; - if (PyTime_Time(&pytime) < 0) { + if (gen_time(state, &pytime) < 0) { return NULL; } timestamp_ms = (uint64_t)(pytime / 1000000); - if (state->last_timestamp_v7 == 0 || timestamp_ms > state->last_timestamp_v7) { - if (uuid7_get_counter_and_tail(state, &counter, &tail) < 0) { + if (!state->last_timestamp_v7_init || timestamp_ms > state->last_timestamp_v7) { + if (uuid7_get_counter_and_tail(state, &counter, bytes + 12) < 0) { return NULL; } } else { @@ -356,13 +420,13 @@ _uuid_uuid7_impl(PyObject *module) if (counter > 0x3FFFFFFFFFF) { // advance the 48-bit timestamp timestamp_ms += 1; - if (uuid7_get_counter_and_tail(state, &counter, &tail) < 0) { + if (uuid7_get_counter_and_tail(state, &counter, bytes + 12) < 0) { return NULL; } } else { // This is the common fast path, we only need 4 bytes of entropy // 32-bit random data - if (gen_random(state, (uint8_t *)&tail, 4) < 0) { + if (gen_random(state, bytes + 12, 4) < 0) { return NULL; } } @@ -388,17 +452,53 @@ _uuid_uuid7_impl(PyObject *module) bytes[10] = counter_lo >> 8; bytes[11] = counter_lo; - bytes[12] = tail >> 24; - bytes[13] = tail >> 16; - bytes[14] = tail >> 8; - bytes[15] = tail; - + state->last_timestamp_v7_init = 1; state->last_timestamp_v7 = timestamp_ms; state->last_counter_v7 = counter; return uuid_from_bytes_array(state->UuidType, bytes); } +/*[clinic input] +@critical_section +_uuid._install_c_hooks + + * + random_func: object + time_func: object + +[clinic start generated code]*/ + +static PyObject * +_uuid__install_c_hooks_impl(PyObject *module, PyObject *random_func, + PyObject *time_func) +/*[clinic end generated code: output=884aa6e91b2ea832 input=6c5017297067e2ea]*/ +{ + uuid_state *state = 
get_uuid_state(module); + + // Reset buffered entropy -- tests need to always start fresh + // when the random function is changed. Reset last_timestamp_v7 -- + // important for repeatable tests + state->random_idx = RANDOM_BUF_SIZE; + state->last_timestamp_v7_init = 0; + + if (random_func == Py_None) { + Py_CLEAR(state->random_func); + } else { + Py_INCREF(random_func); + Py_XSETREF(state->random_func, random_func); + } + + if (time_func == Py_None) { + Py_CLEAR(state->time_func); + } else { + Py_INCREF(time_func); + Py_XSETREF(state->time_func, time_func); + } + + Py_RETURN_NONE; +} + /*[clinic input] _uuid.UUID.__init__ @@ -1582,6 +1682,9 @@ module_traverse(PyObject *mod, visitproc visit, void *arg) Py_VISIT(state->rfc_4122); Py_VISIT(state->reserved_microsoft); Py_VISIT(state->reserved_future); + Py_VISIT(state->random_func); + Py_VISIT(state->time_func); + Py_VISIT(state->random_size_int); return 0; } @@ -1600,6 +1703,9 @@ module_clear(PyObject *mod) Py_CLEAR(state->rfc_4122); Py_CLEAR(state->reserved_microsoft); Py_CLEAR(state->reserved_future); + Py_CLEAR(state->random_func); + Py_CLEAR(state->time_func); + Py_CLEAR(state->random_size_int); if (state->freelist != NULL) { while (state->freelist != NULL) { @@ -1714,9 +1820,17 @@ uuid_exec(PyObject *module) } state->last_timestamp_v7 = 0; + state->last_timestamp_v7_init = 0; state->last_counter_v7 = 0; state->random_last_pid = uuid_getpid(); + state->time_func = NULL; + state->random_func = NULL; + state->random_size_int = PyLong_FromSize_t((Py_ssize_t)RANDOM_BUF_SIZE); + if (state->random_size_int == NULL) { + goto fail; + } + state->freelist = NULL; state->freelist_size = 0; @@ -1733,6 +1847,7 @@ uuid_exec(PyObject *module) static PyMethodDef uuid_methods[] = { _UUID_UUID4_METHODDEF _UUID_UUID7_METHODDEF + _UUID__INSTALL_C_HOOKS_METHODDEF #if defined(HAVE_UUID_UUID_H) || defined(HAVE_UUID_H) {"generate_time_safe", py_uuid_generate_time_safe, METH_NOARGS, NULL}, #endif diff --git
a/Modules/clinic/_uuidmodule.c.h b/Modules/clinic/_uuidmodule.c.h index 3d9c374d83087d..f8fffa9b41b7b7 100644 --- a/Modules/clinic/_uuidmodule.c.h +++ b/Modules/clinic/_uuidmodule.c.h @@ -59,6 +59,68 @@ _uuid_uuid7(PyObject *module, PyObject *Py_UNUSED(ignored)) return return_value; } +PyDoc_STRVAR(_uuid__install_c_hooks__doc__, +"_install_c_hooks($module, /, *, random_func, time_func)\n" +"--\n" +"\n"); + +#define _UUID__INSTALL_C_HOOKS_METHODDEF \ + {"_install_c_hooks", _PyCFunction_CAST(_uuid__install_c_hooks), METH_FASTCALL|METH_KEYWORDS, _uuid__install_c_hooks__doc__}, + +static PyObject * +_uuid__install_c_hooks_impl(PyObject *module, PyObject *random_func, + PyObject *time_func); + +static PyObject * +_uuid__install_c_hooks(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(random_func), &_Py_ID(time_func), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"random_func", "time_func", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "_install_c_hooks", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + PyObject *random_func; + PyObject *time_func; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 0, /*maxpos*/ 0, /*minkw*/ 2, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + random_func = args[0]; + time_func = args[1]; + Py_BEGIN_CRITICAL_SECTION(module); + return_value = _uuid__install_c_hooks_impl(module, random_func, time_func); 
+ Py_END_CRITICAL_SECTION(); + +exit: + return return_value; +} + PyDoc_STRVAR(_uuid_UUID___init____doc__, "UUID(hex=, bytes=None, bytes_le=None,\n" " fields=, int=,\n" @@ -252,4 +314,4 @@ _uuid_UUID___setstate__(PyObject *self, PyObject *state) return return_value; } -/*[clinic end generated code: output=ec50cafa0e028d2b input=a9049054013a1b77]*/ +/*[clinic end generated code: output=095610812af4b3bd input=a9049054013a1b77]*/ From 0d4e6058133fd49ea293073b4c83cf117a48e462 Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Sun, 21 Sep 2025 20:48:48 +0100 Subject: [PATCH 10/16] Stop importing unused SafeUUID members --- Modules/_uuidmodule.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/Modules/_uuidmodule.c b/Modules/_uuidmodule.c index fa882c3209570b..44708dc415ab83 100644 --- a/Modules/_uuidmodule.c +++ b/Modules/_uuidmodule.c @@ -165,8 +165,6 @@ typedef struct { PyTypeObject *UuidType; PyObject *safe_uuid; - PyObject *safe_uuid_safe; - PyObject *safe_uuid_unsafe; PyObject *safe_uuid_unknown; PyObject *uint128_max; @@ -1674,8 +1672,6 @@ module_traverse(PyObject *mod, visitproc visit, void *arg) uuid_state *state = get_uuid_state(mod); Py_VISIT(state->UuidType); Py_VISIT(state->safe_uuid); - Py_VISIT(state->safe_uuid_safe); - Py_VISIT(state->safe_uuid_unsafe); Py_VISIT(state->safe_uuid_unknown); Py_VISIT(state->uint128_max); Py_VISIT(state->reserved_ncs); @@ -1695,8 +1691,6 @@ module_clear(PyObject *mod) Py_CLEAR(state->UuidType); Py_CLEAR(state->safe_uuid); - Py_CLEAR(state->safe_uuid_safe); - Py_CLEAR(state->safe_uuid_unsafe); Py_CLEAR(state->safe_uuid_unknown); Py_CLEAR(state->uint128_max); Py_CLEAR(state->reserved_ncs); @@ -1781,14 +1775,6 @@ uuid_exec(PyObject *module) if (safe_uuid == NULL) { goto fail; } - state->safe_uuid_safe = PyObject_GetAttrString(safe_uuid, "safe"); - if (state->safe_uuid_safe == NULL) { - goto fail; - } - state->safe_uuid_unsafe = PyObject_GetAttrString(safe_uuid, "unsafe"); - if (state->safe_uuid_unsafe == NULL) { 
- goto fail; - } state->safe_uuid_unknown = PyObject_GetAttrString(safe_uuid, "unknown"); if (state->safe_uuid_unknown == NULL) { goto fail; From b15b85b9314680a48146655eb3a1dd342fcfb49b Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Sun, 21 Sep 2025 20:58:46 +0100 Subject: [PATCH 11/16] Drop SafeUUID.unknown --- Modules/_uuidmodule.c | 76 +++++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 31 deletions(-) diff --git a/Modules/_uuidmodule.c b/Modules/_uuidmodule.c index 44708dc415ab83..396f7fc3381303 100644 --- a/Modules/_uuidmodule.c +++ b/Modules/_uuidmodule.c @@ -165,7 +165,6 @@ typedef struct { PyTypeObject *UuidType; PyObject *safe_uuid; - PyObject *safe_uuid_unknown; PyObject *uint128_max; @@ -968,12 +967,7 @@ make_uuid(PyTypeObject *type) } } - // During module initialization, safe_uuid_unknown might not be set yet - if (state->safe_uuid_unknown != NULL) { - self->is_safe = Py_NewRef(state->safe_uuid_unknown); - } else { - self->is_safe = Py_NewRef(Py_None); - } + self->is_safe = NULL; self->weakreflist = NULL; self->cached_hash = -1; @@ -1033,8 +1027,9 @@ static PyObject * Uuid_get_is_safe(PyObject *o, void *closure) { uuidobject *self = (uuidobject *)o; + uuid_state *state = get_uuid_state_by_cls(Py_TYPE(self)); if (self->is_safe == NULL) { - Py_RETURN_NONE; + return PyObject_GetAttrString(state->safe_uuid, "unknown"); } return Py_NewRef(self->is_safe); } @@ -1579,8 +1574,6 @@ static PyObject * _uuid_UUID___setstate___impl(uuidobject *self, PyObject *state) /*[clinic end generated code: output=cdf6bd4a2a680b3f input=b1ec0744788a73a0]*/ { - uuid_state *module_state = get_uuid_state_by_cls(Py_TYPE(self)); - if (!PyDict_Check(state)) { PyErr_SetString(PyExc_TypeError, "state must be a dictionary"); return NULL; @@ -1597,25 +1590,52 @@ _uuid_UUID___setstate___impl(uuidobject *self, PyObject *state) return NULL; } - // Get and set 'is_safe' if present - PyObject *is_safe = PyDict_GetItem(state, &_Py_ID(is_safe)); - if (is_safe != 
NULL) { - // is_safe is the integer value, we need to call SafeUUID(value) - PyObject *safe_uuid_member = PyObject_CallOneArg(module_state->safe_uuid, is_safe); - if (safe_uuid_member == NULL) { - return NULL; - } - Py_XDECREF(self->is_safe); - self->is_safe = safe_uuid_member; - } else { - // No is_safe in state, set to SafeUUID.unknown - Py_XDECREF(self->is_safe); - self->is_safe = Py_NewRef(module_state->safe_uuid_unknown); + Py_CLEAR(self->is_safe); + if (PyDict_GetItemRef(state, &_Py_ID(is_safe), &self->is_safe) < 0) { + return NULL; } Py_RETURN_NONE; } +static PyObject * +compute_uuid_max(void) +{ + // Compute `(1 << 128) - 1` + + PyObject *one = NULL; + PyObject *shift = NULL; + PyObject *shifted = NULL; + + one = PyLong_FromLong(1); + if (one == NULL) { + goto err; + } + + shift = PyLong_FromLong(128); + if (shift == NULL) { + goto err; + } + + shifted = PyNumber_Lshift(one, shift); + if (shifted == NULL) { + goto err; + } + Py_DECREF(shift); + + PyObject *result = PyNumber_Subtract(shifted, one); + Py_DECREF(shifted); + Py_DECREF(one); + + return result; + +err: + Py_XDECREF(one); + Py_XDECREF(shift); + Py_XDECREF(shifted); + return NULL; +} + static PyMethodDef Uuid_methods[] = { _UUID_UUID__FROM_INT_METHODDEF _UUID_UUID___GETSTATE___METHODDEF @@ -1672,7 +1692,6 @@ module_traverse(PyObject *mod, visitproc visit, void *arg) uuid_state *state = get_uuid_state(mod); Py_VISIT(state->UuidType); Py_VISIT(state->safe_uuid); - Py_VISIT(state->safe_uuid_unknown); Py_VISIT(state->uint128_max); Py_VISIT(state->reserved_ncs); Py_VISIT(state->rfc_4122); @@ -1691,7 +1710,6 @@ module_clear(PyObject *mod) Py_CLEAR(state->UuidType); Py_CLEAR(state->safe_uuid); - Py_CLEAR(state->safe_uuid_unknown); Py_CLEAR(state->uint128_max); Py_CLEAR(state->reserved_ncs); Py_CLEAR(state->rfc_4122); @@ -1775,12 +1793,8 @@ uuid_exec(PyObject *module) if (safe_uuid == NULL) { goto fail; } - state->safe_uuid_unknown = PyObject_GetAttrString(safe_uuid, "unknown"); - if 
(state->safe_uuid_unknown == NULL) { - goto fail; - } - state->uint128_max = PyObject_GetAttrString(uuid_mod, "_UINT_128_MAX"); + state->uint128_max = compute_uuid_max(); if (state->uint128_max == NULL) { goto fail; } From 37685da6187febc9aa0e7eaa1afb8bf863a60d6e Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Sun, 21 Sep 2025 21:06:07 +0100 Subject: [PATCH 12/16] Drop more things that can be imported dynamically --- Modules/_uuidmodule.c | 54 ++++++++++++++----------------------------- 1 file changed, 17 insertions(+), 37 deletions(-) diff --git a/Modules/_uuidmodule.c b/Modules/_uuidmodule.c index 396f7fc3381303..d6629086436cd0 100644 --- a/Modules/_uuidmodule.c +++ b/Modules/_uuidmodule.c @@ -168,11 +168,6 @@ typedef struct { PyObject *uint128_max; - PyObject *reserved_ncs; - PyObject *rfc_4122; - PyObject *reserved_microsoft; - PyObject *reserved_future; - PyObject *random_func; PyObject *random_size_int; PyObject *time_func; @@ -1045,6 +1040,19 @@ Uuid_get_hex(PyObject *o, void *closure) return PyUnicode_FromStringAndSize(hex, 32); } +static PyObject * +get_variant_marker(uuid_state *state, const char *name) +{ + PyObject *mod = PyImport_ImportModule("uuid"); + if (mod == NULL) { + return NULL; + } + + PyObject *ret = PyObject_GetAttrString(mod, name); + Py_DECREF(mod); + return ret; +} + static PyObject * Uuid_get_variant(PyObject *o, void *closure) { @@ -1056,23 +1064,23 @@ Uuid_get_variant(PyObject *o, void *closure) // xxx - three high bits of variant_byte are unknown if (!(variant_byte & 0x80)) { // & 0b1000_0000 // 0xx - RESERVED_NCS - return Py_NewRef(state->reserved_ncs); + return get_variant_marker(state, "RESERVED_NCS"); } // 1xx -- we know that high bit must be 1 if (!(variant_byte & 0x40)) { // & 0b0100_0000 // 10x - RFC_4122 - return Py_NewRef(state->rfc_4122); + return get_variant_marker(state, "RFC_4122"); } // 11x -- we know that two high bits are 1 if (!(variant_byte & 0x20)) { // & 0b0010_0000 // 110 - RESERVED_MICROSOFT - return 
Py_NewRef(state->reserved_microsoft); + return get_variant_marker(state, "RESERVED_MICROSOFT"); } // 111 -- we know that all three high bits are 1 - RESERVED_FUTURE - return Py_NewRef(state->reserved_future); + return get_variant_marker(state, "RESERVED_FUTURE"); } static int @@ -1693,10 +1701,6 @@ module_traverse(PyObject *mod, visitproc visit, void *arg) Py_VISIT(state->UuidType); Py_VISIT(state->safe_uuid); Py_VISIT(state->uint128_max); - Py_VISIT(state->reserved_ncs); - Py_VISIT(state->rfc_4122); - Py_VISIT(state->reserved_microsoft); - Py_VISIT(state->reserved_future); Py_VISIT(state->random_func); Py_VISIT(state->time_func); Py_VISIT(state->random_size_int); @@ -1711,10 +1715,6 @@ module_clear(PyObject *mod) Py_CLEAR(state->UuidType); Py_CLEAR(state->safe_uuid); Py_CLEAR(state->uint128_max); - Py_CLEAR(state->reserved_ncs); - Py_CLEAR(state->rfc_4122); - Py_CLEAR(state->reserved_microsoft); - Py_CLEAR(state->reserved_future); Py_CLEAR(state->random_func); Py_CLEAR(state->time_func); Py_CLEAR(state->random_size_int); @@ -1799,26 +1799,6 @@ uuid_exec(PyObject *module) goto fail; } - state->reserved_ncs = PyObject_GetAttrString(uuid_mod, "RESERVED_NCS"); - if (state->reserved_ncs == NULL) { - goto fail; - } - - state->rfc_4122 = PyObject_GetAttrString(uuid_mod, "RFC_4122"); - if (state->rfc_4122 == NULL) { - goto fail; - } - - state->reserved_microsoft = PyObject_GetAttrString(uuid_mod, "RESERVED_MICROSOFT"); - if (state->reserved_microsoft == NULL) { - goto fail; - } - - state->reserved_future = PyObject_GetAttrString(uuid_mod, "RESERVED_FUTURE"); - if (state->reserved_future == NULL) { - goto fail; - } - state->last_timestamp_v7 = 0; state->last_timestamp_v7_init = 0; state->last_counter_v7 = 0; From f83d1869db622eb82e86f15a81046de7a231de26 Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Sun, 21 Sep 2025 21:29:12 +0100 Subject: [PATCH 13/16] Remove circular import; import 'uuid' from '_uuid' lazily --- Modules/_uuidmodule.c | 97 
++++++++++++++++++++++--------------------- 1 file changed, 49 insertions(+), 48 deletions(-) diff --git a/Modules/_uuidmodule.c b/Modules/_uuidmodule.c index d6629086436cd0..4f5116d3c96764 100644 --- a/Modules/_uuidmodule.c +++ b/Modules/_uuidmodule.c @@ -200,6 +200,7 @@ static int from_hex(uuidobject *self, PyObject *hex); static int from_bytes_le(uuidobject *self, Py_buffer *bytes_le); static int from_int(uuidobject *self, PyObject *int_value, int validate); static int from_fields(uuidobject *self, PyObject *fields); +static PyObject * get_SafeUUID(uuid_state *state); static uint64_t uuid_getpid(void) { @@ -514,8 +515,6 @@ _uuid_UUID___init___impl(uuidobject *self, PyObject *hex, Py_buffer *bytes, /*[clinic end generated code: output=93a6881c8f79bf9b input=b9c79672fbd76a99]*/ { - uuid_state *state = get_uuid_state_by_cls(Py_TYPE(self)); - int passed = 0; if (hex != NULL) passed++; if (bytes->obj != NULL) passed++; @@ -584,13 +583,13 @@ _uuid_UUID___init___impl(uuidobject *self, PyObject *hex, Py_buffer *bytes, self->bytes[6] |= (version_num << 4); } + if (is_safe == Py_None) { + // Py_None is immortal; skip decref + is_safe = NULL; + } if (is_safe != NULL) { - // Validate by calling SafeUUID(is_safe) to ensure it's a valid enum member - PyObject *validated = PyObject_CallOneArg(state->safe_uuid, is_safe); - if (validated == NULL) { - return -1; - } - Py_XSETREF(self->is_safe, validated); + Py_INCREF(is_safe); + Py_XSETREF(self->is_safe, is_safe); } return 0; @@ -937,6 +936,33 @@ get_int(uuidobject *self) return _PyLong_FromByteArray((unsigned char *)self->bytes, 16, 0, 0); } +static PyObject * +get_uuidmod_attr(uuid_state *state, const char *name) +{ + PyObject *mod = PyImport_ImportModule("uuid"); + if (mod == NULL) { + return NULL; + } + + PyObject *ret = PyObject_GetAttrString(mod, name); + Py_DECREF(mod); + return ret; +} + +static PyObject * +get_SafeUUID(uuid_state *state) +{ + if (state->safe_uuid != NULL) { + return Py_NewRef(state->safe_uuid); + } + + 
state->safe_uuid = get_uuidmod_attr(state, "SafeUUID"); + if (state->safe_uuid == NULL) { + return NULL; + } + return Py_NewRef(state->safe_uuid); +} + static uuidobject * make_uuid(PyTypeObject *type) { @@ -1024,7 +1050,14 @@ Uuid_get_is_safe(PyObject *o, void *closure) uuidobject *self = (uuidobject *)o; uuid_state *state = get_uuid_state_by_cls(Py_TYPE(self)); if (self->is_safe == NULL) { - return PyObject_GetAttrString(state->safe_uuid, "unknown"); + PyObject *SafeUUID = get_SafeUUID(state); + if (SafeUUID == NULL) { + // Lazy lookup of uuid.SafeUUID failed; propagate the pending exception. + return NULL; + } + PyObject *ret = PyObject_GetAttrString(SafeUUID, "unknown"); + Py_DECREF(SafeUUID); + return ret; } return Py_NewRef(self->is_safe); } @@ -1040,19 +1069,6 @@ Uuid_get_hex(PyObject *o, void *closure) return PyUnicode_FromStringAndSize(hex, 32); } -static PyObject * -get_variant_marker(uuid_state *state, const char *name) -{ - PyObject *mod = PyImport_ImportModule("uuid"); - if (mod == NULL) { - return NULL; - } - - PyObject *ret = PyObject_GetAttrString(mod, name); - Py_DECREF(mod); - return ret; -} - static PyObject * Uuid_get_variant(PyObject *o, void *closure) { @@ -1064,23 +1080,23 @@ Uuid_get_variant(PyObject *o, void *closure) // xxx - three high bits of variant_byte are unknown if (!(variant_byte & 0x80)) { // & 0b1000_0000 // 0xx - RESERVED_NCS - return get_variant_marker(state, "RESERVED_NCS"); + return get_uuidmod_attr(state, "RESERVED_NCS"); } // 1xx -- we know that high bit must be 1 if (!(variant_byte & 0x40)) { // & 0b0100_0000 // 10x - RFC_4122 - return get_variant_marker(state, "RFC_4122"); + return get_uuidmod_attr(state, "RFC_4122"); } // 11x -- we know that two high bits are 1 if (!(variant_byte & 0x20)) { // & 0b0010_0000 // 110 - RESERVED_MICROSOFT - return get_variant_marker(state, "RESERVED_MICROSOFT"); + return get_uuidmod_attr(state, "RESERVED_MICROSOFT"); } // 111 -- we know that all three high bits are 1 - RESERVED_FUTURE - return get_variant_marker(state, "RESERVED_FUTURE"); + return get_uuidmod_attr(state, "RESERVED_FUTURE"); } static int @@
-1743,13 +1759,11 @@ static int uuid_exec(PyObject *module) { uuid_state *state = get_uuid_state(module); - PyObject *uuid_mod = NULL; - PyObject *safe_uuid = NULL; #define ADD_INT(NAME, VALUE) \ do { \ if (PyModule_AddIntConstant(module, (NAME), (VALUE)) < 0) { \ - goto fail; \ + return -1; \ } \ } while (0) @@ -1779,24 +1793,17 @@ uuid_exec(PyObject *module) NULL ); if (state->UuidType == NULL) { - goto fail; + return -1; } if (PyModule_AddType(module, state->UuidType) < 0) { - goto fail; + return -1; } - uuid_mod = PyImport_ImportModule("uuid"); - if (uuid_mod == NULL) { - goto fail; - } - safe_uuid = state->safe_uuid = PyObject_GetAttrString(uuid_mod, "SafeUUID"); - if (safe_uuid == NULL) { - goto fail; - } + state->safe_uuid = NULL; state->uint128_max = compute_uuid_max(); if (state->uint128_max == NULL) { - goto fail; + return -1; } state->last_timestamp_v7 = 0; @@ -1808,20 +1815,14 @@ uuid_exec(PyObject *module) state->random_func = NULL; state->random_size_int = PyLong_FromSize_t((Py_ssize_t)RANDOM_BUF_SIZE); if (state->random_size_int == NULL) { - goto fail; + return -1; } + state->random_idx = RANDOM_BUF_SIZE; state->freelist = NULL; state->freelist_size = 0; - state->random_idx = RANDOM_BUF_SIZE; - - Py_CLEAR(uuid_mod); return 0; - -fail: - Py_CLEAR(uuid_mod); - return -1; } static PyMethodDef uuid_methods[] = { From baacd100acfbafafc55bf9a45d2661392561dc5b Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Sun, 21 Sep 2025 21:22:47 -0700 Subject: [PATCH 14/16] Regen files --- Include/internal/pycore_global_objects_fini_generated.h | 2 ++ Include/internal/pycore_global_strings.h | 2 ++ Include/internal/pycore_runtime_init_generated.h | 2 ++ Include/internal/pycore_unicodeobject_generated.h | 8 ++++++++ 4 files changed, 14 insertions(+) diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index e99af5a60ab2c1..5c938b487c6c31 100644 --- 
a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -1212,6 +1212,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(query)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(queuetype)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(quotetabs)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(random_func)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(raw)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(read)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(read1)); @@ -1315,6 +1316,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(threading)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(throw)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(time)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(time_func)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(timeout)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(timer)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(times)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index a3e86da7c50304..3954ffa0a55d8d 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -703,6 +703,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(query) STRUCT_FOR_ID(queuetype) STRUCT_FOR_ID(quotetabs) + STRUCT_FOR_ID(random_func) STRUCT_FOR_ID(raw) STRUCT_FOR_ID(read) STRUCT_FOR_ID(read1) @@ -806,6 +807,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(threading) STRUCT_FOR_ID(throw) STRUCT_FOR_ID(time) + STRUCT_FOR_ID(time_func) STRUCT_FOR_ID(timeout) STRUCT_FOR_ID(timer) STRUCT_FOR_ID(times) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 8a1fda294aa65c..20bd9186fb455a 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ 
b/Include/internal/pycore_runtime_init_generated.h @@ -1210,6 +1210,7 @@ extern "C" { INIT_ID(query), \ INIT_ID(queuetype), \ INIT_ID(quotetabs), \ + INIT_ID(random_func), \ INIT_ID(raw), \ INIT_ID(read), \ INIT_ID(read1), \ @@ -1313,6 +1314,7 @@ extern "C" { INIT_ID(threading), \ INIT_ID(throw), \ INIT_ID(time), \ + INIT_ID(time_func), \ INIT_ID(timeout), \ INIT_ID(timer), \ INIT_ID(times), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index 643a05fd906519..c39eea669b0c4e 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -2600,6 +2600,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(random_func); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(raw); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -3012,6 +3016,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(time_func); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(timeout); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); From bd673501c17e393a5e8276b537eb4d007a7b1bb2 Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Sun, 21 Sep 2025 21:41:05 -0700 Subject: [PATCH 15/16] Get module state assuming there could be subclasses of UUID This fixes a segfault; regr test added. 
While there, make the freelist only work with our base UUID type and no subclasses of it. --- Lib/test/test_uuid.py | 11 +++++++++++ Modules/_uuidmodule.c | 19 +++++++++++++++---- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index 5648dc68946efe..d171c404aef9a5 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -1663,6 +1663,17 @@ def test_exact_same_algo_uuid7(self): finally: self._reset_hooks(uuid) + def test_subclassing(self): + import uuid + + class U(uuid._c_UUID): + pass + + u = U(int=1) + u_str = str(u) + del u + self.assertEqual(u_str, '00000000-0000-0000-0000-000000000001') + if __name__ == '__main__': unittest.main() diff --git a/Modules/_uuidmodule.c b/Modules/_uuidmodule.c index 4f5116d3c96764..4d992a9087f54f 100644 --- a/Modules/_uuidmodule.c +++ b/Modules/_uuidmodule.c @@ -211,6 +211,8 @@ uuid_getpid(void) { #endif } +static struct PyModuleDef uuidmodule; + static inline uuid_state * get_uuid_state(PyObject *mod) { @@ -222,7 +224,9 @@ get_uuid_state(PyObject *mod) static inline uuid_state * get_uuid_state_by_cls(PyTypeObject *cls) { - uuid_state *state = (uuid_state *)PyType_GetModuleState(cls); + PyObject *module = PyType_GetModuleByDef(cls, &uuidmodule); + assert(module != NULL); + uuid_state *state = get_uuid_state(module); assert(state != NULL); return state; } @@ -1019,18 +1023,25 @@ Uuid_dealloc(PyObject *obj) } Py_CLEAR(uuid->is_safe); - int added_to_freelist = 0; + int skip_dealloc = 0; + if (type != state->UuidType) { + // We only apply the freelist optimization to the known C type, + // not any subclasses of it. 
+ goto dealloc; + } + Py_BEGIN_CRITICAL_SECTION(type); if (state->freelist_size < MAX_FREE_LIST_SIZE) { uuidobject *head = state->freelist; state->freelist = uuid; uuid->weakreflist = (PyObject *)head; state->freelist_size++; - added_to_freelist = 1; + skip_dealloc = 1; } Py_END_CRITICAL_SECTION(); - if (!added_to_freelist) { +dealloc: + if (!skip_dealloc) { type->tp_free(uuid); // UUID is a heap allocated type so we have to decref the type ref Py_DECREF(type); From b10a1f7c68d3defd49f7b761c70d20a1bb9b12e3 Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Sun, 21 Sep 2025 21:50:15 -0700 Subject: [PATCH 16/16] More ergonomic int cast code --- Modules/_uuidmodule.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Modules/_uuidmodule.c b/Modules/_uuidmodule.c index 4d992a9087f54f..0ed8a1274e7b3a 100644 --- a/Modules/_uuidmodule.c +++ b/Modules/_uuidmodule.c @@ -837,10 +837,8 @@ extract_field( goto fail; } - int overflow; - uint64_t value = PyLong_AsLongLongAndOverflow(field, &overflow); - if (overflow || (value == (uint64_t)-1 && PyErr_Occurred())) { - PyErr_SetString(PyExc_ValueError, error_msg); + uint64_t value; + if (PyLong_AsUInt64(field, &value) < 0) { goto fail; }