Skip to content

Commit e6754e9

Browse files
gh-142889: Address PR review comments on PyDictKeysObject restructure
replace char dk_indices[] with explicit union named dk_entries because union has non-zero size, update allocation and size reporting to use offsetof in place of sizeof, keeping actual memory footprint unchanged remove _DK_INDICES_END(), inline uses in macros adds PyDictKeysObject pointer note to layout diagram for dictobject.c
1 parent cc0a356 commit e6754e9

3 files changed

Lines changed: 22 additions & 41 deletions

File tree

Include/internal/pycore_dict.h

Lines changed: 11 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -198,29 +198,13 @@ struct _dictkeysobject {
198198
/* Number of used entries in dk_entries. */
199199
Py_ssize_t dk_nentries;
200200

201-
/* Offset to entries within this allocation.
202-
203-
PyDictKeysObject * points to dk_refcnt. The actual hash table
204-
(dk_indices) is stored immediately before the struct in memory;
205-
see _DK_INDICES_END() and _DK_INDICES_BASE().
206-
207-
dk_indices marks the start of the entries array and is used by
208-
DK_ENTRIES() / DK_UNICODE_ENTRIES(). */
209-
char dk_indices[]; /* char is required to avoid strict aliasing. */
210-
211-
/* dk_indices is the actual hash table of dk_size entries. It holds
212-
indices in dk_entries, or DKIX_EMPTY(-1) or DKIX_DUMMY(-2).
213-
214-
Indices must be: 0 <= indice < USABLE_FRACTION(dk_size).
215-
216-
The size in bytes of an indice depends on dk_size:
217-
218-
- 1 byte if dk_size <= 0xff (char*)
219-
- 2 bytes if dk_size <= 0xffff (int16_t*)
220-
- 4 bytes if dk_size <= 0xffffffff (int32_t*)
221-
- 8 bytes otherwise (int64_t*)
222-
223-
Dynamically sized, SIZEOF_VOID_P is minimum. */
201+
/* The actual hash table (dk_indices) is stored immediately before this
202+
struct in memory (negative offsets from dk); see _DK_INDICES_BASE().
203+
The entries array is stored here, at the end of the struct. */
204+
union {
205+
PyDictKeyEntry entries[1];
206+
PyDictUnicodeEntry unicode_entries[1];
207+
} dk_entries;
224208
};
225209

226210
/* This must be no more than 250, for the prefix size to fit in one byte. */
@@ -248,10 +232,6 @@ struct _dictvalues {
248232
#define DK_SIZE(dk) (1<<DK_LOG_SIZE(dk))
249233
#endif
250234

251-
static inline void* _DK_INDICES_END(const PyDictKeysObject *dk) {
252-
return (void *)dk;
253-
}
254-
255235
static inline void* _DK_INDICES_BASE(const PyDictKeysObject *dk) {
256236
size_t indices_size = (size_t)1 << dk->dk_log2_index_bytes;
257237
return (char *)dk - indices_size;
@@ -262,16 +242,17 @@ static inline void* _DK_ALLOC_BASE(PyDictKeysObject *dk) {
262242
}
263243

264244
static inline void* _DK_ENTRIES(PyDictKeysObject *dk) {
265-
return (void *)(&dk->dk_indices[0]);
245+
return (void *)(&dk->dk_entries);
266246
}
267247

268248
static inline PyDictKeyEntry* DK_ENTRIES(PyDictKeysObject *dk) {
269249
assert(dk->dk_kind == DICT_KEYS_GENERAL);
270-
return (PyDictKeyEntry*)_DK_ENTRIES(dk);
250+
return dk->dk_entries.entries;
271251
}
252+
272253
static inline PyDictUnicodeEntry* DK_UNICODE_ENTRIES(PyDictKeysObject *dk) {
273254
assert(dk->dk_kind != DICT_KEYS_GENERAL);
274-
return (PyDictUnicodeEntry*)_DK_ENTRIES(dk);
255+
return dk->dk_entries.unicode_entries;
275256
}
276257

277258
#define DK_IS_UNICODE(dk) ((dk)->dk_kind != DICT_KEYS_GENERAL)

Modules/_testinternalcapi.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1942,12 +1942,12 @@ dict_check_indices_layout(PyObject *self, PyObject *arg)
19421942

19431943
bool ok = true;
19441944
ok &= (header == base + indices_size);
1945-
ok &= (entries == header + sizeof(PyDictKeysObject));
1945+
ok &= (entries == header + offsetof(PyDictKeysObject, dk_entries));
19461946

19471947
size_t index_bytes = dict_index_bytes_for_keys(keys);
19481948
char *idx_base = (char *)_DK_INDICES_BASE(keys);
19491949
/* Index 0 is stored immediately before the header. */
1950-
char *idx0 = (char *)_DK_INDICES_END(keys) - (ptrdiff_t)index_bytes;
1950+
char *idx0 = (char *)keys - (ptrdiff_t)index_bytes;
19511951
ok &= (idx0 == idx_base + indices_size - (ptrdiff_t)index_bytes);
19521952

19531953
return PyBool_FromLong(ok);

Objects/dictobject.c

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ As of Python 3.6, this is compact and ordered. Basic idea is described here:
1919
+---------------------+
2020
| dk_indices[] |
2121
| |
22-
+---------------------+
22+
+---------------------+ <---- PyDictKeysObject* pointer points here
2323
| dk_refcnt |
2424
| dk_log2_size |
2525
| dk_log2_index_bytes |
@@ -182,8 +182,8 @@ ASSERT_DICT_LOCKED(PyObject *op)
182182

183183
#define IS_DICT_SHARED(mp) _PyObject_GC_IS_SHARED(mp)
184184
#define SET_DICT_SHARED(mp) _PyObject_GC_SET_SHARED(mp)
185-
#define LOAD_INDEX(keys, size, idx) _Py_atomic_load_int##size##_relaxed(&((const int##size##_t*)_DK_INDICES_END(keys))[-1 - (idx)]);
186-
#define STORE_INDEX(keys, size, idx, value) _Py_atomic_store_int##size##_relaxed(&((int##size##_t*)_DK_INDICES_END(keys))[-1 - (idx)], (int##size##_t)value);
185+
#define LOAD_INDEX(keys, size, idx) _Py_atomic_load_int##size##_relaxed(&((const int##size##_t*)(keys))[-1 - (idx)]);
186+
#define STORE_INDEX(keys, size, idx, value) _Py_atomic_store_int##size##_relaxed(&((int##size##_t*)(keys))[-1 - (idx)], (int##size##_t)value);
187187
#define ASSERT_OWNED_OR_SHARED(mp) \
188188
assert(_Py_IsOwnedByCurrentThread((PyObject *)mp) || IS_DICT_SHARED(mp));
189189

@@ -262,8 +262,8 @@ static inline void split_keys_entry_added(PyDictKeysObject *keys)
262262
#define UNLOCK_KEYS_IF_SPLIT(keys, kind)
263263
#define IS_DICT_SHARED(mp) (false)
264264
#define SET_DICT_SHARED(mp)
265-
#define LOAD_INDEX(keys, size, idx) ((const int##size##_t*)_DK_INDICES_END(keys))[-1 - (idx)]
266-
#define STORE_INDEX(keys, size, idx, value) ((int##size##_t*)_DK_INDICES_END(keys))[-1 - (idx)] = (int##size##_t)value
265+
#define LOAD_INDEX(keys, size, idx) ((const int##size##_t*)(keys))[-1 - (idx)]
266+
#define STORE_INDEX(keys, size, idx, value) ((int##size##_t*)(keys))[-1 - (idx)] = (int##size##_t)value
267267

268268
static inline void split_keys_entry_added(PyDictKeysObject *keys)
269269
{
@@ -651,7 +651,7 @@ static const _PyDict_EmptyKeysStorage empty_keys_storage = {
651651
1, /* dk_version */
652652
0, /* dk_usable (immutable) */
653653
0, /* dk_nentries */
654-
{},
654+
{}, /* dk_entries */
655655
}
656656
};
657657

@@ -836,7 +836,7 @@ new_keys_object(uint8_t log2_size, bool unicode)
836836

837837
if (base == NULL) {
838838
base = PyMem_Malloc(indices_size
839-
+ sizeof(PyDictKeysObject)
839+
+ offsetof(PyDictKeysObject, dk_entries)
840840
+ entry_size * usable);
841841
if (base == NULL) {
842842
PyErr_NoMemory();
@@ -859,7 +859,7 @@ new_keys_object(uint8_t log2_size, bool unicode)
859859
dk->dk_usable = usable;
860860
dk->dk_version = 0;
861861
memset(_DK_INDICES_BASE(dk), 0xff, indices_size);
862-
memset(&dk->dk_indices[0], 0, entry_size * usable);
862+
memset(&dk->dk_entries, 0, entry_size * usable);
863863
return dk;
864864
}
865865

@@ -4921,7 +4921,7 @@ _PyDict_KeysSize(PyDictKeysObject *keys)
49214921
{
49224922
size_t es = (keys->dk_kind == DICT_KEYS_GENERAL
49234923
? sizeof(PyDictKeyEntry) : sizeof(PyDictUnicodeEntry));
4924-
size_t size = sizeof(PyDictKeysObject);
4924+
size_t size = offsetof(PyDictKeysObject, dk_entries);
49254925
size += (size_t)1 << keys->dk_log2_index_bytes;
49264926
size += USABLE_FRACTION((size_t)DK_SIZE(keys)) * es;
49274927
return size;

0 commit comments

Comments
 (0)