Skip to content

Commit 96e3c79

Browse files
committed
Add fuzzer for collections module
1 parent 71ede86 commit 96e3c79

File tree

3 files changed

+200
-2
lines changed

3 files changed

+200
-2
lines changed

Makefile

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
all : fuzzer-html fuzzer-email fuzzer-httpclient fuzzer-json fuzzer-difflib fuzzer-csv fuzzer-decode fuzzer-ast fuzzer-tarfile fuzzer-tarfile-hypothesis fuzzer-zipfile fuzzer-zipfile-hypothesis fuzzer-re fuzzer-configparser fuzzer-tomllib fuzzer-plistlib fuzzer-xml fuzzer-zoneinfo
1+
all : fuzzer-html fuzzer-email fuzzer-httpclient fuzzer-json fuzzer-difflib fuzzer-csv fuzzer-decode fuzzer-ast fuzzer-tarfile fuzzer-tarfile-hypothesis fuzzer-zipfile fuzzer-zipfile-hypothesis fuzzer-re fuzzer-configparser fuzzer-tomllib fuzzer-plistlib fuzzer-xml fuzzer-zoneinfo fuzzer-collections
22

33
PYTHON_CONFIG_PATH=$(CPYTHON_INSTALL_PATH)/bin/python3-config
44
CXXFLAGS += $(shell $(PYTHON_CONFIG_PATH) --cflags)
5-
LDFLAGS += -rdynamic $(shell $(PYTHON_CONFIG_PATH) --ldflags --embed)
5+
LDFLAGS += -rdynamic $(shell $(PYTHON_CONFIG_PATH) --ldflags --embed) $(CPYTHON_MODLIBS) -Wl,--allow-multiple-definition
66

77
fuzzer-html:
88
clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"html.py\"" -ldl $(LDFLAGS) -o fuzzer-html
@@ -40,3 +40,6 @@ fuzzer-xml:
4040
clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"xml.py\"" -ldl $(LDFLAGS) -o fuzzer-xml
4141
fuzzer-zoneinfo:
4242
clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"zoneinfo.py\"" -ldl $(LDFLAGS) -o fuzzer-zoneinfo
43+
44+
fuzzer-collections:
45+
clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"collections.py\"" -ldl $(LDFLAGS) -o fuzzer-collections

collections.py

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
from fuzzeddataprovider import FuzzedDataProvider
2+
import collections
3+
4+
# Operation selector constants.
#
# Each group is a contiguous run of integers starting at 0.  The first and last
# name of every group are used as the inclusive bounds of
# fdp.ConsumeIntInRange(...) when an operation is picked, so a new operation
# must be appended at the end of its group (and the upper bound at the call
# site updated) rather than inserted with a gap.

# Top-level fuzzer dispatch operations
OP_FUZZER_COUNT_ELEMENTS = 0
OP_FUZZER_DEQUE = 1
OP_FUZZER_DEFAULTDICT = 2
OP_FUZZER_ORDERED_DICT = 3

# Deque operations
OP_DEQUE_APPEND = 0
OP_DEQUE_APPENDLEFT = 1
OP_DEQUE_POP = 2
OP_DEQUE_POPLEFT = 3
OP_DEQUE_EXTEND = 4
OP_DEQUE_EXTENDLEFT = 5
OP_DEQUE_ROTATE = 6
OP_DEQUE_REVERSE = 7
OP_DEQUE_COUNT = 8
OP_DEQUE_INDEX = 9
OP_DEQUE_REMOVE = 10
OP_DEQUE_CLEAR = 11
OP_DEQUE_COPY = 12
OP_DEQUE_COMPARE = 13
OP_DEQUE_ITERATE = 14

# Defaultdict operations
OP_DDICT_INCREMENT = 0
OP_DDICT_ACCESS = 1
OP_DDICT_CONTAINS = 2
OP_DDICT_POP = 3

# OrderedDict operations
OP_ODICT_SET = 0
OP_ODICT_POP = 1
OP_ODICT_MOVE_TO_END = 2
OP_ODICT_LIST_KEYS = 3
OP_ODICT_REVERSED = 4
OP_ODICT_POPITEM = 5
40+
41+
42+
# Exercises collections._count_elements(), an internal C helper that counts
43+
# occurrences of each character in a string into a dict. Targets the
44+
# _count_elements C function which has fast-path logic for exact-dict types
45+
# vs dict subclasses.
46+
class _OverridingDict(dict):
    """dict subclass that overrides get() and __setitem__().

    Both overrides simply delegate to dict, so the observable behaviour is
    unchanged; the point is only that the type no longer uses dict's own
    get/__setitem__ slots.
    NOTE(review): CPython's _count_elements is understood to take its dict
    fast path only when neither method is overridden -- confirm against the
    CPython version being fuzzed.
    """

    def get(self, key, default=None):
        return super().get(key, default)

    def __setitem__(self, key, value):
        super().__setitem__(key, value)


def op_count_elements(fdp):
    """Feed a fuzzed latin-1 string to collections._count_elements().

    Consumes one length integer (1..min(remaining bytes, 10000)) and then that
    many bytes from ``fdp``.  The decoded string is counted twice: once into
    an exact ``dict`` and once into a dict subclass that overrides
    ``get``/``__setitem__``, so both the exact-dict handling and the
    subclass handling are exercised from the same input.  No additional fuzz
    bytes are consumed for the second call.

    Returns None; does nothing when ``fdp`` has no bytes left.
    """
    remaining = fdp.remaining_bytes()
    if remaining <= 0:
        # Nothing left to build a string from.
        return
    n = fdp.ConsumeIntInRange(1, min(remaining, 10000))
    s = fdp.ConsumeBytes(n).decode("latin-1")
    # Exact dict.
    collections._count_elements({}, s)
    # Dict subclass with overridden get()/__setitem__().
    collections._count_elements(_OverridingDict(), s)
57+
58+
59+
# Exercises collections.deque with an optional maxlen constraint. Runs a
60+
# sequence of fuzzed operations that exercise the deque's C implementation:
61+
# append/pop from both ends, extend/extendleft with lists, rotate, reverse,
62+
# search (count/index/remove with random-typed values for error path
63+
# coverage), clear, copy, rich comparison against a second deque, and
64+
# iteration via list()/len()/bool().
65+
def op_deque(fdp):
    """Run a fuzzed sequence of operations against a collections.deque.

    The deque is created from a fuzzed list of integers, optionally with a
    fuzzed ``maxlen`` in 0..100.  Up to 30 operations are then selected by
    ``fdp`` from the OP_DEQUE_* constants.  The sequence stops early as soon
    as ``fdp`` has no bytes left.

    pop/popleft/index/remove are skipped on an empty deque (no fuzz data is
    consumed for them in that case).  ValueError from index()/remove() is
    expected and ignored; any other exception propagates to the caller.

    Returns None.
    """
    # ConsumeBool() is evaluated first, then (only if True) the maxlen value.
    maxlen = fdp.ConsumeIntInRange(0, 100) if fdp.ConsumeBool() else None
    init_n = fdp.ConsumeIntInRange(0, min(fdp.remaining_bytes(), 50))
    # NOTE(review): the second ConsumeIntList argument (1) is presumably the
    # byte width per integer -- confirm against fuzzeddataprovider.
    init_data = fdp.ConsumeIntList(init_n, 1)
    dq = collections.deque(init_data, maxlen=maxlen)
    num_ops = fdp.ConsumeIntInRange(1, 30)
    for _ in range(num_ops):
        if fdp.remaining_bytes() == 0:
            break
        op = fdp.ConsumeIntInRange(OP_DEQUE_APPEND, OP_DEQUE_ITERATE)
        if op == OP_DEQUE_APPEND:
            dq.append(fdp.ConsumeRandomValue())
        elif op == OP_DEQUE_APPENDLEFT:
            dq.appendleft(fdp.ConsumeRandomValue())
        elif op == OP_DEQUE_POP:
            if dq:
                dq.pop()
        elif op == OP_DEQUE_POPLEFT:
            if dq:
                dq.popleft()
        elif op in (OP_DEQUE_EXTEND, OP_DEQUE_EXTENDLEFT):
            remaining = fdp.remaining_bytes()
            if remaining == 0:
                # Selecting the op may have used the last byte; asking for an
                # integer in the range (1, 0) would be an invalid request, so
                # end the sequence here instead.
                break
            n = fdp.ConsumeIntInRange(1, min(remaining, 50))
            items = fdp.ConsumeIntList(n, 1)
            if op == OP_DEQUE_EXTEND:
                dq.extend(items)
            else:
                dq.extendleft(items)
        elif op == OP_DEQUE_ROTATE:
            dq.rotate(fdp.ConsumeIntInRange(-10, 10))
        elif op == OP_DEQUE_REVERSE:
            dq.reverse()
        elif op == OP_DEQUE_COUNT:
            dq.count(fdp.ConsumeRandomValue())
        elif op == OP_DEQUE_INDEX:
            if dq:
                try:
                    dq.index(fdp.ConsumeRandomValue())
                except ValueError:
                    # Value not present: the expected miss path.
                    pass
        elif op == OP_DEQUE_REMOVE:
            if dq:
                try:
                    dq.remove(fdp.ConsumeRandomValue())
                except ValueError:
                    # Value not present: the expected miss path.
                    pass
        elif op == OP_DEQUE_CLEAR:
            dq.clear()
        elif op == OP_DEQUE_COPY:
            dq.copy()
        elif op == OP_DEQUE_COMPARE:
            # Compare against a second, unbounded deque of fuzzed integers.
            dq2 = collections.deque(
                fdp.ConsumeIntList(
                    fdp.ConsumeIntInRange(0, min(fdp.remaining_bytes(), 20)), 1
                )
            )
            _ = dq == dq2
            _ = dq < dq2
        elif op == OP_DEQUE_ITERATE:
            _ = list(dq)
            _ = len(dq)
            _ = bool(dq)
121+
122+
123+
# Exercises collections.defaultdict with int as the default factory.
124+
# Runs fuzzed sequences of key increment (triggers __missing__ on new keys),
125+
# key access, containment checks, and pop operations. Keys are fuzzed
126+
# latin-1 strings so the same key may be accessed multiple times, exercising
127+
# both the hit and miss paths in the underlying dict C implementation.
128+
def op_defaultdict(fdp):
    """Drive a ``collections.defaultdict(int)`` with fuzzed key operations.

    Up to 20 rounds are executed; each round consumes an operation selector
    (OP_DDICT_*), a key length in 1..10 and the key bytes (decoded as
    latin-1) from ``fdp``.  The loop ends early once ``fdp`` is exhausted.

    Returns None.
    """
    counters = collections.defaultdict(int)
    for _ in range(fdp.ConsumeIntInRange(1, 20)):
        if not fdp.remaining_bytes():
            break
        action = fdp.ConsumeIntInRange(OP_DDICT_INCREMENT, OP_DDICT_POP)
        key_len = fdp.ConsumeIntInRange(1, 10)
        key = fdp.ConsumeBytes(key_len).decode("latin-1")
        if action == OP_DDICT_POP:
            counters.pop(key, None)
        elif action == OP_DDICT_CONTAINS:
            # Membership test only; does not insert the key.
            _ = key in counters
        elif action == OP_DDICT_ACCESS:
            # Plain subscript: inserts the default for a missing key.
            _ = counters[key]
        elif action == OP_DDICT_INCREMENT:
            counters[key] += fdp.ConsumeInt(1)
144+
145+
146+
# Exercises collections.OrderedDict's C implementation (odictobject.c).
147+
# Runs fuzzed sequences of set (with random-typed values), pop,
148+
# move_to_end (with fuzzed last= direction), key listing, reversed
149+
# iteration, and popitem (with fuzzed last= direction). The key reuse
150+
# from short fuzzed strings exercises the internal linked-list
151+
# reordering logic.
152+
def op_ordered_dict(fdp):
    """Drive a ``collections.OrderedDict`` with fuzzed operations.

    Up to 20 rounds are executed; each round consumes an operation selector
    (OP_ODICT_*), a key length in 1..10 and the key bytes (decoded as
    latin-1) from ``fdp``, even for operations that do not use the key.
    The loop ends early once ``fdp`` is exhausted.

    move_to_end is only attempted for keys that are present and popitem only
    on a non-empty mapping; in both cases the ``last=`` flag is consumed from
    ``fdp`` only when the call is actually made.

    Returns None.
    """
    ordered = collections.OrderedDict()
    rounds = fdp.ConsumeIntInRange(1, 20)
    for _ in range(rounds):
        if fdp.remaining_bytes() == 0:
            break
        action = fdp.ConsumeIntInRange(OP_ODICT_SET, OP_ODICT_POPITEM)
        key_len = fdp.ConsumeIntInRange(1, 10)
        key = fdp.ConsumeBytes(key_len).decode("latin-1")
        if action == OP_ODICT_SET:
            ordered[key] = fdp.ConsumeRandomValue()
        elif action == OP_ODICT_POP:
            ordered.pop(key, None)
        elif action == OP_ODICT_MOVE_TO_END:
            if key in ordered:
                ordered.move_to_end(key, last=fdp.ConsumeBool())
        elif action == OP_ODICT_LIST_KEYS:
            _ = list(ordered.keys())
        elif action == OP_ODICT_REVERSED:
            _ = list(reversed(ordered))
        elif action == OP_ODICT_POPITEM:
            if len(ordered) > 0:
                ordered.popitem(last=fdp.ConsumeBool())
172+
173+
174+
# Fuzzes the _collections C module (Modules/_collectionsmodule.c).
175+
# Exercises _count_elements() with fuzzed iterables, deque operations
176+
# (append, pop, extend, rotate, reverse, count, index, remove, copy),
177+
# defaultdict key access patterns, and OrderedDict manipulation
178+
# (set, pop, move_to_end, popitem, reversed iteration).
179+
def FuzzerRunOne(FuzzerInput):
    """Fuzzer entry point: run one collections harness on ``FuzzerInput``.

    Inputs that are empty or longer than 0x10000 bytes are ignored.  The
    first integer consumed from the input selects the harness
    (OP_FUZZER_* constants); any selector other than the first three runs
    the OrderedDict harness.

    Every ``Exception`` raised by the selected harness is swallowed on
    purpose.
    NOTE(review): presumably only hard failures (crashes in the C
    implementation) are of interest to this fuzzer -- confirm.

    Returns None.
    """
    if not 1 <= len(FuzzerInput) <= 0x10000:
        return
    fdp = FuzzedDataProvider(FuzzerInput)
    selector = fdp.ConsumeIntInRange(
        OP_FUZZER_COUNT_ELEMENTS, OP_FUZZER_ORDERED_DICT
    )
    try:
        harnesses = {
            OP_FUZZER_COUNT_ELEMENTS: op_count_elements,
            OP_FUZZER_DEQUE: op_deque,
            OP_FUZZER_DEFAULTDICT: op_defaultdict,
        }
        # Unlisted selectors fall back to the OrderedDict harness.
        run = harnesses.get(selector, op_ordered_dict)
        run(fdp)
    except Exception:
        pass

fuzz_targets.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
ast ast.py
2+
collections collections.py
23
configparser configparser.py
34
csv csv.py
45
decode decode.py

0 commit comments

Comments
 (0)