Skip to content

Commit d8a12f5

Browse files
committed
Add fuzzer for io module
1 parent 71ede86 commit d8a12f5

File tree

3 files changed

+195
-2
lines changed

3 files changed

+195
-2
lines changed

Makefile

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
all : fuzzer-html fuzzer-email fuzzer-httpclient fuzzer-json fuzzer-difflib fuzzer-csv fuzzer-decode fuzzer-ast fuzzer-tarfile fuzzer-tarfile-hypothesis fuzzer-zipfile fuzzer-zipfile-hypothesis fuzzer-re fuzzer-configparser fuzzer-tomllib fuzzer-plistlib fuzzer-xml fuzzer-zoneinfo
1+
all : fuzzer-html fuzzer-email fuzzer-httpclient fuzzer-json fuzzer-difflib fuzzer-csv fuzzer-decode fuzzer-ast fuzzer-tarfile fuzzer-tarfile-hypothesis fuzzer-zipfile fuzzer-zipfile-hypothesis fuzzer-re fuzzer-configparser fuzzer-tomllib fuzzer-plistlib fuzzer-xml fuzzer-zoneinfo fuzzer-io
22

33
PYTHON_CONFIG_PATH=$(CPYTHON_INSTALL_PATH)/bin/python3-config
44
CXXFLAGS += $(shell $(PYTHON_CONFIG_PATH) --cflags)
5-
LDFLAGS += -rdynamic $(shell $(PYTHON_CONFIG_PATH) --ldflags --embed)
5+
LDFLAGS += -rdynamic $(shell $(PYTHON_CONFIG_PATH) --ldflags --embed) $(CPYTHON_MODLIBS) -Wl,--allow-multiple-definition
66

77
fuzzer-html:
88
clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"html.py\"" -ldl $(LDFLAGS) -o fuzzer-html
@@ -40,3 +40,6 @@ fuzzer-xml:
4040
clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"xml.py\"" -ldl $(LDFLAGS) -o fuzzer-xml
4141
fuzzer-zoneinfo:
4242
clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"zoneinfo.py\"" -ldl $(LDFLAGS) -o fuzzer-zoneinfo
43+
44+
fuzzer-io:
45+
clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"io.py\"" -ldl $(LDFLAGS) -o fuzzer-io

fuzz_targets.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ difflib difflib.py
66
email email.py
77
html html.py
88
httpclient httpclient.py
9+
io io.py
910
json json.py
1011
plistlib plist.py
1112
re re.py

io.py

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
from fuzzeddataprovider import FuzzedDataProvider
2+
import os
3+
import io
4+
import tempfile
5+
6+
# Selector values for the first integer FuzzerRunOne consumes from the
# input. They are contiguous from 0 and their order is part of the input
# format: OP_BYTESIO is the lowest selector and OP_STRINGIO the highest.
(
    OP_BYTESIO,
    OP_TEXTIOWRAPPER,
    OP_BUFFERED_IO,
    OP_FILEIO,
    OP_IO_OPEN,
    OP_NEWLINE_DECODER,
    OP_STRINGIO,
) = range(7)

# Selector values op_buffered_io uses to pick which buffered class to
# exercise; also contiguous from 0.
(
    BUFFERED_READER,
    BUFFERED_WRITER,
    BUFFERED_RANDOM,
) = range(3)
19+
20+
# Exercises BytesIO (Modules/_io/bytesio.c). The fuzzed payload is written
# into an empty stream, then read back three ways from offset 0 (read,
# readline, readinto a scratch buffer of at most 100 bytes). Afterwards the
# memoryview path (getbuffer), truncate at a fuzzed position, and getvalue
# are hit.
def op_bytesio(fdp):
    # The truncate position is drawn before the payload, so it is bounded
    # by the bytes remaining at that point, not by the payload length.
    cut_at = fdp.ConsumeIntInRange(0, fdp.remaining_bytes())
    payload = fdp.ConsumeBytes(fdp.remaining_bytes())

    stream = io.BytesIO()
    stream.write(payload)

    # Whole-stream read, then single-line read, each from the start.
    for reader in (stream.read, stream.readline):
        stream.seek(0)
        reader()

    scratch = bytearray(min(len(payload), 100))
    stream.seek(0)
    stream.readinto(scratch)

    # The view is deliberately not kept: it is dropped right away, before
    # the stream is resized by truncate below.
    stream.getbuffer()
    stream.truncate(cut_at)
    stream.getvalue()
38+
39+
# Exercises TextIOWrapper (Modules/_io/textio.c). A BytesIO holding the
# fuzzed payload is wrapped with a fuzz-selected encoding (utf-8, latin-1,
# ascii, utf-16) and newline mode (None, '', \n, \r, \r\n), always with
# errors='replace'. The wrapper is then read fully, rewound, read by line,
# and detached.
def op_textiowrapper(fdp):
    # Consumption order matters for input layout: encoding, newline, payload.
    encoding = fdp.PickValueInList(['utf-8', 'latin-1', 'ascii', 'utf-16'])
    newline = fdp.PickValueInList([None, '', '\n', '\r', '\r\n'])
    raw = io.BytesIO(fdp.ConsumeBytes(fdp.remaining_bytes()))

    text = io.TextIOWrapper(
        raw,
        encoding=encoding,
        errors='replace',
        newline=newline,
    )
    text.read()
    text.seek(0)
    text.readline()
    text.detach()
55+
56+
# Exercises BufferedReader/Writer/Random (Modules/_io/bufferedio.c) over a
# BytesIO raw stream. A fuzzed selector picks one of the three classes:
#   BUFFERED_READER - read the whole payload through the buffer
#   BUFFERED_WRITER - write the payload and flush
#   BUFFERED_RANDOM - partial read, write a second fuzzed payload at the
#                     resulting position, rewind, read everything
#
# Input layout: selector, read size, [write length + write payload, only
# for BUFFERED_RANDOM], then the remainder as the main payload.
def op_buffered_io(fdp):
    target = fdp.ConsumeIntInRange(BUFFERED_READER, BUFFERED_RANDOM)
    read_size = fdp.ConsumeIntInRange(0, 10000)

    is_random = target not in (BUFFERED_READER, BUFFERED_WRITER)
    write_data = b''
    if is_random:
        # The write payload must be carved out BEFORE the remainder is
        # taken below: ConsumeBytes(fdp.remaining_bytes()) drains the
        # provider, so anything consumed after it has no input left and
        # the write path would never see fuzzed content.
        write_len = fdp.ConsumeIntInRange(0, min(fdp.remaining_bytes(), 10000))
        write_data = fdp.ConsumeBytes(write_len)

    data = fdp.ConsumeBytes(fdp.remaining_bytes())

    if target == BUFFERED_READER:
        raw = io.BytesIO(data)
        br = io.BufferedReader(raw)
        br.read()
    elif target == BUFFERED_WRITER:
        raw = io.BytesIO()
        bw = io.BufferedWriter(raw)
        bw.write(data)
        bw.flush()
    else:
        raw = io.BytesIO(data)
        brw = io.BufferedRandom(raw)
        brw.read(read_size)
        brw.write(write_data)
        brw.seek(0)
        brw.read()
80+
81+
# Exercises FileIO (Modules/_io/fileio.c) against a real temporary file.
# A fuzzed boolean chooses how the payload reaches the file: through a
# FileIO opened in 'w' mode, or through the NamedTemporaryFile handle
# itself. Either way the file is then read back through FileIO in 'r' mode.
# The temporary file is always removed afterwards.
def op_fileio(fdp):
    do_write = fdp.ConsumeBool()
    data = fdp.ConsumeBytes(fdp.remaining_bytes())
    tmpname = None
    try:
        # delete=False: the path is reopened by name below, and removal is
        # handled explicitly in the finally clause.
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            tmpname = tmp.name
            if do_write:
                # 'with' guarantees the descriptor is closed even when
                # write() raises, instead of relying on a trailing close().
                with io.FileIO(tmpname, 'w') as f:
                    f.write(data)
            else:
                tmp.write(data)
                # Push the bytes to the OS so the by-name reader sees them.
                tmp.flush()
            with io.FileIO(tmpname, 'r') as f:
                f.read()
    finally:
        if tmpname:
            try:
                os.unlink(tmpname)
            except OSError:
                # Best-effort cleanup: a file that is already gone or
                # cannot be removed must not fail the fuzz iteration.
                pass
110+
111+
# Exercises io.open() (Modules/_io/_iomodule.c). The fuzzed payload is
# written to a temporary file, which is then reopened by name in a
# fuzz-selected mode and read in full. Text mode uses errors='replace';
# binary mode passes errors=None. The temporary file is always removed.
def op_io_open(fdp):
    # 'rb' occupies two of the three slots in this list.
    # NOTE(review): confirm the duplicate is an intentional weighting and
    # not a typo for another mode.
    mode = fdp.PickValueInList(['rb', 'r', 'rb'])
    payload = fdp.ConsumeBytes(fdp.remaining_bytes())

    if 'b' in mode:
        errors = None
    else:
        errors = 'replace'

    path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False) as scratch:
            path = scratch.name
            scratch.write(payload)
            # Make the bytes visible to the by-name open below.
            scratch.flush()
            with io.open(path, mode, errors=errors) as handle:
                handle.read()
    finally:
        if path:
            try:
                os.unlink(path)
            except Exception:
                # Best-effort cleanup; never fail the iteration over it.
                pass
132+
133+
# Exercises IncrementalNewlineDecoder (Modules/_io/textio.c) with no
# underlying decoder (None) and a fuzzed translate flag. Between 1 and
# 10000 input bytes are decoded as latin-1 into a str, fed to decode(),
# and then getstate() and reset() are called. Does nothing when no input
# is left after the flag.
def op_newline_decoder(fdp):
    translate = fdp.ConsumeBool()

    available = fdp.remaining_bytes()
    if not available > 0:
        return
    length = fdp.ConsumeIntInRange(1, min(available, 10000))
    if length == 0:
        return

    # latin-1 maps every byte to one code point, so decoding cannot fail.
    text = fdp.ConsumeBytes(length).decode('latin-1')

    newline_decoder = io.IncrementalNewlineDecoder(None, translate)
    newline_decoder.decode(text)
    newline_decoder.getstate()
    newline_decoder.reset()
146+
147+
# Exercises StringIO (Modules/_io/stringio.c). Between 1 and 10000 input
# bytes are decoded as latin-1 into the initial text. The stream is read
# in full, rewound, read by line, rewound, overwritten with the same text,
# and finally getvalue() is called. Does nothing on empty input.
def op_stringio(fdp):
    available = fdp.remaining_bytes()
    if not available > 0:
        return
    length = fdp.ConsumeIntInRange(1, min(available, 10000))
    if length == 0:
        return

    # latin-1 maps every byte to one code point, so decoding cannot fail.
    text = fdp.ConsumeBytes(length).decode('latin-1')

    stream = io.StringIO(text)
    # Whole-stream read, then single-line read, each from the start.
    for reader in (stream.read, stream.readline):
        reader()
        stream.seek(0)
    stream.write(text)
    stream.getvalue()
162+
163+
# Fuzzer entry point for CPython's I/O C modules (Modules/_io/). The first
# consumed integer selects one target: BytesIO, TextIOWrapper,
# BufferedReader/Writer/Random, FileIO, io.open(), IncrementalNewlineDecoder
# or StringIO. The rest of the input is handed to that target's op_*
# function. Empty inputs and inputs larger than 0x100000 bytes are ignored.
def FuzzerRunOne(FuzzerInput):
    if not 1 <= len(FuzzerInput) <= 0x100000:
        return

    fdp = FuzzedDataProvider(FuzzerInput)
    op = fdp.ConsumeIntInRange(OP_BYTESIO, OP_STRINGIO)

    handlers = {
        OP_BYTESIO: op_bytesio,
        OP_TEXTIOWRAPPER: op_textiowrapper,
        OP_BUFFERED_IO: op_buffered_io,
        OP_FILEIO: op_fileio,
        OP_IO_OPEN: op_io_open,
        OP_NEWLINE_DECODER: op_newline_decoder,
    }
    # Any selector without an explicit entry falls through to StringIO.
    handler = handlers.get(op, op_stringio)

    try:
        handler(fdp)
    except Exception:
        # Python-level exceptions raised by the io classes are not findings
        # for this harness, so they are swallowed here.
        pass

0 commit comments

Comments
 (0)