Skip to content

Commit 638e587

Browse files
Commit
1 parent 89b5571 commit 638e587

3 files changed

Lines changed: 55 additions & 1 deletion

File tree

Lib/test/test_codecencodings_jp.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,27 @@ class Test_SJIS_2004(multibytecodec_support.TestBase, unittest.TestCase):
106106
b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
107107
)
108108

109+
def test_null_terminator(self):
110+
# see gh-101828
111+
cases = (
112+
"バルーンフルーツ",
113+
"ライフアップキノコ",
114+
"テスト",
115+
"'Tis but a scratch!"
116+
)
117+
for case in cases:
118+
with self.subTest(case=case):
119+
encode_w_null = (case + "\0").encode(self.encoding)
120+
encode_plus_null = case.encode(self.encoding) + "\0".encode(self.encoding)
121+
self.assertTrue(encode_w_null.endswith(b'\x00'))
122+
self.assertEqual(encode_w_null, encode_plus_null)
123+
124+
encode_w_null_2 = encode_w_null + encode_w_null
125+
encode_plus_null_2 = encode_plus_null + encode_plus_null
126+
self.assertEqual(encode_w_null_2.count(b'\x00'), 2)
127+
self.assertEqual(encode_w_null_2, encode_plus_null_2)
128+
129+
109130
class Test_SJISX0213(multibytecodec_support.TestBase, unittest.TestCase):
110131
encoding = 'shift_jisx0213'
111132
tstring = multibytecodec_support.load_teststring('shift_jisx0213')
@@ -121,6 +142,26 @@ class Test_SJISX0213(multibytecodec_support.TestBase, unittest.TestCase):
121142
"\xab\u211c\xbb = \u2329\u1234\u232a",
122143
b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
123144
)
145+
def test_null_terminator(self):
146+
# see gh-101828
147+
cases = (
148+
"バルーンフルーツ",
149+
"ライフアップキノコ",
150+
"テスト",
151+
"'Tis but a scratch!"
152+
)
153+
for case in cases:
154+
with self.subTest(case=case):
155+
encode_w_null = (case + "\0").encode(self.encoding)
156+
encode_plus_null = case.encode(self.encoding) + "\0".encode(self.encoding)
157+
self.assertTrue(encode_w_null.endswith(b'\x00'))
158+
self.assertEqual(encode_w_null, encode_plus_null)
159+
160+
encode_w_null_2 = encode_w_null + encode_w_null
161+
encode_plus_null_2 = encode_plus_null + encode_plus_null
162+
self.assertEqual(encode_w_null_2.count(b'\x00'), 2)
163+
self.assertEqual(encode_w_null_2, encode_plus_null_2)
164+
124165

125166
if __name__ == "__main__":
126167
unittest.main()
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix ``'shift_jisx0213'`` and ``'shift_jis_2004'`` codecs truncating null chars,
2+
as they were treated as part of multi-character sequences.

Modules/cjkcodecs/_codecs_jp.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -611,8 +611,19 @@ ENCODER(shift_jis_2004)
611611
if (code == DBCINV)
612612
return 1;
613613
}
614-
else
614+
else if (ch2 != 0) {
615615
insize = 2;
616+
}
617+
else {
618+
/* Don't consume null char as part of pair */
619+
code = find_pairencmap(
620+
(ucs2_t)c, 0,
621+
jisx0213_pair_encmap,
622+
JISX0213_ENCPAIRS);
623+
if (code == DBCINV) {
624+
return 1;
625+
}
626+
}
616627
}
617628
}
618629
}

0 commit comments

Comments
 (0)