Skip to content

Commit 3ac7310

Browse files
committed
gh-68451: Fix unittest discovery to support Unicode module names
Replace the ASCII-only VALID_MODULE_NAME regex with str.isidentifier() to support test modules whose names start with non-ASCII Unicode letters (e.g., café.py, 測試.py). Also add a directory name validation check so that directories with invalid identifier names (e.g., containing hyphens) are skipped during package discovery.
1 parent 645f5c4 commit 3ac7310

3 files changed

Lines changed: 113 additions & 3 deletions

File tree

Lib/test/test_unittest/test_discovery.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -919,6 +919,107 @@ def _import(packagename, *args, **kwargs):
919919
'don\'t know how to discover from {!r}'
920920
.format(package))
921921

922+
def test_valid_module_name(self):
    # gh-68451: _valid_module_name() must accept file names whose module
    # part is any valid identifier, including ones with non-ASCII letters.
    from unittest.loader import _valid_module_name

    accepted = (
        # Plain ASCII identifiers.
        'test_foo.py',
        'Test_foo.py',
        '_test.py',
        'a.py',
        # Identifiers that contain or start with non-ASCII letters.
        'café.py',
        'tëst_foo.py',
        '測試.py',
        'テスト.py',
    )
    rejected = (
        '123.py',       # starts with a digit
        'test-foo.py',  # a hyphen is not allowed in an identifier
        'test.foo',     # wrong extension
        'test',         # no extension at all
        '.py',          # nothing in front of the extension
    )

    for filename in accepted:
        self.assertTrue(_valid_module_name(filename))
    for filename in rejected:
        self.assertFalse(_valid_module_name(filename))
944+
945+
def test_find_tests_with_unicode_modules(self):
    # gh-68451: discovery must find modules whose names contain
    # non-ASCII letters, next to the plain ASCII ones.
    loader = unittest.TestLoader()

    # Listings handed out by the fake os.listdir(), one per call.  The
    # second one is consumed when discovery descends into 'test_dir'
    # (see the 'test_dir.test3' entry in the expected result).
    listings = [
        ['test2.py', 'test1.py', '測試1.py', 'tëst_três.py',
         'not_a_test.py', 'test_dir', 'test.foo',
         'test-not-a-module.py', '123bad.py', 'another_dir'],
        ['test3.py'],
    ]

    def fake_isdir(path):
        return path.endswith('dir')

    def fake_isfile(path):
        # Nothing at or below 'another_dir' counts as a file, so that
        # directory has no '__init__.py' and is not taken for a package.
        return not path.endswith('dir') and 'another_dir' not in path

    # Register each restore first, then install the stub.
    self.addCleanup(setattr, os, 'listdir', os.listdir)
    os.listdir = lambda path: listings.pop(0)
    self.addCleanup(setattr, os.path, 'isdir', os.path.isdir)
    os.path.isdir = fake_isdir
    self.addCleanup(setattr, os.path, 'isfile', os.path.isfile)
    os.path.isfile = fake_isfile

    # Replace importing and loading with string building so the result
    # records exactly which module names discovery tried to load.
    loader._get_module_from_name = lambda path: path + ' module'
    real_load_tests = loader.loadTestsFromModule

    def fake_load_tests(module, pattern=None):
        loaded = real_load_tests(module, pattern=pattern)
        return loaded + [module + ' tests']

    loader.loadTestsFromModule = fake_load_tests
    loader.suiteClass = lambda thing: thing

    top_level = os.path.abspath('/foo')
    loader._top_level_dir = top_level
    suite = list(loader._find_tests(top_level, '*.py'))

    # Unicode modules are discovered alongside the ASCII ones.
    # 'test-not-a-module.py' and '123bad.py' are excluded (not valid
    # identifiers) and so is 'test.foo' (wrong extension).
    # Results follow code point order: 'test_dir' and its children come
    # before 'tëst_três' because 'e' (U+0065) < 'ë' (U+00EB).
    expected = [
        ['not_a_test module tests'],
        ['test1 module tests'],
        ['test2 module tests'],
        ['test_dir module tests'],
        ['test_dir.test3 module tests'],
        ['tëst_três module tests'],
        ['測試1 module tests'],
    ]
    self.assertEqual(suite, expected)
995+
996+
def test_find_test_path_rejects_invalid_dir_name(self):
    # gh-68451: a directory whose name is not a valid identifier must be
    # skipped during package discovery.
    loader = unittest.TestLoader()

    # Every path looks like a directory, and only '__init__.py' paths
    # look like files, so the directory name is the only thing left
    # that can make _find_test_path() reject the candidates below.
    self.addCleanup(setattr, os.path, 'isdir', os.path.isdir)
    os.path.isdir = lambda path: True
    self.addCleanup(setattr, os.path, 'isfile', os.path.isfile)
    os.path.isfile = lambda path: path.endswith('__init__.py')

    loader._top_level_dir = '/foo'

    invalid_dirs = (
        '/foo/not-a-package',  # hyphens are not allowed in an identifier
        '/foo/123bad',         # an identifier cannot start with a digit
    )
    for full_path in invalid_dirs:
        tests, should_recurse = loader._find_test_path(
            full_path, 'test*.py')
        self.assertIsNone(tests)
        self.assertFalse(should_recurse)
1022+
9221023

9231024
if __name__ == '__main__':
9241025
unittest.main()

Lib/unittest/loader.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
"""Loading unittests."""
22

33
import os
4-
import re
54
import sys
65
import traceback
76
import types
@@ -16,7 +15,11 @@
1615
# what about .pyc (etc)
1716
# we would need to avoid loading the same tests multiple times
1817
# from '.py', *and* '.pyc'
19-
VALID_MODULE_NAME = re.compile(r'[_a-z]\w*\.py$', re.IGNORECASE)
18+
def _valid_module_name(path):
    """Return True if the file name *path* looks like a Python module.

    The name must consist of a valid identifier followed by a ``.py``
    extension.  The identifier part is checked with str.isidentifier(),
    so names containing or starting with non-ASCII letters are accepted
    (gh-68451).

    The extension is compared case-insensitively: the regular expression
    this helper replaces was compiled with re.IGNORECASE, so names such
    as ``TEST.PY`` were accepted before and must stay accepted.
    """
    root, ext = os.path.splitext(path)
    # splitext() does not treat a leading dot as an extension separator,
    # so a bare '.py' yields an empty extension and is rejected here.
    return ext.lower() == '.py' and root.isidentifier()
2023

2124

2225
class _FailedTest(case.TestCase):
@@ -415,7 +418,7 @@ def _find_test_path(self, full_path, pattern, namespace=False):
415418
"""
416419
basename = os.path.basename(full_path)
417420
if os.path.isfile(full_path):
418-
if not VALID_MODULE_NAME.match(basename):
421+
if not _valid_module_name(basename):
419422
# valid Python identifiers only
420423
return None, False
421424
if not self._match_path(basename, full_path, pattern):
@@ -449,6 +452,8 @@ def _find_test_path(self, full_path, pattern, namespace=False):
449452
msg % (mod_name, module_dir, expected_dir))
450453
return self.loadTestsFromModule(module, pattern=pattern), False
451454
elif os.path.isdir(full_path):
455+
if not os.path.basename(full_path).isidentifier():
456+
return None, False
452457
if (not namespace and
453458
not os.path.isfile(os.path.join(full_path, '__init__.py'))):
454459
return None, False
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
:mod:`unittest` test discovery now correctly finds test modules whose names
2+
start with non-ASCII Unicode letters (e.g., ``café.py``, ``測試.py``).
3+
Previously, discovery only accepted module names starting with ASCII letters
4+
or underscores. Directories whose names are not valid identifiers are now
skipped during package discovery.

0 commit comments

Comments
 (0)