Skip to content

Commit 89d9346

Browse files
committed
Use translate to improve performance of normalize
1 parent d8a7576 commit 89d9346

2 files changed

Lines changed: 49 additions & 1 deletion

File tree

importlib_metadata/__init__.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -910,6 +910,14 @@ def search(self, prepared: Prepared):
910910
return itertools.chain(infos, eggs)
911911

912912

913+
# Translation table for Prepared.normalize: lowercase ASCII letters and
914+
# replace "-" (hyphen) and "." (dot) with "_" (underscore).
915+
_normalize_table = str.maketrans(
916+
"ABCDEFGHIJKLMNOPQRSTUVWXYZ-.",
917+
"abcdefghijklmnopqrstuvwxyz__",
918+
)
919+
920+
913921
class Prepared:
914922
"""
915923
A prepared search query for metadata on a possibly-named package.
@@ -945,7 +953,13 @@ def normalize(name):
945953
"""
946954
PEP 503 normalization plus dashes as underscores.
947955
"""
948-
return re.sub(r"[-_.]+", "-", name).lower().replace('-', '_')
956+
# Emulates ``re.sub(r"[-_.]+", "-", name).lower().replace('-', '_')`` (PEP 503 normalization, with underscores instead of hyphens).
957+
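# About 3x faster. Only ASCII A-Z is lowercased (unlike ``str.lower()``), which is safe because valid package names contain only ASCII letters, digits, and the "-", "_", "." separators.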
958+
value = name.translate(_normalize_table)
959+
# Condense runs of consecutive underscores into a single one (faster than a regex substitution)
960+
while "__" in value:
961+
value = value.replace("__", "_")
962+
return value
949963

950964
@staticmethod
951965
def legacy_normalize(name):

tests/test_api.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from importlib_metadata import (
77
Distribution,
88
PackageNotFoundError,
9+
Prepared,
910
distribution,
1011
entry_points,
1112
files,
@@ -317,3 +318,36 @@ class InvalidateCache(unittest.TestCase):
317318
def test_invalidate_cache(self):
318319
# No externally observable behavior, but ensures test coverage...
319320
importlib.invalidate_caches()
321+
322+
323+
class PreparedTests(unittest.TestCase):
324+
def test_normalize(self):
325+
tests = [
326+
# Simple
327+
("sample", "sample"),
328+
# Mixed case
329+
("Sample", "sample"),
330+
("SAMPLE", "sample"),
331+
("SaMpLe", "sample"),
332+
# Separator conversions
333+
("sample-pkg", "sample_pkg"),
334+
("sample.pkg", "sample_pkg"),
335+
("sample_pkg", "sample_pkg"),
336+
# Multiple separators
337+
("sample---pkg", "sample_pkg"),
338+
("sample___pkg", "sample_pkg"),
339+
("sample...pkg", "sample_pkg"),
340+
# Mixed separators
341+
("sample-._pkg", "sample_pkg"),
342+
("sample_.-pkg", "sample_pkg"),
343+
# Complex
344+
("Sample__Pkg-name.foo", "sample_pkg_name_foo"),
345+
("Sample__Pkg.name__foo", "sample_pkg_name_foo"),
346+
# Uppercase with separators
347+
("SAMPLE-PKG", "sample_pkg"),
348+
("Sample.Pkg", "sample_pkg"),
349+
("SAMPLE_PKG", "sample_pkg"),
350+
]
351+
for name, expected in tests:
352+
with self.subTest(name=name):
353+
self.assertEqual(Prepared.normalize(name), expected)

0 commit comments

Comments
 (0)