From 1d727b3f8c8d04fe3efee79286cb8c59bb0d8a92 Mon Sep 17 00:00:00 2001 From: Mondo Diaz Date: Fri, 30 Jan 2026 18:25:30 -0600 Subject: [PATCH] Fix proactive caching regex to match both hyphens and underscores PEP 503 normalizes package names to use hyphens, but wheel filenames may use underscores (e.g., typing_extensions-4.0.0-py3-none-any.whl). Convert the search pattern to match either separator. --- backend/app/pypi_proxy.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/backend/app/pypi_proxy.py b/backend/app/pypi_proxy.py index 54bd59e..0b380c8 100644 --- a/backend/app/pypi_proxy.py +++ b/backend/app/pypi_proxy.py @@ -536,13 +536,18 @@ def _cache_dependency_background( # Parse the HTML to find wheel files html = response.text + + # Create pattern that matches both normalized (hyphens) and original (underscores) forms + # PEP 503 normalizes to hyphens, but wheel filenames may use underscores + name_pattern = re.sub(r'[-_]+', '[-_]+', normalized_name) + # Look for wheel files (.whl) - prefer them over sdist - wheel_pattern = rf'href="([^"]*{normalized_name}[^"]*\.whl[^"]*)"' + wheel_pattern = rf'href="([^"]*{name_pattern}[^"]*\.whl[^"]*)"' matches = re.findall(wheel_pattern, html, re.IGNORECASE) if not matches: # Try sdist - sdist_pattern = rf'href="([^"]*{normalized_name}[^"]*\.tar\.gz[^"]*)"' + sdist_pattern = rf'href="([^"]*{name_pattern}[^"]*\.tar\.gz[^"]*)"' matches = re.findall(sdist_pattern, html, re.IGNORECASE) if not matches: