Fix proactive caching regex to match both hyphens and underscores

PEP 503 normalizes package names to use hyphens, but wheel filenames
may use underscores (e.g., typing_extensions-4.0.0-py3-none-any.whl).

Replace each run of hyphens/underscores in the normalized name with `[-_]+`, so the wheel and sdist search patterns match either separator.
This commit is contained in:
Mondo Diaz
2026-01-30 18:25:30 -06:00
parent 522d23ec01
commit 80e2f3d157

View File

@@ -536,13 +536,18 @@ def _cache_dependency_background(
# Parse the HTML to find wheel files # Parse the HTML to find wheel files
html = response.text html = response.text
# Create pattern that matches both normalized (hyphens) and original (underscores) forms
# PEP 503 normalizes to hyphens, but wheel filenames may use underscores
name_pattern = re.sub(r'[-_]+', '[-_]+', normalized_name)
# Look for wheel files (.whl) - prefer them over sdist # Look for wheel files (.whl) - prefer them over sdist
wheel_pattern = rf'href="([^"]*{normalized_name}[^"]*\.whl[^"]*)"' wheel_pattern = rf'href="([^"]*{name_pattern}[^"]*\.whl[^"]*)"'
matches = re.findall(wheel_pattern, html, re.IGNORECASE) matches = re.findall(wheel_pattern, html, re.IGNORECASE)
if not matches: if not matches:
# Try sdist # Try sdist
sdist_pattern = rf'href="([^"]*{normalized_name}[^"]*\.tar\.gz[^"]*)"' sdist_pattern = rf'href="([^"]*{name_pattern}[^"]*\.tar\.gz[^"]*)"'
matches = re.findall(sdist_pattern, html, re.IGNORECASE) matches = re.findall(sdist_pattern, html, re.IGNORECASE)
if not matches: if not matches: