fix: filter platform-specific and extra dependencies in PyPI proxy

The dependency parser stripped environment markers without checking
whether they marked a dependency as optional or platform-specific. As a
result, packages such as jaraco.path pulled in pyobjc (324 sub-packages)
even on non-macOS systems.

Changes:
- Filter dependencies with 'extra ==' markers (optional extras)
- Filter dependencies with 'sys_platform' or 'platform_system' markers
- Add diagnostic logging for depth exceeded errors
- Add unit tests for dependency filtering

Fixes TensorFlow dependency resolution exceeding the maximum depth.
This commit is contained in:
Mondo Diaz
2026-02-04 14:56:59 -06:00
parent a45ec46e94
commit 1389a03c69
3 changed files with 131 additions and 6 deletions

View File

@@ -47,17 +47,36 @@ PROXY_READ_TIMEOUT = 60.0
def _parse_requires_dist(requires_dist: str) -> Tuple[str, Optional[str]]:
"""Parse a Requires-Dist line into (package_name, version_constraint).
Filters out optional/extra dependencies and platform-specific dependencies
to avoid pulling in unnecessary packages during dependency resolution.
Examples:
"requests (>=2.25.0)" -> ("requests", ">=2.25.0")
"typing-extensions; python_version < '3.8'" -> ("typing-extensions", None)
"numpy>=1.21.0" -> ("numpy", ">=1.21.0")
"certifi" -> ("certifi", None)
"pytest; extra == 'test'" -> (None, None) # Filtered: extra dependency
"pyobjc; sys_platform == 'darwin'" -> (None, None) # Filtered: platform-specific
Returns:
Tuple of (normalized_package_name, version_constraint or None)
Returns (None, None) for dependencies that should be filtered out.
"""
# Remove any environment markers (after semicolon)
# Check for and filter environment markers (after semicolon)
if ';' in requires_dist:
marker_part = requires_dist.split(';', 1)[1].lower()
# Filter out extra/optional dependencies - these are not core dependencies
# Examples: "pytest; extra == 'test'", "sphinx; extra == 'docs'"
if 'extra' in marker_part:
return None, None
# Filter out platform-specific dependencies to avoid cross-platform bloat
# Examples: "pyobjc; sys_platform == 'darwin'", "pywin32; sys_platform == 'win32'"
if 'sys_platform' in marker_part or 'platform_system' in marker_part:
return None, None
# Strip the marker for remaining dependencies (like python_version constraints)
requires_dist = requires_dist.split(';')[0].strip()
# Match patterns like "package (>=1.0)" or "package>=1.0" or "package"