From 7140c9f4f262827f3bb9671cf52d9108b8e97f54 Mon Sep 17 00:00:00 2001 From: Mondo Diaz Date: Wed, 4 Feb 2026 14:56:59 -0600 Subject: [PATCH] fix: filter platform-specific and extra dependencies in PyPI proxy The dependency parser was stripping environment markers but not checking if they indicated optional or platform-specific packages. This caused packages like jaraco.path to pull in pyobjc (324 sub-packages) even on non-macOS systems. Changes: - Filter dependencies with 'extra ==' markers (optional extras) - Filter dependencies with 'sys_platform' or 'platform_system' markers - Add diagnostic logging for depth exceeded errors - Add unit tests for dependency filtering Fixes tensorflow dependency resolution exceeding max depth. --- backend/app/dependencies.py | 31 ++++++++-- backend/app/pypi_proxy.py | 21 ++++++- backend/tests/unit/test_pypi_proxy.py | 85 +++++++++++++++++++++++++++ 3 files changed, 131 insertions(+), 6 deletions(-) create mode 100644 backend/tests/unit/test_pypi_proxy.py diff --git a/backend/app/dependencies.py b/backend/app/dependencies.py index 8e8d47d..28f9c03 100644 --- a/backend/app/dependencies.py +++ b/backend/app/dependencies.py @@ -730,6 +730,8 @@ def resolve_dependencies( current_path: Dict[str, str] = {} # Resolution order (topological) resolution_order: List[str] = [] + # Track resolution path for debugging + resolution_path_sync: List[str] = [] def _resolve_recursive( artifact_id: str, @@ -741,12 +743,16 @@ def resolve_dependencies( depth: int = 0, ): """Recursively resolve dependencies with cycle/conflict detection.""" + pkg_key = f"{proj_name}/{pkg_name}" + # Safety limit: prevent DoS through deeply nested dependencies if depth > MAX_DEPENDENCY_DEPTH: + logger.error( + f"Dependency depth exceeded at {pkg_key} (depth={depth}). " + f"Resolution path: {' -> '.join(resolution_path_sync[-20:])}" + ) raise DependencyDepthExceededError(MAX_DEPENDENCY_DEPTH) - pkg_key = f"{proj_name}/{pkg_name}" - # Cycle detection (at artifact level) if artifact_id in visiting: # Build cycle path from current_path @@ -770,6 +776,9 @@ def resolve_dependencies( if artifact_id in visited: return + # Track path for debugging (only after early-return checks) + resolution_path_sync.append(f"{pkg_key}@{version_or_tag}") + visiting.add(artifact_id) current_path[artifact_id] = pkg_key @@ -838,6 +847,7 @@ def resolve_dependencies( visiting.remove(artifact_id) del current_path[artifact_id] visited.add(artifact_id) + resolution_path_sync.pop() # Add to resolution order (dependencies before dependents) resolution_order.append(artifact_id) @@ -1086,6 +1096,9 @@ async def resolve_dependencies_with_fetch( logger.warning(f"Error fetching {dep_package}: {e}") return None + # Track resolution path for debugging + resolution_path: List[str] = [] + async def _resolve_recursive_async( artifact_id: str, proj_name: str, @@ -1096,11 +1109,15 @@ async def resolve_dependencies_with_fetch( depth: int = 0, ): """Recursively resolve dependencies with fetch capability.""" - if depth > MAX_DEPENDENCY_DEPTH: - raise DependencyDepthExceededError(MAX_DEPENDENCY_DEPTH) - pkg_key = f"{proj_name}/{pkg_name}" + if depth > MAX_DEPENDENCY_DEPTH: + logger.error( + f"Dependency depth exceeded at {pkg_key} (depth={depth}). " + f"Resolution path: {' -> '.join(resolution_path[-20:])}" + ) + raise DependencyDepthExceededError(MAX_DEPENDENCY_DEPTH) + # Cycle detection if artifact_id in visiting: cycle_start = current_path.get(artifact_id, pkg_key) @@ -1123,6 +1140,9 @@ async def resolve_dependencies_with_fetch( if artifact_id in visited: return + # Track path for debugging (only after early-return checks) + resolution_path.append(f"{pkg_key}@{version_or_tag}") + visiting.add(artifact_id) current_path[artifact_id] = pkg_key @@ -1235,6 +1255,7 @@ async def resolve_dependencies_with_fetch( visiting.remove(artifact_id) del current_path[artifact_id] visited.add(artifact_id) + resolution_path.pop() resolution_order.append(artifact_id) diff --git a/backend/app/pypi_proxy.py b/backend/app/pypi_proxy.py index 4ea7a0b..84b7877 100644 --- a/backend/app/pypi_proxy.py +++ b/backend/app/pypi_proxy.py @@ -47,17 +47,36 @@ PROXY_READ_TIMEOUT = 60.0 def _parse_requires_dist(requires_dist: str) -> Tuple[str, Optional[str]]: """Parse a Requires-Dist line into (package_name, version_constraint). + Filters out optional/extra dependencies and platform-specific dependencies + to avoid pulling in unnecessary packages during dependency resolution. + Examples: "requests (>=2.25.0)" -> ("requests", ">=2.25.0") "typing-extensions; python_version < '3.8'" -> ("typing-extensions", None) "numpy>=1.21.0" -> ("numpy", ">=1.21.0") "certifi" -> ("certifi", None) + "pytest; extra == 'test'" -> (None, None) # Filtered: extra dependency + "pyobjc; sys_platform == 'darwin'" -> (None, None) # Filtered: platform-specific Returns: Tuple of (normalized_package_name, version_constraint or None) + Returns (None, None) for dependencies that should be filtered out. """ - # Remove any environment markers (after semicolon) + # Check for and filter environment markers (after semicolon) if ';' in requires_dist: + marker_part = requires_dist.split(';', 1)[1].lower() + + # Filter out extra/optional dependencies - these are not core dependencies + # Examples: "pytest; extra == 'test'", "sphinx; extra == 'docs'" + if 'extra' in marker_part: + return None, None + + # Filter out platform-specific dependencies to avoid cross-platform bloat + # Examples: "pyobjc; sys_platform == 'darwin'", "pywin32; sys_platform == 'win32'" + if 'sys_platform' in marker_part or 'platform_system' in marker_part: + return None, None + + # Strip the marker for remaining dependencies (like python_version constraints) requires_dist = requires_dist.split(';')[0].strip() # Match patterns like "package (>=1.0)" or "package>=1.0" or "package" diff --git a/backend/tests/unit/test_pypi_proxy.py b/backend/tests/unit/test_pypi_proxy.py new file mode 100644 index 0000000..b399baa --- /dev/null +++ b/backend/tests/unit/test_pypi_proxy.py @@ -0,0 +1,85 @@ +"""Unit tests for PyPI proxy functionality.""" + +import pytest +from app.pypi_proxy import _parse_requires_dist + + +class TestParseRequiresDist: + """Tests for _parse_requires_dist function.""" + + def test_simple_package(self): + """Test parsing a simple package name.""" + name, version = _parse_requires_dist("numpy") + assert name == "numpy" + assert version is None + + def test_package_with_version(self): + """Test parsing package with version constraint.""" + name, version = _parse_requires_dist("numpy>=1.21.0") + assert name == "numpy" + assert version == ">=1.21.0" + + def test_package_with_parenthesized_version(self): + """Test parsing package with parenthesized version.""" + name, version = _parse_requires_dist("requests (>=2.25.0)") + assert name == "requests" + assert version == ">=2.25.0" + + def test_package_with_python_version_marker(self): + """Test that python_version markers are preserved but marker stripped.""" + name, version = _parse_requires_dist("typing-extensions; python_version < '3.8'") + assert name == "typing-extensions" + assert version is None + + def test_filters_extra_dependencies(self): + """Test that extra dependencies are filtered out.""" + # Extra dependencies should return (None, None) + name, version = _parse_requires_dist("pytest; extra == 'test'") + assert name is None + assert version is None + + name, version = _parse_requires_dist("sphinx; extra == 'docs'") + assert name is None + assert version is None + + def test_filters_platform_specific_darwin(self): + """Test that macOS-specific dependencies are filtered out.""" + name, version = _parse_requires_dist("pyobjc; sys_platform == 'darwin'") + assert name is None + assert version is None + + def test_filters_platform_specific_win32(self): + """Test that Windows-specific dependencies are filtered out.""" + name, version = _parse_requires_dist("pywin32; sys_platform == 'win32'") + assert name is None + assert version is None + + def test_filters_platform_system_marker(self): + """Test that platform_system markers are filtered out.""" + name, version = _parse_requires_dist("jaraco-windows; platform_system == 'Windows'") + assert name is None + assert version is None + + def test_normalizes_package_name(self): + """Test that package names are normalized (PEP 503).""" + name, version = _parse_requires_dist("Typing_Extensions>=3.7.4") + assert name == "typing-extensions" + assert version == ">=3.7.4" + + def test_complex_version_constraint(self): + """Test parsing complex version constraints.""" + name, version = _parse_requires_dist("gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1") + assert name == "gast" + assert version == "!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1" + + def test_version_range(self): + """Test parsing version range constraints.""" + name, version = _parse_requires_dist("grpcio<2.0,>=1.24.3") + assert name == "grpcio" + assert version == "<2.0,>=1.24.3" + + def test_tilde_version(self): + """Test parsing tilde version constraints.""" + name, version = _parse_requires_dist("tensorboard~=2.20.0") + assert name == "tensorboard" + assert version == "~=2.20.0"