fix: filter platform-specific and extra dependencies in PyPI proxy
The dependency parser was stripping environment markers but not checking if they indicated optional or platform-specific packages. This caused packages like jaraco.path to pull in pyobjc (324 sub-packages) even on non-macOS systems. Changes: - Filter dependencies with 'extra ==' markers (optional extras) - Filter dependencies with 'sys_platform' or 'platform_system' markers - Add diagnostic logging for depth exceeded errors - Add unit tests for dependency filtering Fixes tensorflow dependency resolution exceeding max depth.
This commit is contained in:
@@ -730,6 +730,8 @@ def resolve_dependencies(
|
|||||||
current_path: Dict[str, str] = {}
|
current_path: Dict[str, str] = {}
|
||||||
# Resolution order (topological)
|
# Resolution order (topological)
|
||||||
resolution_order: List[str] = []
|
resolution_order: List[str] = []
|
||||||
|
# Track resolution path for debugging
|
||||||
|
resolution_path_sync: List[str] = []
|
||||||
|
|
||||||
def _resolve_recursive(
|
def _resolve_recursive(
|
||||||
artifact_id: str,
|
artifact_id: str,
|
||||||
@@ -741,12 +743,16 @@ def resolve_dependencies(
|
|||||||
depth: int = 0,
|
depth: int = 0,
|
||||||
):
|
):
|
||||||
"""Recursively resolve dependencies with cycle/conflict detection."""
|
"""Recursively resolve dependencies with cycle/conflict detection."""
|
||||||
|
pkg_key = f"{proj_name}/{pkg_name}"
|
||||||
|
|
||||||
# Safety limit: prevent DoS through deeply nested dependencies
|
# Safety limit: prevent DoS through deeply nested dependencies
|
||||||
if depth > MAX_DEPENDENCY_DEPTH:
|
if depth > MAX_DEPENDENCY_DEPTH:
|
||||||
|
logger.error(
|
||||||
|
f"Dependency depth exceeded at {pkg_key} (depth={depth}). "
|
||||||
|
f"Resolution path: {' -> '.join(resolution_path_sync[-20:])}"
|
||||||
|
)
|
||||||
raise DependencyDepthExceededError(MAX_DEPENDENCY_DEPTH)
|
raise DependencyDepthExceededError(MAX_DEPENDENCY_DEPTH)
|
||||||
|
|
||||||
pkg_key = f"{proj_name}/{pkg_name}"
|
|
||||||
|
|
||||||
# Cycle detection (at artifact level)
|
# Cycle detection (at artifact level)
|
||||||
if artifact_id in visiting:
|
if artifact_id in visiting:
|
||||||
# Build cycle path from current_path
|
# Build cycle path from current_path
|
||||||
@@ -770,6 +776,9 @@ def resolve_dependencies(
|
|||||||
if artifact_id in visited:
|
if artifact_id in visited:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Track path for debugging (only after early-return checks)
|
||||||
|
resolution_path_sync.append(f"{pkg_key}@{version_or_tag}")
|
||||||
|
|
||||||
visiting.add(artifact_id)
|
visiting.add(artifact_id)
|
||||||
current_path[artifact_id] = pkg_key
|
current_path[artifact_id] = pkg_key
|
||||||
|
|
||||||
@@ -838,6 +847,7 @@ def resolve_dependencies(
|
|||||||
visiting.remove(artifact_id)
|
visiting.remove(artifact_id)
|
||||||
del current_path[artifact_id]
|
del current_path[artifact_id]
|
||||||
visited.add(artifact_id)
|
visited.add(artifact_id)
|
||||||
|
resolution_path_sync.pop()
|
||||||
|
|
||||||
# Add to resolution order (dependencies before dependents)
|
# Add to resolution order (dependencies before dependents)
|
||||||
resolution_order.append(artifact_id)
|
resolution_order.append(artifact_id)
|
||||||
@@ -1086,6 +1096,9 @@ async def resolve_dependencies_with_fetch(
|
|||||||
logger.warning(f"Error fetching {dep_package}: {e}")
|
logger.warning(f"Error fetching {dep_package}: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
# Track resolution path for debugging
|
||||||
|
resolution_path: List[str] = []
|
||||||
|
|
||||||
async def _resolve_recursive_async(
|
async def _resolve_recursive_async(
|
||||||
artifact_id: str,
|
artifact_id: str,
|
||||||
proj_name: str,
|
proj_name: str,
|
||||||
@@ -1096,11 +1109,15 @@ async def resolve_dependencies_with_fetch(
|
|||||||
depth: int = 0,
|
depth: int = 0,
|
||||||
):
|
):
|
||||||
"""Recursively resolve dependencies with fetch capability."""
|
"""Recursively resolve dependencies with fetch capability."""
|
||||||
if depth > MAX_DEPENDENCY_DEPTH:
|
|
||||||
raise DependencyDepthExceededError(MAX_DEPENDENCY_DEPTH)
|
|
||||||
|
|
||||||
pkg_key = f"{proj_name}/{pkg_name}"
|
pkg_key = f"{proj_name}/{pkg_name}"
|
||||||
|
|
||||||
|
if depth > MAX_DEPENDENCY_DEPTH:
|
||||||
|
logger.error(
|
||||||
|
f"Dependency depth exceeded at {pkg_key} (depth={depth}). "
|
||||||
|
f"Resolution path: {' -> '.join(resolution_path[-20:])}"
|
||||||
|
)
|
||||||
|
raise DependencyDepthExceededError(MAX_DEPENDENCY_DEPTH)
|
||||||
|
|
||||||
# Cycle detection
|
# Cycle detection
|
||||||
if artifact_id in visiting:
|
if artifact_id in visiting:
|
||||||
cycle_start = current_path.get(artifact_id, pkg_key)
|
cycle_start = current_path.get(artifact_id, pkg_key)
|
||||||
@@ -1123,6 +1140,9 @@ async def resolve_dependencies_with_fetch(
|
|||||||
if artifact_id in visited:
|
if artifact_id in visited:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Track path for debugging (only after early-return checks)
|
||||||
|
resolution_path.append(f"{pkg_key}@{version_or_tag}")
|
||||||
|
|
||||||
visiting.add(artifact_id)
|
visiting.add(artifact_id)
|
||||||
current_path[artifact_id] = pkg_key
|
current_path[artifact_id] = pkg_key
|
||||||
|
|
||||||
@@ -1235,6 +1255,7 @@ async def resolve_dependencies_with_fetch(
|
|||||||
visiting.remove(artifact_id)
|
visiting.remove(artifact_id)
|
||||||
del current_path[artifact_id]
|
del current_path[artifact_id]
|
||||||
visited.add(artifact_id)
|
visited.add(artifact_id)
|
||||||
|
resolution_path.pop()
|
||||||
|
|
||||||
resolution_order.append(artifact_id)
|
resolution_order.append(artifact_id)
|
||||||
|
|
||||||
|
|||||||
@@ -47,17 +47,36 @@ PROXY_READ_TIMEOUT = 60.0
|
|||||||
def _parse_requires_dist(requires_dist: str) -> Tuple[str, Optional[str]]:
|
def _parse_requires_dist(requires_dist: str) -> Tuple[str, Optional[str]]:
|
||||||
"""Parse a Requires-Dist line into (package_name, version_constraint).
|
"""Parse a Requires-Dist line into (package_name, version_constraint).
|
||||||
|
|
||||||
|
Filters out optional/extra dependencies and platform-specific dependencies
|
||||||
|
to avoid pulling in unnecessary packages during dependency resolution.
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
"requests (>=2.25.0)" -> ("requests", ">=2.25.0")
|
"requests (>=2.25.0)" -> ("requests", ">=2.25.0")
|
||||||
"typing-extensions; python_version < '3.8'" -> ("typing-extensions", None)
|
"typing-extensions; python_version < '3.8'" -> ("typing-extensions", None)
|
||||||
"numpy>=1.21.0" -> ("numpy", ">=1.21.0")
|
"numpy>=1.21.0" -> ("numpy", ">=1.21.0")
|
||||||
"certifi" -> ("certifi", None)
|
"certifi" -> ("certifi", None)
|
||||||
|
"pytest; extra == 'test'" -> (None, None) # Filtered: extra dependency
|
||||||
|
"pyobjc; sys_platform == 'darwin'" -> (None, None) # Filtered: platform-specific
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple of (normalized_package_name, version_constraint or None)
|
Tuple of (normalized_package_name, version_constraint or None)
|
||||||
|
Returns (None, None) for dependencies that should be filtered out.
|
||||||
"""
|
"""
|
||||||
# Remove any environment markers (after semicolon)
|
# Check for and filter environment markers (after semicolon)
|
||||||
if ';' in requires_dist:
|
if ';' in requires_dist:
|
||||||
|
marker_part = requires_dist.split(';', 1)[1].lower()
|
||||||
|
|
||||||
|
# Filter out extra/optional dependencies - these are not core dependencies
|
||||||
|
# Examples: "pytest; extra == 'test'", "sphinx; extra == 'docs'"
|
||||||
|
if 'extra' in marker_part:
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
# Filter out platform-specific dependencies to avoid cross-platform bloat
|
||||||
|
# Examples: "pyobjc; sys_platform == 'darwin'", "pywin32; sys_platform == 'win32'"
|
||||||
|
if 'sys_platform' in marker_part or 'platform_system' in marker_part:
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
# Strip the marker for remaining dependencies (like python_version constraints)
|
||||||
requires_dist = requires_dist.split(';')[0].strip()
|
requires_dist = requires_dist.split(';')[0].strip()
|
||||||
|
|
||||||
# Match patterns like "package (>=1.0)" or "package>=1.0" or "package"
|
# Match patterns like "package (>=1.0)" or "package>=1.0" or "package"
|
||||||
|
|||||||
85
backend/tests/unit/test_pypi_proxy.py
Normal file
85
backend/tests/unit/test_pypi_proxy.py
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
"""Unit tests for PyPI proxy functionality."""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from app.pypi_proxy import _parse_requires_dist
|
||||||
|
|
||||||
|
|
||||||
|
class TestParseRequiresDist:
|
||||||
|
"""Tests for _parse_requires_dist function."""
|
||||||
|
|
||||||
|
def test_simple_package(self):
|
||||||
|
"""Test parsing a simple package name."""
|
||||||
|
name, version = _parse_requires_dist("numpy")
|
||||||
|
assert name == "numpy"
|
||||||
|
assert version is None
|
||||||
|
|
||||||
|
def test_package_with_version(self):
|
||||||
|
"""Test parsing package with version constraint."""
|
||||||
|
name, version = _parse_requires_dist("numpy>=1.21.0")
|
||||||
|
assert name == "numpy"
|
||||||
|
assert version == ">=1.21.0"
|
||||||
|
|
||||||
|
def test_package_with_parenthesized_version(self):
|
||||||
|
"""Test parsing package with parenthesized version."""
|
||||||
|
name, version = _parse_requires_dist("requests (>=2.25.0)")
|
||||||
|
assert name == "requests"
|
||||||
|
assert version == ">=2.25.0"
|
||||||
|
|
||||||
|
def test_package_with_python_version_marker(self):
|
||||||
|
"""Test that python_version markers are preserved but marker stripped."""
|
||||||
|
name, version = _parse_requires_dist("typing-extensions; python_version < '3.8'")
|
||||||
|
assert name == "typing-extensions"
|
||||||
|
assert version is None
|
||||||
|
|
||||||
|
def test_filters_extra_dependencies(self):
|
||||||
|
"""Test that extra dependencies are filtered out."""
|
||||||
|
# Extra dependencies should return (None, None)
|
||||||
|
name, version = _parse_requires_dist("pytest; extra == 'test'")
|
||||||
|
assert name is None
|
||||||
|
assert version is None
|
||||||
|
|
||||||
|
name, version = _parse_requires_dist("sphinx; extra == 'docs'")
|
||||||
|
assert name is None
|
||||||
|
assert version is None
|
||||||
|
|
||||||
|
def test_filters_platform_specific_darwin(self):
|
||||||
|
"""Test that macOS-specific dependencies are filtered out."""
|
||||||
|
name, version = _parse_requires_dist("pyobjc; sys_platform == 'darwin'")
|
||||||
|
assert name is None
|
||||||
|
assert version is None
|
||||||
|
|
||||||
|
def test_filters_platform_specific_win32(self):
|
||||||
|
"""Test that Windows-specific dependencies are filtered out."""
|
||||||
|
name, version = _parse_requires_dist("pywin32; sys_platform == 'win32'")
|
||||||
|
assert name is None
|
||||||
|
assert version is None
|
||||||
|
|
||||||
|
def test_filters_platform_system_marker(self):
|
||||||
|
"""Test that platform_system markers are filtered out."""
|
||||||
|
name, version = _parse_requires_dist("jaraco-windows; platform_system == 'Windows'")
|
||||||
|
assert name is None
|
||||||
|
assert version is None
|
||||||
|
|
||||||
|
def test_normalizes_package_name(self):
|
||||||
|
"""Test that package names are normalized (PEP 503)."""
|
||||||
|
name, version = _parse_requires_dist("Typing_Extensions>=3.7.4")
|
||||||
|
assert name == "typing-extensions"
|
||||||
|
assert version == ">=3.7.4"
|
||||||
|
|
||||||
|
def test_complex_version_constraint(self):
|
||||||
|
"""Test parsing complex version constraints."""
|
||||||
|
name, version = _parse_requires_dist("gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1")
|
||||||
|
assert name == "gast"
|
||||||
|
assert version == "!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1"
|
||||||
|
|
||||||
|
def test_version_range(self):
|
||||||
|
"""Test parsing version range constraints."""
|
||||||
|
name, version = _parse_requires_dist("grpcio<2.0,>=1.24.3")
|
||||||
|
assert name == "grpcio"
|
||||||
|
assert version == "<2.0,>=1.24.3"
|
||||||
|
|
||||||
|
def test_tilde_version(self):
|
||||||
|
"""Test parsing tilde version constraints."""
|
||||||
|
name, version = _parse_requires_dist("tensorboard~=2.20.0")
|
||||||
|
assert name == "tensorboard"
|
||||||
|
assert version == "~=2.20.0"
|
||||||
Reference in New Issue
Block a user