fix: filter platform-specific and extra dependencies in PyPI proxy

The dependency parser stripped environment markers without checking
whether they marked a dependency as optional or platform-specific. As a
result, packages like jaraco.path pulled in pyobjc (324 sub-packages)
even on non-macOS systems.

Changes:
- Filter dependencies with 'extra ==' markers (optional extras)
- Filter dependencies with 'sys_platform' or 'platform_system' markers
- Add diagnostic logging for depth exceeded errors
- Add unit tests for dependency filtering

Fixes tensorflow dependency resolution exceeding max depth.
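
The filtering rule in the first two bullets can be sketched roughly as follows. This is an illustration only: it assumes requirement strings are parsed with the packaging library, and should_include is a hypothetical helper, not the proxy's actual code.

# Sketch only: the filtering rule described above, using the packaging
# library. The helper name `should_include` is hypothetical.
from packaging.requirements import InvalidRequirement, Requirement

# Marker substrings that indicate optional or platform-specific deps.
_FILTERED_MARKERS = ("extra ==", "sys_platform", "platform_system")

def should_include(requirement_line: str) -> bool:
    """Return False for requirements gated behind extras or platforms."""
    try:
        req = Requirement(requirement_line)
    except InvalidRequirement:
        return False  # choice made for this sketch: drop unparseable entries
    if req.marker is None:
        return True  # unconditional dependency
    marker_text = str(req.marker)
    return not any(key in marker_text for key in _FILTERED_MARKERS)

# e.g. jaraco.path's 'pyobjc; sys_platform == "darwin"' is now dropped on
# non-macOS resolution instead of pulling in 324 sub-packages.
assert should_include("requests>=2.0") is True
assert should_include('pyobjc; sys_platform == "darwin"') is False
assert should_include('pytest; extra == "testing"') is False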
Author: Mondo Diaz
Date:   2026-02-04 14:56:59 -06:00
Parent: a45ec46e94
Commit: 1389a03c69
3 changed files with 131 additions and 6 deletions


@@ -730,6 +730,8 @@ def resolve_dependencies(
     current_path: Dict[str, str] = {}
     # Resolution order (topological)
     resolution_order: List[str] = []
+    # Track resolution path for debugging
+    resolution_path_sync: List[str] = []

     def _resolve_recursive(
         artifact_id: str,
@@ -741,12 +743,16 @@ def resolve_dependencies(
         depth: int = 0,
     ):
         """Recursively resolve dependencies with cycle/conflict detection."""
+        pkg_key = f"{proj_name}/{pkg_name}"
         # Safety limit: prevent DoS through deeply nested dependencies
         if depth > MAX_DEPENDENCY_DEPTH:
+            logger.error(
+                f"Dependency depth exceeded at {pkg_key} (depth={depth}). "
+                f"Resolution path: {' -> '.join(resolution_path_sync[-20:])}"
+            )
             raise DependencyDepthExceededError(MAX_DEPENDENCY_DEPTH)
-        pkg_key = f"{proj_name}/{pkg_name}"

         # Cycle detection (at artifact level)
         if artifact_id in visiting:
             # Build cycle path from current_path
@@ -770,6 +776,9 @@ def resolve_dependencies(
         if artifact_id in visited:
             return

+        # Track path for debugging (only after early-return checks)
+        resolution_path_sync.append(f"{pkg_key}@{version_or_tag}")
+
         visiting.add(artifact_id)
         current_path[artifact_id] = pkg_key
@@ -838,6 +847,7 @@ def resolve_dependencies(
         visiting.remove(artifact_id)
         del current_path[artifact_id]
         visited.add(artifact_id)
+        resolution_path_sync.pop()

         # Add to resolution order (dependencies before dependents)
         resolution_order.append(artifact_id)
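
Both the sync and async resolvers (the async hunks follow below) use the same bookkeeping discipline for the new diagnostics: push onto the path list only after the early-return checks, pop once the artifact is fully resolved, and log the last twenty hops when the depth limit trips. A minimal standalone sketch of that pattern, with illustrative names and an assumed depth limit rather than the module's real constants:

# Standalone sketch of the depth guard + path-stack pattern; the graph
# shape, limit value, and error type are illustrative assumptions.
import logging
from typing import Dict, List, Set

logger = logging.getLogger(__name__)
MAX_DEPTH = 50  # stand-in for MAX_DEPENDENCY_DEPTH

def resolve(graph: Dict[str, List[str]], root: str) -> List[str]:
    order: List[str] = []
    visited: Set[str] = set()
    path: List[str] = []  # mirrors the active recursion for diagnostics

    def _walk(node: str, depth: int = 0) -> None:
        if depth > MAX_DEPTH:
            # Log only the last hops so the offending chain stays readable.
            logger.error("Depth exceeded at %s: %s", node, " -> ".join(path[-20:]))
            raise RecursionError(f"dependency depth > {MAX_DEPTH}")
        if node in visited:
            return
        path.append(node)  # push only after the early-return checks
        for dep in graph.get(node, []):
            _walk(dep, depth + 1)
        path.pop()  # pop once the node is fully resolved
        visited.add(node)
        order.append(node)

    _walk(root)
    return order

# resolve({"a": ["b"], "b": ["c"], "c": []}, "a") returns ["c", "b", "a"]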
@@ -1086,6 +1096,9 @@ async def resolve_dependencies_with_fetch(
             logger.warning(f"Error fetching {dep_package}: {e}")
             return None

+    # Track resolution path for debugging
+    resolution_path: List[str] = []
+
     async def _resolve_recursive_async(
         artifact_id: str,
         proj_name: str,
@@ -1096,11 +1109,15 @@ async def resolve_dependencies_with_fetch(
         depth: int = 0,
     ):
         """Recursively resolve dependencies with fetch capability."""
-        if depth > MAX_DEPENDENCY_DEPTH:
-            raise DependencyDepthExceededError(MAX_DEPENDENCY_DEPTH)
-
         pkg_key = f"{proj_name}/{pkg_name}"
+
+        if depth > MAX_DEPENDENCY_DEPTH:
+            logger.error(
+                f"Dependency depth exceeded at {pkg_key} (depth={depth}). "
+                f"Resolution path: {' -> '.join(resolution_path[-20:])}"
+            )
+            raise DependencyDepthExceededError(MAX_DEPENDENCY_DEPTH)

         # Cycle detection
         if artifact_id in visiting:
             cycle_start = current_path.get(artifact_id, pkg_key)
@@ -1123,6 +1140,9 @@ async def resolve_dependencies_with_fetch(
         if artifact_id in visited:
             return

+        # Track path for debugging (only after early-return checks)
+        resolution_path.append(f"{pkg_key}@{version_or_tag}")
+
         visiting.add(artifact_id)
         current_path[artifact_id] = pkg_key
@@ -1235,6 +1255,7 @@ async def resolve_dependencies_with_fetch(
         visiting.remove(artifact_id)
         del current_path[artifact_id]
         visited.add(artifact_id)
+        resolution_path.pop()

         resolution_order.append(artifact_id)
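
The unit tests mentioned in the commit message are not shown in this view. A hedged sketch of what a parametrized test of the marker rule might look like, where is_filtered is a hypothetical stand-in for whatever function the proxy's parser actually exposes:

# Sketch of a parametrized test for the marker rule; `is_filtered` is a
# hypothetical stand-in for the parser's real filtering function.
import pytest
from packaging.requirements import Requirement

FILTERED_MARKERS = ("extra ==", "sys_platform", "platform_system")

def is_filtered(requirement_line: str) -> bool:
    marker = Requirement(requirement_line).marker
    return marker is not None and any(key in str(marker) for key in FILTERED_MARKERS)

@pytest.mark.parametrize(
    "line,expected",
    [
        ("requests>=2.0", False),                         # unconditional: keep
        ('pyobjc; sys_platform == "darwin"', True),       # platform-specific: drop
        ('pywin32; platform_system == "Windows"', True),  # platform-specific: drop
        ('pytest; extra == "testing"', True),             # optional extra: drop
        ('urllib3; python_version >= "3.8"', False),      # version marker: keep
    ],
)
def test_marker_filtering(line: str, expected: bool) -> None:
    assert is_filtered(line) is expected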