Fix nested dependency depth tracking in PyPI cache worker

When the cache worker downloaded a package through the proxy, any
dependencies discovered were always queued at depth=0 rather than at the
worker's current depth plus one. As a result, the configured maximum depth
was never enforced for nested dependencies.

Changes:
- Add cache-depth query parameter to pypi_download_file endpoint
- Worker now passes its current depth when fetching packages
- Dependencies are queued at the requesting task's depth+1 (propagated via
  the cache-depth parameter) instead of a hardcoded 0
- Add tests for depth tracking behavior
This commit is contained in:
Mondo Diaz
2026-02-02 13:47:22 -06:00
parent c7eca269f4
commit 5517048f05
3 changed files with 120 additions and 4 deletions

View File

@@ -192,8 +192,8 @@ def _process_cache_task(task_id: UUID):
_mark_task_failed(db, task, f"Max depth {max_depth} exceeded")
return
# Do the actual caching
result = _fetch_and_cache_package(task.package_name, task.version_constraint)
# Do the actual caching - pass depth so nested deps are queued at depth+1
result = _fetch_and_cache_package(task.package_name, task.version_constraint, depth=task.depth)
if result["success"]:
_mark_task_completed(db, task, cached_artifact_id=result.get("artifact_id"))
@@ -256,6 +256,7 @@ def _find_cached_package(db: Session, package_name: str) -> Optional[str]:
def _fetch_and_cache_package(
package_name: str,
version_constraint: Optional[str] = None,
depth: int = 0,
) -> dict:
"""
Fetch and cache a PyPI package by making requests through our own proxy.
@@ -263,6 +264,7 @@ def _fetch_and_cache_package(
Args:
package_name: The package name to cache.
version_constraint: Optional version constraint (currently not used for selection).
depth: Current recursion depth for dependency tracking.
Returns:
Dict with "success" bool, "artifact_id" on success, "error" on failure.
@@ -317,6 +319,11 @@ def _fetch_and_cache_package(
elif not download_url.startswith("http"):
download_url = f"{base_url}/pypi/simple/{normalized_name}/{download_url}"
# Add cache-depth query parameter to track recursion depth
# The proxy will queue dependencies at depth+1
separator = "&" if "?" in download_url else "?"
download_url = f"{download_url}{separator}cache-depth={depth}"
# Step 3: Download the file through our proxy (this caches it)
logger.debug(f"Downloading: {download_url}")
response = client.get(download_url)
@@ -337,6 +344,10 @@ def _fetch_and_cache_package(
return {"success": False, "error": str(e)}
# Alias for backward compatibility and clearer naming
_fetch_and_cache_package_with_depth = _fetch_and_cache_package
def _mark_task_completed(
db: Session,
task: PyPICacheTask,