Fix nested dependency depth tracking in PyPI cache worker

When the cache worker downloaded a package through the proxy, its dependencies
were always queued at depth=0 instead of depth+1, so every nested dependency
looked like a top-level request and the configured depth limit was never
enforced for transitive dependencies.
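
To make the failure mode concrete, here is a minimal, self-contained sketch of
the old vs. new behavior. The queue is a toy stand-in; only the depth guard and
the depth+1 queueing mirror the code changed below.

```python
# Sketch only: a toy reproduction of the bug. The depth guard and the
# depth + 1 queueing mirror the diff below; the queue itself is fake.
from collections import deque

MAX_DEPTH = 3
DEPS = {"a": ["b"], "b": ["c"], "c": ["d"], "d": ["e"]}

def run(fixed: bool) -> int:
    processed = 0
    q = deque([("a", 0)])  # user-triggered download seeds the queue
    while q:
        name, depth = q.popleft()
        if depth >= MAX_DEPTH:       # guard in the cache worker
            continue
        processed += 1
        for dep in DEPS.get(name, []):
            # Before the fix this was always depth=0, so the guard never fired.
            q.append((dep, depth + 1 if fixed else 0))
    return processed

print(run(fixed=False))  # 5 - every package processed, limit ignored
print(run(fixed=True))   # 3 - a, b, c processed; d stops at the limit
```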

Changes:
- Add cache-depth query parameter to pypi_download_file endpoint
- Worker now passes its current depth when fetching packages
- Dependencies are queued at cache_depth+1 instead of a hardcoded 0
- Add tests for depth tracking behavior (one such check is sketched below)
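
The test file itself is not shown in this view; the depth-tracking checks
referenced in the last bullet look roughly like the sketch below. The
`append_cache_depth` helper is hypothetical and just mirrors the separator
logic added in the worker.

```python
# Hypothetical helper mirroring the separator logic added in the worker;
# the real tests exercise the worker and endpoint directly.
import pytest

def append_cache_depth(download_url: str, depth: int) -> str:
    separator = "&" if "?" in download_url else "?"
    return f"{download_url}{separator}cache-depth={depth}"

@pytest.mark.parametrize(
    "url, depth, expected",
    [
        ("https://proxy.example/pypi/simple/foo/foo-1.0.whl", 2,
         "https://proxy.example/pypi/simple/foo/foo-1.0.whl?cache-depth=2"),
        ("https://proxy.example/foo-1.0.whl?upstream=abc", 1,
         "https://proxy.example/foo-1.0.whl?upstream=abc&cache-depth=1"),
    ],
)
def test_cache_depth_is_appended(url, depth, expected):
    assert append_cache_depth(url, depth) == expected
```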
Author: Mondo Diaz
Date:   2026-02-02 13:47:22 -06:00
parent  c7eca269f4
commit  5517048f05
3 changed files with 120 additions and 4 deletions


@@ -192,8 +192,8 @@ def _process_cache_task(task_id: UUID):
         _mark_task_failed(db, task, f"Max depth {max_depth} exceeded")
         return
 
-    # Do the actual caching
-    result = _fetch_and_cache_package(task.package_name, task.version_constraint)
+    # Do the actual caching - pass depth so nested deps are queued at depth+1
+    result = _fetch_and_cache_package(task.package_name, task.version_constraint, depth=task.depth)
 
     if result["success"]:
         _mark_task_completed(db, task, cached_artifact_id=result.get("artifact_id"))
@@ -256,6 +256,7 @@ def _find_cached_package(db: Session, package_name: str) -> Optional[str]:
 def _fetch_and_cache_package(
     package_name: str,
     version_constraint: Optional[str] = None,
+    depth: int = 0,
 ) -> dict:
     """
     Fetch and cache a PyPI package by making requests through our own proxy.
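
Since `depth` defaults to 0, existing callers are unaffected; only the worker
needs to pass its own depth. A quick sketch of the two call shapes (the literal
package name is just an example):

```python
# Existing call sites keep their behavior: depth defaults to 0 (top-level).
result = _fetch_and_cache_package("requests")

# The worker threads the task's depth through, so dependencies of this fetch
# are queued at task.depth + 1 by the proxy endpoint.
result = _fetch_and_cache_package(
    task.package_name,
    task.version_constraint,
    depth=task.depth,
)
```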
@@ -263,6 +264,7 @@ def _fetch_and_cache_package(
     Args:
         package_name: The package name to cache.
         version_constraint: Optional version constraint (currently not used for selection).
+        depth: Current recursion depth for dependency tracking.
 
     Returns:
         Dict with "success" bool, "artifact_id" on success, "error" on failure.
@@ -317,6 +319,11 @@ def _fetch_and_cache_package(
         elif not download_url.startswith("http"):
             download_url = f"{base_url}/pypi/simple/{normalized_name}/{download_url}"
 
+        # Add cache-depth query parameter to track recursion depth
+        # The proxy will queue dependencies at depth+1
+        separator = "&" if "?" in download_url else "?"
+        download_url = f"{download_url}{separator}cache-depth={depth}"
+
         # Step 3: Download the file through our proxy (this caches it)
         logger.debug(f"Downloading: {download_url}")
         response = client.get(download_url)
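
The separator check just keeps the URL valid whether or not a query string is
already present. For example (URLs and values are illustrative):

```python
# Illustrative values only - real URLs come from the proxy's simple index.
depth = 2
urls = [
    "http://localhost:8080/pypi/simple/requests/requests-2.31.0-py3-none-any.whl",
    "http://localhost:8080/pypi/simple/requests/requests-2.31.0-py3-none-any.whl?upstream=abc",
]
for download_url in urls:
    separator = "&" if "?" in download_url else "?"
    print(f"{download_url}{separator}cache-depth={depth}")
# The first URL gains "?cache-depth=2", the second gains "&cache-depth=2".
```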
@@ -337,6 +344,10 @@ def _fetch_and_cache_package(
         return {"success": False, "error": str(e)}
 
 
+# Alias for backward compatibility and clearer naming
+_fetch_and_cache_package_with_depth = _fetch_and_cache_package
+
+
 def _mark_task_completed(
     db: Session,
     task: PyPICacheTask,
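
The alias above binds a second name to the same function object, so either
spelling can be used where the depth argument matters (trivial sketch; the
package name is just an example):

```python
# Same function under two names; there is no separate depth-aware implementation.
assert _fetch_and_cache_package_with_depth is _fetch_and_cache_package
result = _fetch_and_cache_package_with_depth("numpy", depth=1)
```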


@@ -516,6 +516,7 @@ async def pypi_download_file(
     package_name: str,
     filename: str,
     upstream: Optional[str] = None,
+    cache_depth: int = Query(default=0, ge=0, le=100, alias="cache-depth"),
     db: Session = Depends(get_db),
     storage: S3Storage = Depends(get_storage),
 ):
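
On the endpoint side, `Query(..., alias="cache-depth")` maps the worker's
hyphenated query parameter onto the snake_case `cache_depth` argument, and
FastAPI rejects values outside 0-100. A self-contained sketch of just that
behavior (the demo route is hypothetical; the real endpoint also takes
package_name, filename, upstream, db, and storage):

```python
# Minimal demonstration of Query(alias=..., ge=..., le=...) parsing.
from fastapi import FastAPI, Query
from fastapi.testclient import TestClient

app = FastAPI()

@app.get("/demo")
def demo(cache_depth: int = Query(default=0, ge=0, le=100, alias="cache-depth")):
    return {"cache_depth": cache_depth}

client = TestClient(app)
print(client.get("/demo").json())                       # {'cache_depth': 0}
print(client.get("/demo?cache-depth=3").json())         # {'cache_depth': 3}
print(client.get("/demo?cache-depth=999").status_code)  # 422 - out of range
```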
@@ -526,6 +527,7 @@ async def pypi_download_file(
         package_name: The package name
         filename: The filename to download
         upstream: URL-encoded upstream URL to fetch from
+        cache_depth: Current cache recursion depth (used by cache worker for nested deps)
     """
     if not upstream:
         raise HTTPException(
@@ -772,17 +774,19 @@ async def pypi_download_file(
             db.add(dep)
 
     # Proactively cache dependencies via task queue
+    # Dependencies are queued at cache_depth + 1 to track recursion
     if unique_deps:
+        next_depth = cache_depth + 1
         for dep_name, dep_version in unique_deps:
             enqueue_cache_task(
                 db,
                 package_name=dep_name,
                 version_constraint=dep_version,
                 parent_task_id=None,  # Top-level, triggered by user download
-                depth=0,
+                depth=next_depth,
                 triggered_by_artifact=sha256,
             )
-        logger.info(f"PyPI proxy: queued {len(unique_deps)} dependencies for caching")
+        logger.info(f"PyPI proxy: queued {len(unique_deps)} dependencies for caching (depth={next_depth})")
 
     db.commit()
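
Putting the two halves together, depth now propagates across the proxy endpoint
and the cache worker roughly like this (a simplified trace assuming a max depth
of 3; the real flow goes through HTTP, the database, and the task queue):

```python
# Simplified end-to-end trace - no HTTP, no database, just the depth math.
MAX_DEPTH = 3
DEPS = {"app": ["lib-a"], "lib-a": ["lib-b"], "lib-b": ["lib-c"], "lib-c": []}

def proxy_download(package: str, cache_depth: int, queue: list) -> None:
    # pypi_download_file: queue each dependency at cache_depth + 1
    for dep in DEPS[package]:
        queue.append((dep, cache_depth + 1))

def worker(queue: list) -> None:
    # _process_cache_task: refuse tasks at or beyond the limit, otherwise
    # fetch through the proxy, passing the task's own depth along.
    while queue:
        package, depth = queue.pop(0)
        if depth >= MAX_DEPTH:
            print(f"skip {package} (depth {depth} >= {MAX_DEPTH})")
            continue
        print(f"cache {package} at depth {depth}")
        proxy_download(package, depth, queue)

tasks = []
proxy_download("app", 0, tasks)  # user download of "app" seeds the queue
worker(tasks)
# cache lib-a at depth 1
# cache lib-b at depth 2
# skip lib-c (depth 3 >= 3)
```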