feat: add auto-fetch for missing dependencies from upstream registries

Add auto_fetch parameter to dependency resolution endpoint that fetches
missing dependencies from upstream registries (PyPI) when resolving.

- Add RegistryClient abstraction with PyPIRegistryClient implementation
- Extract fetch_and_cache_pypi_package() for reuse
- Add resolve_dependencies_with_fetch() async function
- Extend MissingDependency schema with fetch_attempted/fetch_error
- Add fetched list to DependencyResolutionResponse
- Add auto_fetch_max_depth config setting (default: 3)
- Remove Usage section from Package page UI
- Add 6 integration tests for auto-fetch functionality
This commit is contained in:
Mondo Diaz
2026-02-04 12:01:49 -06:00
parent b82bd1c85a
commit 5cff4092e3
10 changed files with 1348 additions and 65 deletions

View File

@@ -11,11 +11,18 @@ Handles:
"""
import re
import logging
import yaml
from typing import List, Dict, Any, Optional, Set, Tuple
from typing import List, Dict, Any, Optional, Set, Tuple, TYPE_CHECKING
from sqlalchemy.orm import Session
from sqlalchemy import and_
if TYPE_CHECKING:
from .storage import S3Storage
from .registry_client import RegistryClient
logger = logging.getLogger(__name__)
# Import packaging for PEP 440 version matching
try:
from packaging.specifiers import SpecifierSet, InvalidSpecifier
@@ -848,6 +855,334 @@ def resolve_dependencies(
},
resolved=resolved_list,
missing=missing_dependencies,
fetched=[], # No fetching in sync version
total_size=total_size,
artifact_count=len(resolved_list),
)
# System project mapping for auto-fetch
SYSTEM_PROJECT_REGISTRY_MAP = {
"_pypi": "pypi",
"_npm": "npm",
"_maven": "maven",
}
async def resolve_dependencies_with_fetch(
db: Session,
project_name: str,
package_name: str,
ref: str,
base_url: str,
storage: "S3Storage",
registry_clients: Dict[str, "RegistryClient"],
max_fetch_depth: int = 3,
) -> DependencyResolutionResponse:
"""
Resolve all dependencies for an artifact recursively, fetching missing ones from upstream.
This async version extends the basic resolution with auto-fetch capability:
when a missing dependency is from a system project (e.g., _pypi), it attempts
to fetch the package from the corresponding upstream registry.
Args:
db: Database session
project_name: Project name
package_name: Package name
ref: Version reference (or artifact:hash)
base_url: Base URL for download URLs
storage: S3 storage for caching fetched artifacts
registry_clients: Map of system project to registry client {"_pypi": PyPIRegistryClient}
max_fetch_depth: Maximum depth for auto-fetching (prevents runaway fetching)
Returns:
DependencyResolutionResponse with all resolved artifacts and fetch status
Raises:
DependencyNotFoundError: If the root artifact cannot be found
CircularDependencyError: If circular dependencies are detected
DependencyConflictError: If conflicting versions are required
"""
# Resolve the initial artifact (same as sync version)
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise DependencyNotFoundError(project_name, package_name, ref)
package = db.query(Package).filter(
Package.project_id == project.id,
Package.name == package_name,
).first()
if not package:
raise DependencyNotFoundError(project_name, package_name, ref)
# Handle artifact: prefix for direct artifact ID references
if ref.startswith("artifact:"):
artifact_id = ref[9:]
artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first()
if not artifact:
raise DependencyNotFoundError(project_name, package_name, ref)
root_artifact_id = artifact.id
root_version = artifact_id[:12]
root_size = artifact.size
else:
resolved = _resolve_dependency_to_artifact(
db, project_name, package_name, ref
)
if not resolved:
raise DependencyNotFoundError(project_name, package_name, ref)
root_artifact_id, root_version, root_size = resolved
# Track state
resolved_artifacts: Dict[str, ResolvedArtifact] = {}
missing_dependencies: List[MissingDependency] = []
fetched_artifacts: List[ResolvedArtifact] = [] # Newly fetched
version_requirements: Dict[str, List[Dict[str, Any]]] = {}
visiting: Set[str] = set()
visited: Set[str] = set()
current_path: Dict[str, str] = {}
resolution_order: List[str] = []
# Track fetch attempts to prevent loops
fetch_attempted: Set[str] = set() # "project/package@constraint"
async def _try_fetch_dependency(
dep_project: str,
dep_package: str,
constraint: str,
required_by: str,
fetch_depth: int,
) -> Optional[Tuple[str, str, int]]:
"""
Try to fetch a missing dependency from upstream registry.
Returns (artifact_id, version, size) if successful, None otherwise.
"""
# Only fetch from system projects
registry_type = SYSTEM_PROJECT_REGISTRY_MAP.get(dep_project)
if not registry_type:
logger.debug(
f"Not a system project, skipping fetch: {dep_project}/{dep_package}"
)
return None
# Check fetch depth
if fetch_depth > max_fetch_depth:
logger.info(
f"Max fetch depth ({max_fetch_depth}) exceeded for {dep_project}/{dep_package}"
)
return None
# Build fetch key for loop prevention
fetch_key = f"{dep_project}/{dep_package}@{constraint}"
if fetch_key in fetch_attempted:
logger.debug(f"Already attempted fetch for {fetch_key}")
return None
fetch_attempted.add(fetch_key)
# Get registry client
client = registry_clients.get(dep_project)
if not client:
logger.debug(f"No registry client for {dep_project}")
return None
try:
# Resolve version constraint
version_info = await client.resolve_constraint(dep_package, constraint)
if not version_info:
logger.info(
f"No version of {dep_package} matches constraint '{constraint}' on upstream"
)
return None
# Fetch and cache the package
fetch_result = await client.fetch_package(
dep_package, version_info, db, storage
)
if not fetch_result:
logger.warning(f"Failed to fetch {dep_package}=={version_info.version}")
return None
logger.info(
f"Successfully fetched {dep_package}=={version_info.version} "
f"(artifact {fetch_result.artifact_id[:12]})"
)
# Add to fetched list for response
fetched_artifacts.append(ResolvedArtifact(
artifact_id=fetch_result.artifact_id,
project=dep_project,
package=dep_package,
version=fetch_result.version,
size=fetch_result.size,
download_url=f"{base_url}/api/v1/project/{dep_project}/{dep_package}/+/{fetch_result.version}",
))
return (fetch_result.artifact_id, fetch_result.version, fetch_result.size)
except Exception as e:
logger.warning(f"Error fetching {dep_package}: {e}")
return None
async def _resolve_recursive_async(
artifact_id: str,
proj_name: str,
pkg_name: str,
version_or_tag: str,
size: int,
required_by: Optional[str],
depth: int = 0,
fetch_depth: int = 0,
):
"""Recursively resolve dependencies with fetch capability."""
if depth > MAX_DEPENDENCY_DEPTH:
raise DependencyDepthExceededError(MAX_DEPENDENCY_DEPTH)
pkg_key = f"{proj_name}/{pkg_name}"
# Cycle detection
if artifact_id in visiting:
cycle_start = current_path.get(artifact_id, pkg_key)
cycle = [cycle_start, pkg_key]
raise CircularDependencyError(cycle)
# Conflict detection
if pkg_key in version_requirements:
existing_versions = {r["version"] for r in version_requirements[pkg_key]}
if version_or_tag not in existing_versions:
requirements = version_requirements[pkg_key] + [
{"version": version_or_tag, "required_by": required_by}
]
raise DependencyConflictError([
DependencyConflict(
project=proj_name,
package=pkg_name,
requirements=[
{
"version": r["version"],
"required_by": [{"path": r["required_by"]}] if r["required_by"] else []
}
for r in requirements
],
)
])
if artifact_id in visited:
return
if artifact_id in visited:
return
visiting.add(artifact_id)
current_path[artifact_id] = pkg_key
if pkg_key not in version_requirements:
version_requirements[pkg_key] = []
version_requirements[pkg_key].append({
"version": version_or_tag,
"required_by": required_by,
})
# Get dependencies
deps = db.query(ArtifactDependency).filter(
ArtifactDependency.artifact_id == artifact_id
).all()
for dep in deps:
# Skip self-dependencies
dep_proj_normalized = dep.dependency_project.lower()
dep_pkg_normalized = _normalize_pypi_package_name(dep.dependency_package)
curr_proj_normalized = proj_name.lower()
curr_pkg_normalized = _normalize_pypi_package_name(pkg_name)
if dep_proj_normalized == curr_proj_normalized and dep_pkg_normalized == curr_pkg_normalized:
continue
resolved_dep = _resolve_dependency_to_artifact(
db,
dep.dependency_project,
dep.dependency_package,
dep.version_constraint,
)
if not resolved_dep:
# Try to fetch from upstream if it's a system project
fetched = await _try_fetch_dependency(
dep.dependency_project,
dep.dependency_package,
dep.version_constraint,
pkg_key,
fetch_depth + 1,
)
if fetched:
resolved_dep = fetched
else:
# Still missing - add to missing list with fetch status
fetch_key = f"{dep.dependency_project}/{dep.dependency_package}@{dep.version_constraint}"
was_attempted = fetch_key in fetch_attempted
missing_dependencies.append(MissingDependency(
project=dep.dependency_project,
package=dep.dependency_package,
constraint=dep.version_constraint,
required_by=pkg_key,
fetch_attempted=was_attempted,
fetch_error="Max fetch depth exceeded" if was_attempted and fetch_depth >= max_fetch_depth else None,
))
continue
dep_artifact_id, dep_version, dep_size = resolved_dep
if dep_artifact_id == artifact_id:
continue
await _resolve_recursive_async(
dep_artifact_id,
dep.dependency_project,
dep.dependency_package,
dep_version,
dep_size,
pkg_key,
depth + 1,
fetch_depth + 1 if dep_artifact_id in [f.artifact_id for f in fetched_artifacts] else fetch_depth,
)
visiting.remove(artifact_id)
del current_path[artifact_id]
visited.add(artifact_id)
resolution_order.append(artifact_id)
resolved_artifacts[artifact_id] = ResolvedArtifact(
artifact_id=artifact_id,
project=proj_name,
package=pkg_name,
version=version_or_tag,
size=size,
download_url=f"{base_url}/api/v1/project/{proj_name}/{pkg_name}/+/{version_or_tag}",
)
# Start resolution from root
await _resolve_recursive_async(
root_artifact_id,
project_name,
package_name,
root_version,
root_size,
None,
)
# Build response in topological order
resolved_list = [resolved_artifacts[aid] for aid in resolution_order]
total_size = sum(r.size for r in resolved_list)
return DependencyResolutionResponse(
requested={
"project": project_name,
"package": package_name,
"ref": ref,
},
resolved=resolved_list,
missing=missing_dependencies,
fetched=fetched_artifacts,
total_size=total_size,
artifact_count=len(resolved_list),
)