feat: add auto-fetch for missing dependencies from upstream registries
Add auto_fetch parameter to the dependency resolution endpoint that fetches
missing dependencies from upstream registries (PyPI) when resolving.

- Add RegistryClient abstraction with PyPIRegistryClient implementation
- Extract fetch_and_cache_pypi_package() for reuse
- Add resolve_dependencies_with_fetch() async function
- Extend MissingDependency schema with fetch_attempted/fetch_error
- Add fetched list to DependencyResolutionResponse
- Add auto_fetch_max_depth config setting (default: 3)
- Remove Usage section from Package page UI
- Add 6 integration tests for auto-fetch functionality
This commit is contained in:
@@ -11,11 +11,18 @@ Handles:
|
||||
"""
|
||||
|
||||
import re
|
||||
import logging
|
||||
import yaml
|
||||
from typing import List, Dict, Any, Optional, Set, Tuple
|
||||
from typing import List, Dict, Any, Optional, Set, Tuple, TYPE_CHECKING
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import and_
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .storage import S3Storage
|
||||
from .registry_client import RegistryClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Import packaging for PEP 440 version matching
|
||||
try:
|
||||
from packaging.specifiers import SpecifierSet, InvalidSpecifier
|
||||
@@ -848,6 +855,334 @@ def resolve_dependencies(
|
||||
},
|
||||
resolved=resolved_list,
|
||||
missing=missing_dependencies,
|
||||
fetched=[], # No fetching in sync version
|
||||
total_size=total_size,
|
||||
artifact_count=len(resolved_list),
|
||||
)
|
||||
|
||||
|
||||
# System project mapping for auto-fetch.
# Maps internal "system" project names (which mirror an upstream registry)
# to the registry type identifier. Only dependencies that live under one of
# these projects are eligible for auto-fetching from upstream; any other
# project name is treated as local-only.
SYSTEM_PROJECT_REGISTRY_MAP = {
    "_pypi": "pypi",
    "_npm": "npm",
    "_maven": "maven",
}
|
||||
|
||||
|
||||
async def resolve_dependencies_with_fetch(
    db: Session,
    project_name: str,
    package_name: str,
    ref: str,
    base_url: str,
    storage: "S3Storage",
    registry_clients: Dict[str, "RegistryClient"],
    max_fetch_depth: int = 3,
) -> DependencyResolutionResponse:
    """
    Resolve all dependencies for an artifact recursively, fetching missing ones from upstream.

    This async version extends the basic resolution with auto-fetch capability:
    when a missing dependency is from a system project (e.g., _pypi), it attempts
    to fetch the package from the corresponding upstream registry.

    Args:
        db: Database session
        project_name: Project name
        package_name: Package name
        ref: Version reference (or "artifact:<id>")
        base_url: Base URL used to build download URLs in the response
        storage: S3 storage for caching fetched artifacts
        registry_clients: Map of system project to registry client,
            e.g. {"_pypi": PyPIRegistryClient}
        max_fetch_depth: Maximum depth for auto-fetching (prevents runaway
            transitive fetching from upstream)

    Returns:
        DependencyResolutionResponse with all resolved artifacts and fetch status

    Raises:
        DependencyNotFoundError: If the root artifact cannot be found
        CircularDependencyError: If circular dependencies are detected
        DependencyConflictError: If conflicting versions are required
        DependencyDepthExceededError: If the dependency tree exceeds
            MAX_DEPENDENCY_DEPTH
    """
    # Resolve the initial artifact (same as the sync version).
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise DependencyNotFoundError(project_name, package_name, ref)

    package = db.query(Package).filter(
        Package.project_id == project.id,
        Package.name == package_name,
    ).first()
    if not package:
        raise DependencyNotFoundError(project_name, package_name, ref)

    # Handle "artifact:<id>" prefix for direct artifact ID references.
    if ref.startswith("artifact:"):
        artifact_id = ref[len("artifact:"):]
        artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first()
        if not artifact:
            raise DependencyNotFoundError(project_name, package_name, ref)
        root_artifact_id = artifact.id
        # A truncated artifact id stands in for the version string.
        root_version = artifact_id[:12]
        root_size = artifact.size
    else:
        resolved = _resolve_dependency_to_artifact(
            db, project_name, package_name, ref
        )
        if not resolved:
            raise DependencyNotFoundError(project_name, package_name, ref)
        root_artifact_id, root_version, root_size = resolved

    # Resolution state.
    resolved_artifacts: Dict[str, ResolvedArtifact] = {}
    missing_dependencies: List[MissingDependency] = []
    fetched_artifacts: List[ResolvedArtifact] = []  # Newly fetched from upstream
    fetched_ids: Set[str] = set()  # Artifact ids of fetched_artifacts (O(1) lookup)
    version_requirements: Dict[str, List[Dict[str, Any]]] = {}
    visiting: Set[str] = set()  # Artifacts on the current DFS stack (cycle detection)
    visited: Set[str] = set()  # Artifacts fully processed
    current_path: Dict[str, str] = {}  # artifact_id -> "project/package" on the DFS path
    resolution_order: List[str] = []  # Post-order: dependencies before dependents

    # Fetch attempts already made, keyed "project/package@constraint",
    # so a failing fetch is not retried repeatedly during one resolution.
    fetch_attempted: Set[str] = set()

    async def _try_fetch_dependency(
        dep_project: str,
        dep_package: str,
        constraint: str,
        required_by: str,
        fetch_depth: int,
    ) -> Optional[Tuple[str, str, int]]:
        """
        Try to fetch a missing dependency from its upstream registry.

        Returns (artifact_id, version, size) if successful, None otherwise.
        Side effects on success: records the attempt in fetch_attempted and
        appends a ResolvedArtifact to fetched_artifacts / fetched_ids.
        """
        # Only system projects (e.g. "_pypi") map to an upstream registry.
        registry_type = SYSTEM_PROJECT_REGISTRY_MAP.get(dep_project)
        if not registry_type:
            logger.debug(
                f"Not a system project, skipping fetch: {dep_project}/{dep_package}"
            )
            return None

        # Depth guard: prevents runaway transitive fetching. Note this
        # deliberately returns before recording an attempt.
        if fetch_depth > max_fetch_depth:
            logger.info(
                f"Max fetch depth ({max_fetch_depth}) exceeded for {dep_project}/{dep_package}"
            )
            return None

        # Loop prevention: never retry the exact same (package, constraint).
        fetch_key = f"{dep_project}/{dep_package}@{constraint}"
        if fetch_key in fetch_attempted:
            logger.debug(f"Already attempted fetch for {fetch_key}")
            return None
        fetch_attempted.add(fetch_key)

        client = registry_clients.get(dep_project)
        if not client:
            logger.debug(f"No registry client for {dep_project}")
            return None

        try:
            # Ask upstream which concrete version satisfies the constraint.
            version_info = await client.resolve_constraint(dep_package, constraint)
            if not version_info:
                logger.info(
                    f"No version of {dep_package} matches constraint '{constraint}' on upstream"
                )
                return None

            # Download and cache the package into our DB/storage.
            fetch_result = await client.fetch_package(
                dep_package, version_info, db, storage
            )
            if not fetch_result:
                logger.warning(f"Failed to fetch {dep_package}=={version_info.version}")
                return None

            logger.info(
                f"Successfully fetched {dep_package}=={version_info.version} "
                f"(artifact {fetch_result.artifact_id[:12]})"
            )

            # Expose the fetch in the response.
            fetched_ids.add(fetch_result.artifact_id)
            fetched_artifacts.append(ResolvedArtifact(
                artifact_id=fetch_result.artifact_id,
                project=dep_project,
                package=dep_package,
                version=fetch_result.version,
                size=fetch_result.size,
                download_url=f"{base_url}/api/v1/project/{dep_project}/{dep_package}/+/{fetch_result.version}",
            ))

            return (fetch_result.artifact_id, fetch_result.version, fetch_result.size)

        except Exception as e:
            # Best-effort: a failed fetch degrades into a "missing" entry for
            # the caller rather than aborting the whole resolution.
            logger.warning(f"Error fetching {dep_package}: {e}")
            return None

    async def _resolve_recursive_async(
        artifact_id: str,
        proj_name: str,
        pkg_name: str,
        version_or_tag: str,
        size: int,
        required_by: Optional[str],
        depth: int = 0,
        fetch_depth: int = 0,
    ) -> None:
        """Recursively resolve dependencies (post-order DFS) with fetch capability."""
        if depth > MAX_DEPENDENCY_DEPTH:
            raise DependencyDepthExceededError(MAX_DEPENDENCY_DEPTH)

        pkg_key = f"{proj_name}/{pkg_name}"

        # Cycle detection: an artifact already on the DFS stack means a cycle.
        if artifact_id in visiting:
            cycle_start = current_path.get(artifact_id, pkg_key)
            cycle = [cycle_start, pkg_key]
            raise CircularDependencyError(cycle)

        # Conflict detection: same package required at two different versions.
        if pkg_key in version_requirements:
            existing_versions = {r["version"] for r in version_requirements[pkg_key]}
            if version_or_tag not in existing_versions:
                requirements = version_requirements[pkg_key] + [
                    {"version": version_or_tag, "required_by": required_by}
                ]
                raise DependencyConflictError([
                    DependencyConflict(
                        project=proj_name,
                        package=pkg_name,
                        requirements=[
                            {
                                "version": r["version"],
                                "required_by": [{"path": r["required_by"]}] if r["required_by"] else []
                            }
                            for r in requirements
                        ],
                    )
                ])

        # Already fully processed: nothing more to do. (The original had this
        # check duplicated; a single check after conflict detection suffices.)
        if artifact_id in visited:
            return

        visiting.add(artifact_id)
        current_path[artifact_id] = pkg_key

        version_requirements.setdefault(pkg_key, []).append({
            "version": version_or_tag,
            "required_by": required_by,
        })

        # Declared dependencies of this artifact.
        deps = db.query(ArtifactDependency).filter(
            ArtifactDependency.artifact_id == artifact_id
        ).all()

        for dep in deps:
            # Skip self-dependencies (compare with PyPI-style name normalization).
            dep_proj_normalized = dep.dependency_project.lower()
            dep_pkg_normalized = _normalize_pypi_package_name(dep.dependency_package)
            curr_proj_normalized = proj_name.lower()
            curr_pkg_normalized = _normalize_pypi_package_name(pkg_name)
            if dep_proj_normalized == curr_proj_normalized and dep_pkg_normalized == curr_pkg_normalized:
                continue

            resolved_dep = _resolve_dependency_to_artifact(
                db,
                dep.dependency_project,
                dep.dependency_package,
                dep.version_constraint,
            )

            if not resolved_dep:
                # Not available locally — try the upstream registry.
                resolved_dep = await _try_fetch_dependency(
                    dep.dependency_project,
                    dep.dependency_package,
                    dep.version_constraint,
                    pkg_key,
                    fetch_depth + 1,
                )

            if not resolved_dep:
                # Still missing — report it with fetch diagnostics.
                fetch_key = f"{dep.dependency_project}/{dep.dependency_package}@{dep.version_constraint}"
                was_attempted = fetch_key in fetch_attempted
                # The depth guard in _try_fetch_dependency returns before the
                # attempt is recorded, so detect that case directly here.
                # (The original condition also required was_attempted, which
                # made this error message unreachable.)
                depth_exceeded = (
                    dep.dependency_project in SYSTEM_PROJECT_REGISTRY_MAP
                    and fetch_depth + 1 > max_fetch_depth
                )
                missing_dependencies.append(MissingDependency(
                    project=dep.dependency_project,
                    package=dep.dependency_package,
                    constraint=dep.version_constraint,
                    required_by=pkg_key,
                    fetch_attempted=was_attempted,
                    fetch_error="Max fetch depth exceeded" if depth_exceeded else None,
                ))
                continue

            dep_artifact_id, dep_version, dep_size = resolved_dep

            # Defensive: never recurse into the artifact we are resolving.
            if dep_artifact_id == artifact_id:
                continue

            await _resolve_recursive_async(
                dep_artifact_id,
                dep.dependency_project,
                dep.dependency_package,
                dep_version,
                dep_size,
                pkg_key,
                depth + 1,
                # Only freshly fetched artifacts advance the fetch depth.
                fetch_depth + 1 if dep_artifact_id in fetched_ids else fetch_depth,
            )

        visiting.remove(artifact_id)
        del current_path[artifact_id]
        visited.add(artifact_id)

        resolution_order.append(artifact_id)

        resolved_artifacts[artifact_id] = ResolvedArtifact(
            artifact_id=artifact_id,
            project=proj_name,
            package=pkg_name,
            version=version_or_tag,
            size=size,
            download_url=f"{base_url}/api/v1/project/{proj_name}/{pkg_name}/+/{version_or_tag}",
        )

    # Start resolution from the root artifact.
    await _resolve_recursive_async(
        root_artifact_id,
        project_name,
        package_name,
        root_version,
        root_size,
        None,
    )

    # Build the response; resolution_order is post-order, i.e. topological
    # (dependencies appear before their dependents).
    resolved_list = [resolved_artifacts[aid] for aid in resolution_order]
    total_size = sum(r.size for r in resolved_list)

    return DependencyResolutionResponse(
        requested={
            "project": project_name,
            "package": package_name,
            "ref": ref,
        },
        resolved=resolved_list,
        missing=missing_dependencies,
        fetched=fetched_artifacts,
        total_size=total_size,
        artifact_count=len(resolved_list),
    )
|
||||
|
||||
Reference in New Issue
Block a user