orchard/backend/app/dependencies.py
Mondo Diaz f1ac43c1cb fix: use lenient conflict handling for dependency resolution
Instead of failing with 409 on version conflicts, use a "first version wins"
strategy. This allows resolution to succeed for complex dependency trees
like tensorflow where transitive dependencies may have overlapping but
not identical version requirements.

The resolver now:
- Checks if an already-resolved version satisfies a new constraint
- If yes, reuses the existing version
- If no, logs the mismatch and uses the first-encountered version

This matches pip's behavior of picking a working version rather than
failing on theoretical conflicts.
2026-02-04 13:45:15 -06:00


"""
Dependency management module for artifact dependencies.
Handles:
- Parsing orchard.ensure files
- Storing dependencies in the database
- Querying dependencies and reverse dependencies
- Dependency resolution with topological sorting
- Circular dependency detection
- Conflict detection
"""
import re
import logging
import yaml
from typing import List, Dict, Any, Optional, Set, Tuple, TYPE_CHECKING
from sqlalchemy.orm import Session
from sqlalchemy import and_
if TYPE_CHECKING:
from .storage import S3Storage
from .registry_client import RegistryClient
logger = logging.getLogger(__name__)
# Import packaging for PEP 440 version matching
try:
from packaging.specifiers import SpecifierSet, InvalidSpecifier
from packaging.version import Version, InvalidVersion
HAS_PACKAGING = True
except ImportError:
HAS_PACKAGING = False
from .models import (
Project,
Package,
Artifact,
ArtifactDependency,
PackageVersion,
)
from .schemas import (
EnsureFileContent,
EnsureFileDependency,
DependencyResponse,
ArtifactDependenciesResponse,
DependentInfo,
ReverseDependenciesResponse,
ResolvedArtifact,
DependencyResolutionResponse,
DependencyConflict,
MissingDependency,
PaginationMeta,
)
def _normalize_pypi_package_name(name: str) -> str:
"""
Normalize a PyPI package name for comparison.
- Strips extras brackets (e.g., "package[extra]" -> "package")
- Replaces sequences of hyphens, underscores, and dots with a single hyphen
- Lowercases the result
This follows PEP 503 normalization rules.
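Example (illustrative): "Flask_SQLAlchemy[async]" -> "flask-sqlalchemy".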
"""
# Strip extras brackets like [test], [dev], etc.
base_name = re.sub(r'\[.*\]', '', name)
# Normalize separators and lowercase
return re.sub(r'[-_.]+', '-', base_name).lower()
class DependencyError(Exception):
"""Base exception for dependency errors."""
pass
class CircularDependencyError(DependencyError):
"""Raised when a circular dependency is detected."""
def __init__(self, cycle: List[str]):
self.cycle = cycle
super().__init__(f"Circular dependency detected: {' -> '.join(cycle)}")
class DependencyConflictError(DependencyError):
"""Raised when conflicting dependency versions are detected."""
def __init__(self, conflicts: List[DependencyConflict]):
self.conflicts = conflicts
super().__init__(f"Dependency conflicts detected: {len(conflicts)} conflict(s)")
class DependencyNotFoundError(DependencyError):
"""Raised when a dependency cannot be resolved."""
def __init__(self, project: str, package: str, constraint: str):
self.project = project
self.package = package
self.constraint = constraint
super().__init__(f"Dependency not found: {project}/{package}@{constraint}")
class InvalidEnsureFileError(DependencyError):
"""Raised when the ensure file is invalid."""
pass
class DependencyDepthExceededError(DependencyError):
"""Raised when dependency resolution exceeds max depth."""
def __init__(self, max_depth: int):
self.max_depth = max_depth
super().__init__(f"Dependency resolution exceeded maximum depth of {max_depth}")
# Safety limits to prevent DoS attacks
MAX_DEPENDENCY_DEPTH = 50 # Maximum levels of nested dependencies
MAX_DEPENDENCIES_PER_ARTIFACT = 200 # Maximum direct dependencies per artifact
def parse_ensure_file(content: bytes) -> EnsureFileContent:
"""
Parse an orchard.ensure file.
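Expected layout (illustrative values):
    dependencies:
      - project: _pypi
        package: requests
        version: ">=2.28"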
Args:
content: Raw bytes of the ensure file
Returns:
Parsed EnsureFileContent
Raises:
InvalidEnsureFileError: If the file is invalid YAML or has wrong structure
"""
try:
data = yaml.safe_load(content.decode('utf-8'))
except yaml.YAMLError as e:
raise InvalidEnsureFileError(f"Invalid YAML: {e}")
except UnicodeDecodeError as e:
raise InvalidEnsureFileError(f"Invalid encoding: {e}")
if data is None:
return EnsureFileContent(dependencies=[])
if not isinstance(data, dict):
raise InvalidEnsureFileError("Ensure file must be a YAML dictionary")
dependencies = []
deps_data = data.get('dependencies', [])
if not isinstance(deps_data, list):
raise InvalidEnsureFileError("'dependencies' must be a list")
# Safety limit: prevent DoS through excessive dependencies
if len(deps_data) > MAX_DEPENDENCIES_PER_ARTIFACT:
raise InvalidEnsureFileError(
f"Too many dependencies: {len(deps_data)} exceeds maximum of {MAX_DEPENDENCIES_PER_ARTIFACT}"
)
for i, dep in enumerate(deps_data):
if not isinstance(dep, dict):
raise InvalidEnsureFileError(f"Dependency {i} must be a dictionary")
project = dep.get('project')
package = dep.get('package')
version = dep.get('version')
if not project:
raise InvalidEnsureFileError(f"Dependency {i} missing 'project'")
if not package:
raise InvalidEnsureFileError(f"Dependency {i} missing 'package'")
if not version:
raise InvalidEnsureFileError(f"Dependency {i} missing 'version'")
dependencies.append(EnsureFileDependency(
project=project,
package=package,
version=version,
))
return EnsureFileContent(dependencies=dependencies)
def validate_dependencies(
db: Session,
dependencies: List[EnsureFileDependency],
) -> List[str]:
"""
Validate that all dependency projects exist.
Args:
db: Database session
dependencies: List of dependencies to validate
Returns:
List of error messages (empty if all valid)
"""
errors = []
for dep in dependencies:
project = db.query(Project).filter(Project.name == dep.project).first()
if not project:
errors.append(f"Project '{dep.project}' not found")
return errors
def store_dependencies(
db: Session,
artifact_id: str,
dependencies: List[EnsureFileDependency],
) -> List[ArtifactDependency]:
"""
Store dependencies for an artifact.
Args:
db: Database session
artifact_id: The artifact ID that has these dependencies
dependencies: List of dependencies to store
Returns:
List of created ArtifactDependency objects
"""
created = []
for dep in dependencies:
artifact_dep = ArtifactDependency(
artifact_id=artifact_id,
dependency_project=dep.project,
dependency_package=dep.package,
version_constraint=dep.version,
)
db.add(artifact_dep)
created.append(artifact_dep)
return created
def get_artifact_dependencies(
db: Session,
artifact_id: str,
) -> List[DependencyResponse]:
"""
Get all dependencies for an artifact.
Args:
db: Database session
artifact_id: The artifact ID
Returns:
List of DependencyResponse objects
"""
deps = db.query(ArtifactDependency).filter(
ArtifactDependency.artifact_id == artifact_id
).all()
return [DependencyResponse.from_orm_model(dep) for dep in deps]
def get_reverse_dependencies(
db: Session,
project_name: str,
package_name: str,
page: int = 1,
limit: int = 50,
) -> ReverseDependenciesResponse:
"""
Get all artifacts that depend on a given package.
Args:
db: Database session
project_name: Target project name
package_name: Target package name
page: Page number (1-indexed)
limit: Results per page
Returns:
ReverseDependenciesResponse with dependents and pagination
"""
# Query dependencies that point to this project/package
query = db.query(ArtifactDependency).filter(
ArtifactDependency.dependency_project == project_name,
ArtifactDependency.dependency_package == package_name,
)
total = query.count()
offset = (page - 1) * limit
deps = query.offset(offset).limit(limit).all()
dependents = []
for dep in deps:
# Get artifact info to find the project/package/version
artifact = db.query(Artifact).filter(Artifact.id == dep.artifact_id).first()
if not artifact:
continue
# Find which package this artifact belongs to via versions
version_record = db.query(PackageVersion).filter(
PackageVersion.artifact_id == dep.artifact_id,
).first()
if version_record:
pkg = db.query(Package).filter(Package.id == version_record.package_id).first()
if pkg:
proj = db.query(Project).filter(Project.id == pkg.project_id).first()
if proj:
dependents.append(DependentInfo(
artifact_id=dep.artifact_id,
project=proj.name,
package=pkg.name,
version=version_record.version,
constraint_value=dep.version_constraint,
))
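# Ceiling division: e.g. total=51 with limit=50 yields 2 pages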
total_pages = (total + limit - 1) // limit
return ReverseDependenciesResponse(
project=project_name,
package=package_name,
dependents=dependents,
pagination=PaginationMeta(
page=page,
limit=limit,
total=total,
total_pages=total_pages,
has_more=page < total_pages,
),
)
def _is_version_constraint(version_str: str) -> bool:
"""Check if a version string contains constraint operators."""
if not version_str:
return False
# Check for common constraint operators
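# e.g. '>=1.20,<2.0' or '1.*' count as constraints; a bare '1.20.0' is treated as an exact version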
return any(op in version_str for op in ['>=', '<=', '!=', '~=', '>', '<', '==', '*'])
def _version_satisfies_constraint(version: str, constraint: str) -> bool:
"""
Check if a version satisfies a constraint.
Args:
version: A version string (e.g., '1.26.0')
constraint: A version constraint (e.g., '>=1.20', '>=1.20,<2.0', '*')
Returns:
True if the version satisfies the constraint, False otherwise
"""
if not HAS_PACKAGING:
return False
# Wildcard matches everything
if constraint == '*' or not constraint:
return True
try:
spec = SpecifierSet(constraint)
v = Version(version)
return v in spec
except (InvalidSpecifier, InvalidVersion):
# If we can't parse, assume it doesn't match
return False
def _resolve_version_constraint(
db: Session,
package: Package,
constraint: str,
) -> Optional[Tuple[str, str, int]]:
"""
Resolve a version constraint (e.g., '>=1.9') to a specific version.
Uses PEP 440 version matching to find the best matching version.
Args:
db: Database session
package: Package to search versions in
constraint: Version constraint string (e.g., '>=1.9', '<2.0,>=1.5')
Returns:
Tuple of (artifact_id, resolved_version, size) or None if not found
"""
if not HAS_PACKAGING:
# Fallback: if packaging not available, can't do constraint matching
return None
# Handle wildcard - return latest version
if constraint == '*':
# Get the latest version by created_at
latest = db.query(PackageVersion).filter(
PackageVersion.package_id == package.id,
).order_by(PackageVersion.created_at.desc()).first()
if latest:
artifact = db.query(Artifact).filter(Artifact.id == latest.artifact_id).first()
if artifact:
return (artifact.id, latest.version, artifact.size)
return None
try:
specifier = SpecifierSet(constraint)
except InvalidSpecifier:
# Invalid constraint (e.g., ">=" without version) - treat as wildcard
# This can happen with malformed metadata from PyPI packages
latest = db.query(PackageVersion).filter(
PackageVersion.package_id == package.id,
).order_by(PackageVersion.created_at.desc()).first()
if latest:
artifact = db.query(Artifact).filter(Artifact.id == latest.artifact_id).first()
if artifact:
return (artifact.id, latest.version, artifact.size)
return None
# Get all versions for this package
all_versions = db.query(PackageVersion).filter(
PackageVersion.package_id == package.id,
).all()
if not all_versions:
return None
# Find matching versions
matching = []
for pv in all_versions:
try:
v = Version(pv.version)
if v in specifier:
matching.append((pv, v))
except InvalidVersion:
# Skip invalid versions
continue
if not matching:
return None
# Sort by version (descending) and return the latest matching
matching.sort(key=lambda x: x[1], reverse=True)
best_match = matching[0][0]
artifact = db.query(Artifact).filter(Artifact.id == best_match.artifact_id).first()
if artifact:
return (artifact.id, best_match.version, artifact.size)
return None
def _resolve_dependency_to_artifact(
db: Session,
project_name: str,
package_name: str,
version: str,
) -> Optional[Tuple[str, str, int]]:
"""
Resolve a dependency constraint to an artifact ID.
Supports:
- Exact version matching (e.g., '1.2.3')
- Version constraints (e.g., '>=1.9', '<2.0,>=1.5')
- Wildcard ('*' for any version)
Args:
db: Database session
project_name: Project name
package_name: Package name
version: Version or version constraint
Returns:
Tuple of (artifact_id, resolved_version, size) or None if not found
"""
# Get project and package
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
return None
package = db.query(Package).filter(
Package.project_id == project.id,
Package.name == package_name,
).first()
if not package:
return None
# Check if this is a version constraint (>=, <, etc.) or exact version
if _is_version_constraint(version):
result = _resolve_version_constraint(db, package, version)
if result:
return result
else:
# Look up by exact version
pkg_version = db.query(PackageVersion).filter(
PackageVersion.package_id == package.id,
PackageVersion.version == version,
).first()
if pkg_version:
artifact = db.query(Artifact).filter(
Artifact.id == pkg_version.artifact_id
).first()
if artifact:
return (artifact.id, version, artifact.size)
return None
def _detect_package_cycle(
db: Session,
project_name: str,
package_name: str,
target_project: str,
target_package: str,
visiting: Set[str],
visited: Set[str],
path: List[str],
) -> Optional[List[str]]:
"""
Detect cycles at the package level using DFS.
Args:
db: Database session
project_name: Current project being visited
package_name: Current package being visited
target_project: The project we're checking for cycles back to
target_package: The package we're checking for cycles back to
visiting: Set of package keys currently in the recursion stack
visited: Set of fully processed package keys
path: Current path for cycle reporting
Returns:
Cycle path if detected, None otherwise
"""
# Normalize names for comparison (handles extras like [test] and separators)
pkg_normalized = _normalize_pypi_package_name(package_name)
target_pkg_normalized = _normalize_pypi_package_name(target_package)
# Use normalized key for tracking
pkg_key = f"{project_name.lower()}/{pkg_normalized}"
# Check if we've reached the target package (cycle detected)
# Use normalized comparison to handle extras and naming variations
if project_name.lower() == target_project.lower() and pkg_normalized == target_pkg_normalized:
return path + [pkg_key]
if pkg_key in visiting:
# Cycle that does not loop back to the target package - ignore it here
return None
if pkg_key in visited:
return None
visiting.add(pkg_key)
path.append(pkg_key)
# Get the package and find any artifacts with dependencies
project = db.query(Project).filter(Project.name == project_name).first()
if project:
package = db.query(Package).filter(
Package.project_id == project.id,
Package.name == package_name,
).first()
if package:
# Find all artifacts in this package via versions
versions = db.query(PackageVersion).filter(PackageVersion.package_id == package.id).all()
artifact_ids = {v.artifact_id for v in versions}
# Get dependencies from all artifacts in this package
for artifact_id in artifact_ids:
deps = db.query(ArtifactDependency).filter(
ArtifactDependency.artifact_id == artifact_id
).all()
for dep in deps:
cycle = _detect_package_cycle(
db,
dep.dependency_project,
dep.dependency_package,
target_project,
target_package,
visiting,
visited,
path,
)
if cycle:
return cycle
path.pop()
visiting.remove(pkg_key)
visited.add(pkg_key)
return None
def check_circular_dependencies(
db: Session,
artifact_id: str,
new_dependencies: List[EnsureFileDependency],
project_name: Optional[str] = None,
package_name: Optional[str] = None,
) -> Optional[List[str]]:
"""
Check if adding the new dependencies would create a circular dependency.
Args:
db: Database session
artifact_id: The artifact that will have these dependencies
new_dependencies: Dependencies to be added
project_name: Project name (optional, will try to look up from version if not provided)
package_name: Package name (optional, will try to look up from version if not provided)
Returns:
Cycle path if detected, None otherwise
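e.g. ["myproj/pkg-a", "myproj/pkg-b", "myproj/pkg-a"] for a pkg-a -> pkg-b -> pkg-a loop (illustrative names)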
"""
# First, get the package info for this artifact to build path labels
if project_name and package_name:
current_path = f"{project_name}/{package_name}"
else:
# Try to look up from version
artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first()
if not artifact:
return None
# Find package for this artifact via version
version_record = db.query(PackageVersion).filter(
PackageVersion.artifact_id == artifact_id
).first()
if not version_record:
return None
package = db.query(Package).filter(Package.id == version_record.package_id).first()
if not package:
return None
project = db.query(Project).filter(Project.id == package.project_id).first()
if not project:
return None
current_path = f"{project.name}/{package.name}"
# Extract target project and package from current_path
if "/" in current_path:
target_project, target_package = current_path.split("/", 1)
else:
return None
# Normalize the initial path for consistency with _detect_package_cycle
normalized_path = f"{target_project.lower()}/{_normalize_pypi_package_name(target_package)}"
# For each new dependency, check if it would create a cycle back to our package
for dep in new_dependencies:
# Check if this dependency (transitively) depends on us at the package level
visiting: Set[str] = set()
visited: Set[str] = set()
path: List[str] = [normalized_path]
# Check from the dependency's package
cycle = _detect_package_cycle(
db,
dep.project,
dep.package,
target_project,
target_package,
visiting,
visited,
path,
)
if cycle:
return cycle
return None
def resolve_dependencies(
db: Session,
project_name: str,
package_name: str,
ref: str,
base_url: str,
) -> DependencyResolutionResponse:
"""
Resolve all dependencies for an artifact recursively.
Args:
db: Database session
project_name: Project name
package_name: Package name
ref: Version reference (or artifact:hash)
base_url: Base URL for download URLs
Returns:
DependencyResolutionResponse with all resolved artifacts
Raises:
DependencyNotFoundError: If the requested root artifact cannot be resolved
CircularDependencyError: If circular dependencies are detected
Note: version conflicts do not raise; the first resolved version wins and
mismatches are logged, while missing dependencies are reported in the response.
"""
# Resolve the initial artifact
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise DependencyNotFoundError(project_name, package_name, ref)
package = db.query(Package).filter(
Package.project_id == project.id,
Package.name == package_name,
).first()
if not package:
raise DependencyNotFoundError(project_name, package_name, ref)
# Handle artifact: prefix for direct artifact ID references
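# e.g. ref="artifact:3fa94c0d..." pins a specific stored artifact by its hash (illustrative value)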
if ref.startswith("artifact:"):
artifact_id = ref[9:]
artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first()
if not artifact:
raise DependencyNotFoundError(project_name, package_name, ref)
root_artifact_id = artifact.id
root_version = artifact_id[:12] # Use short hash as version display
root_size = artifact.size
else:
# Try to find artifact by version
resolved = _resolve_dependency_to_artifact(
db, project_name, package_name, ref
)
if not resolved:
raise DependencyNotFoundError(project_name, package_name, ref)
root_artifact_id, root_version, root_size = resolved
# Track resolved artifacts and their versions
resolved_artifacts: Dict[str, ResolvedArtifact] = {}
# Track missing dependencies (not cached on server)
missing_dependencies: List[MissingDependency] = []
# Track version requirements for conflict detection
version_requirements: Dict[str, List[Dict[str, Any]]] = {} # pkg_key -> list of {"version", "required_by"} entries
# Track visiting/visited for cycle detection
visiting: Set[str] = set()
visited: Set[str] = set()
# Track the current path for cycle reporting (artifact_id -> pkg_key)
current_path: Dict[str, str] = {}
# Resolution order (topological)
resolution_order: List[str] = []
def _resolve_recursive(
artifact_id: str,
proj_name: str,
pkg_name: str,
version_or_tag: str,
size: int,
required_by: Optional[str],
depth: int = 0,
):
"""Recursively resolve dependencies with cycle/conflict detection."""
# Safety limit: prevent DoS through deeply nested dependencies
if depth > MAX_DEPENDENCY_DEPTH:
raise DependencyDepthExceededError(MAX_DEPENDENCY_DEPTH)
pkg_key = f"{proj_name}/{pkg_name}"
# Cycle detection (at artifact level)
if artifact_id in visiting:
# Build cycle path from current_path
cycle_start = current_path.get(artifact_id, pkg_key)
cycle = [cycle_start, pkg_key]
raise CircularDependencyError(cycle)
# Version conflict handling - use first resolved version (lenient mode)
if pkg_key in version_requirements:
existing_versions = {r["version"] for r in version_requirements[pkg_key]}
if version_or_tag not in existing_versions:
# Different version requested - log and use existing (first wins)
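# e.g. requests first resolved to 2.31.0, a later dependent asks for 2.32.0: keep 2.31.0 and log (illustrative versions)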
existing = version_requirements[pkg_key][0]["version"]
logger.debug(
f"Version mismatch for {pkg_key}: using {existing} "
f"(also requested: {version_or_tag} by {required_by})"
)
# Already resolved this package - skip
return
if artifact_id in visited:
return
visiting.add(artifact_id)
current_path[artifact_id] = pkg_key
# Track version requirement
if pkg_key not in version_requirements:
version_requirements[pkg_key] = []
version_requirements[pkg_key].append({
"version": version_or_tag,
"required_by": required_by,
})
# Get dependencies
deps = db.query(ArtifactDependency).filter(
ArtifactDependency.artifact_id == artifact_id
).all()
# Resolve each dependency first (depth-first)
for dep in deps:
# Skip self-dependencies (can happen with PyPI extras like pytest[testing])
# Use normalized comparison for PyPI naming conventions (handles extras, separators)
dep_proj_normalized = dep.dependency_project.lower()
dep_pkg_normalized = _normalize_pypi_package_name(dep.dependency_package)
curr_proj_normalized = proj_name.lower()
curr_pkg_normalized = _normalize_pypi_package_name(pkg_name)
if dep_proj_normalized == curr_proj_normalized and dep_pkg_normalized == curr_pkg_normalized:
continue
resolved_dep = _resolve_dependency_to_artifact(
db,
dep.dependency_project,
dep.dependency_package,
dep.version_constraint,
)
if not resolved_dep:
# Dependency not cached on server - track as missing but continue
constraint = dep.version_constraint
missing_dependencies.append(MissingDependency(
project=dep.dependency_project,
package=dep.dependency_package,
constraint=constraint,
required_by=pkg_key,
))
continue
dep_artifact_id, dep_version, dep_size = resolved_dep
# Skip if resolved to same artifact (self-dependency at artifact level)
if dep_artifact_id == artifact_id:
continue
# Skip if this artifact is already being visited (would cause cycle)
if dep_artifact_id in visiting:
continue
_resolve_recursive(
dep_artifact_id,
dep.dependency_project,
dep.dependency_package,
dep_version,
dep_size,
pkg_key,
depth + 1,
)
visiting.remove(artifact_id)
del current_path[artifact_id]
visited.add(artifact_id)
# Add to resolution order (dependencies before dependents)
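# e.g. if A depends on B, B is appended before A, so consumers can install the list front-to-back (illustrative)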
resolution_order.append(artifact_id)
# Store resolved artifact info
resolved_artifacts[artifact_id] = ResolvedArtifact(
artifact_id=artifact_id,
project=proj_name,
package=pkg_name,
version=version_or_tag,
size=size,
download_url=f"{base_url}/api/v1/project/{proj_name}/{pkg_name}/+/{version_or_tag}",
)
# Start resolution from root
_resolve_recursive(
root_artifact_id,
project_name,
package_name,
root_version,
root_size,
None,
)
# Build response in topological order
resolved_list = [resolved_artifacts[aid] for aid in resolution_order]
total_size = sum(r.size for r in resolved_list)
return DependencyResolutionResponse(
requested={
"project": project_name,
"package": package_name,
"ref": ref,
},
resolved=resolved_list,
missing=missing_dependencies,
fetched=[], # No fetching in sync version
total_size=total_size,
artifact_count=len(resolved_list),
)
# System project mapping for auto-fetch
SYSTEM_PROJECT_REGISTRY_MAP = {
"_pypi": "pypi",
"_npm": "npm",
"_maven": "maven",
}
async def resolve_dependencies_with_fetch(
db: Session,
project_name: str,
package_name: str,
ref: str,
base_url: str,
storage: "S3Storage",
registry_clients: Dict[str, "RegistryClient"],
max_fetch_depth: int = 10,
) -> DependencyResolutionResponse:
"""
Resolve all dependencies for an artifact recursively, fetching missing ones from upstream.
This async version extends the basic resolution with auto-fetch capability:
when a missing dependency is from a system project (e.g., _pypi), it attempts
to fetch the package from the corresponding upstream registry.
If the root artifact itself doesn't exist in a system project, it will also
be fetched from upstream before resolution begins.
Args:
db: Database session
project_name: Project name
package_name: Package name
ref: Version reference (or artifact:hash)
base_url: Base URL for download URLs
storage: S3 storage for caching fetched artifacts
registry_clients: Map of system project to registry client {"_pypi": PyPIRegistryClient}
max_fetch_depth: Maximum depth for auto-fetching (prevents runaway fetching)
Returns:
DependencyResolutionResponse with all resolved artifacts and fetch status
Raises:
DependencyNotFoundError: If the root artifact cannot be found (even after fetch attempt)
CircularDependencyError: If circular dependencies are detected
Note: version conflicts do not raise; the first resolved version wins and
mismatches are logged, while missing dependencies are reported in the response.
"""
# Track fetched artifacts for response
fetched_artifacts: List[ResolvedArtifact] = []
# Check if project exists
project = db.query(Project).filter(Project.name == project_name).first()
# If project doesn't exist and it's a system project pattern, we can't auto-create it
if not project:
raise DependencyNotFoundError(project_name, package_name, ref)
# Check if package exists
package = db.query(Package).filter(
Package.project_id == project.id,
Package.name == package_name,
).first()
# Try to resolve the root artifact
root_artifact_id = None
root_version = None
root_size = None
# Handle artifact: prefix for direct artifact ID references
if ref.startswith("artifact:"):
artifact_id = ref[9:]
artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first()
if artifact:
root_artifact_id = artifact.id
root_version = artifact_id[:12]
root_size = artifact.size
elif package:
# Try to resolve by version/constraint
resolved = _resolve_dependency_to_artifact(
db, project_name, package_name, ref
)
if resolved:
root_artifact_id, root_version, root_size = resolved
# If root artifact not found and this is a system project, try to fetch it
if root_artifact_id is None and project_name in SYSTEM_PROJECT_REGISTRY_MAP:
logger.info(
f"Root artifact {project_name}/{package_name}@{ref} not found, "
"attempting to fetch from upstream"
)
client = registry_clients.get(project_name)
if client:
try:
# Resolve the version constraint from upstream
version_info = await client.resolve_constraint(package_name, ref)
if version_info:
# Fetch and cache the package
fetch_result = await client.fetch_package(
package_name, version_info, db, storage
)
if fetch_result:
logger.info(
f"Successfully fetched root artifact {package_name}=="
f"{fetch_result.version} (artifact {fetch_result.artifact_id[:12]})"
)
root_artifact_id = fetch_result.artifact_id
root_version = fetch_result.version
root_size = fetch_result.size
# Add to fetched list
fetched_artifacts.append(ResolvedArtifact(
artifact_id=fetch_result.artifact_id,
project=project_name,
package=package_name,
version=fetch_result.version,
size=fetch_result.size,
download_url=f"{base_url}/api/v1/project/{project_name}/{package_name}/+/{fetch_result.version}",
))
except Exception as e:
logger.warning(f"Failed to fetch root artifact {package_name}: {e}")
# If still no root artifact, raise error
if root_artifact_id is None:
raise DependencyNotFoundError(project_name, package_name, ref)
# Track state
resolved_artifacts: Dict[str, ResolvedArtifact] = {}
missing_dependencies: List[MissingDependency] = []
# Note: fetched_artifacts was initialized earlier and may already contain
# the root artifact if it was fetched from upstream
version_requirements: Dict[str, List[Dict[str, Any]]] = {}
visiting: Set[str] = set()
visited: Set[str] = set()
current_path: Dict[str, str] = {}
resolution_order: List[str] = []
# Track fetch attempts to prevent loops
fetch_attempted: Set[str] = set() # "project/package@constraint"
async def _try_fetch_dependency(
dep_project: str,
dep_package: str,
constraint: str,
required_by: str,
fetch_depth: int,
) -> Optional[Tuple[str, str, int]]:
"""
Try to fetch a missing dependency from upstream registry.
Returns (artifact_id, version, size) if successful, None otherwise.
"""
# Only fetch from system projects
registry_type = SYSTEM_PROJECT_REGISTRY_MAP.get(dep_project)
if not registry_type:
logger.debug(
f"Not a system project, skipping fetch: {dep_project}/{dep_package}"
)
return None
# Check fetch depth
if fetch_depth > max_fetch_depth:
logger.info(
f"Max fetch depth ({max_fetch_depth}) exceeded for {dep_project}/{dep_package}"
)
return None
# Build fetch key for loop prevention
fetch_key = f"{dep_project}/{dep_package}@{constraint}"
if fetch_key in fetch_attempted:
logger.debug(f"Already attempted fetch for {fetch_key}")
return None
fetch_attempted.add(fetch_key)
# Get registry client
client = registry_clients.get(dep_project)
if not client:
logger.debug(f"No registry client for {dep_project}")
return None
try:
# Resolve version constraint
version_info = await client.resolve_constraint(dep_package, constraint)
if not version_info:
logger.info(
f"No version of {dep_package} matches constraint '{constraint}' on upstream"
)
return None
# Fetch and cache the package
fetch_result = await client.fetch_package(
dep_package, version_info, db, storage
)
if not fetch_result:
logger.warning(f"Failed to fetch {dep_package}=={version_info.version}")
return None
logger.info(
f"Successfully fetched {dep_package}=={version_info.version} "
f"(artifact {fetch_result.artifact_id[:12]})"
)
# Add to fetched list for response
fetched_artifacts.append(ResolvedArtifact(
artifact_id=fetch_result.artifact_id,
project=dep_project,
package=dep_package,
version=fetch_result.version,
size=fetch_result.size,
download_url=f"{base_url}/api/v1/project/{dep_project}/{dep_package}/+/{fetch_result.version}",
))
return (fetch_result.artifact_id, fetch_result.version, fetch_result.size)
except Exception as e:
logger.warning(f"Error fetching {dep_package}: {e}")
return None
async def _resolve_recursive_async(
artifact_id: str,
proj_name: str,
pkg_name: str,
version_or_tag: str,
size: int,
required_by: Optional[str],
depth: int = 0,
fetch_depth: int = 0,
):
"""Recursively resolve dependencies with fetch capability."""
if depth > MAX_DEPENDENCY_DEPTH:
raise DependencyDepthExceededError(MAX_DEPENDENCY_DEPTH)
pkg_key = f"{proj_name}/{pkg_name}"
# Cycle detection
if artifact_id in visiting:
cycle_start = current_path.get(artifact_id, pkg_key)
cycle = [cycle_start, pkg_key]
raise CircularDependencyError(cycle)
# Version conflict handling - use first resolved version (lenient mode)
if pkg_key in version_requirements:
existing_versions = {r["version"] for r in version_requirements[pkg_key]}
if version_or_tag not in existing_versions:
# Different version requested - log and use existing (first wins)
existing = version_requirements[pkg_key][0]["version"]
logger.debug(
f"Version mismatch for {pkg_key}: using {existing} "
f"(also requested: {version_or_tag} by {required_by})"
)
# Already resolved this package - skip
return
if artifact_id in visited:
return
visiting.add(artifact_id)
current_path[artifact_id] = pkg_key
if pkg_key not in version_requirements:
version_requirements[pkg_key] = []
version_requirements[pkg_key].append({
"version": version_or_tag,
"required_by": required_by,
})
# Get dependencies
deps = db.query(ArtifactDependency).filter(
ArtifactDependency.artifact_id == artifact_id
).all()
for dep in deps:
# Skip self-dependencies (common with PyPI extras like pytest[testing] -> pytest)
dep_proj_normalized = dep.dependency_project.lower()
dep_pkg_normalized = _normalize_pypi_package_name(dep.dependency_package)
curr_proj_normalized = proj_name.lower()
curr_pkg_normalized = _normalize_pypi_package_name(pkg_name)
if dep_proj_normalized == curr_proj_normalized and dep_pkg_normalized == curr_pkg_normalized:
logger.debug(
f"Skipping self-dependency: {pkg_key} -> {dep.dependency_project}/{dep.dependency_package}"
)
continue
# Also check if this dependency would resolve to the current artifact
# (handles cases where package names differ but resolve to same artifact)
resolved_dep = _resolve_dependency_to_artifact(
db,
dep.dependency_project,
dep.dependency_package,
dep.version_constraint,
)
if not resolved_dep:
# Try to fetch from upstream if it's a system project
fetched = await _try_fetch_dependency(
dep.dependency_project,
dep.dependency_package,
dep.version_constraint,
pkg_key,
fetch_depth + 1,
)
if fetched:
resolved_dep = fetched
else:
# Still missing - add to missing list with fetch status
fetch_key = f"{dep.dependency_project}/{dep.dependency_package}@{dep.version_constraint}"
was_attempted = fetch_key in fetch_attempted
missing_dependencies.append(MissingDependency(
project=dep.dependency_project,
package=dep.dependency_package,
constraint=dep.version_constraint,
required_by=pkg_key,
fetch_attempted=was_attempted,
fetch_error="Max fetch depth exceeded" if fetch_depth >= max_fetch_depth else None,
))
continue
dep_artifact_id, dep_version, dep_size = resolved_dep
# Skip if resolved to same artifact (self-dependency at artifact level)
if dep_artifact_id == artifact_id:
logger.debug(
f"Skipping self-dependency (same artifact): {pkg_key} -> "
f"{dep.dependency_project}/{dep.dependency_package} (artifact {dep_artifact_id[:12]})"
)
continue
# Skip if this artifact is already being visited (would cause cycle)
if dep_artifact_id in visiting:
logger.debug(
f"Skipping dependency already in resolution stack: {pkg_key} -> "
f"{dep.dependency_project}/{dep.dependency_package} (artifact {dep_artifact_id[:12]})"
)
continue
# Check if we've already resolved this package to a different version
dep_pkg_key = f"{dep.dependency_project}/{dep.dependency_package}"
if dep_pkg_key in version_requirements:
existing_version = version_requirements[dep_pkg_key][0]["version"]
if existing_version != dep_version:
# Different version resolved - check if existing satisfies new constraint
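# e.g. urllib3 already resolved to 2.2.1 and a later constraint ">=1.26" is satisfied by it, so 2.2.1 is reused (illustrative)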
if HAS_PACKAGING and _version_satisfies_constraint(existing_version, dep.version_constraint):
logger.debug(
f"Reusing existing version {existing_version} for {dep_pkg_key} "
f"(satisfies constraint {dep.version_constraint})"
)
continue
else:
logger.debug(
f"Version conflict for {dep_pkg_key}: have {existing_version}, "
f"need {dep.version_constraint} (resolved to {dep_version})"
)
# Don't raise error - just use the first version we resolved
# This is more lenient than strict conflict detection
continue
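# Only dependencies that were fetched from upstream during this resolution advance the fetch depth;
# already-cached artifacts keep the current fetch budget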
await _resolve_recursive_async(
dep_artifact_id,
dep.dependency_project,
dep.dependency_package,
dep_version,
dep_size,
pkg_key,
depth + 1,
fetch_depth + 1 if dep_artifact_id in [f.artifact_id for f in fetched_artifacts] else fetch_depth,
)
visiting.remove(artifact_id)
del current_path[artifact_id]
visited.add(artifact_id)
resolution_order.append(artifact_id)
resolved_artifacts[artifact_id] = ResolvedArtifact(
artifact_id=artifact_id,
project=proj_name,
package=pkg_name,
version=version_or_tag,
size=size,
download_url=f"{base_url}/api/v1/project/{proj_name}/{pkg_name}/+/{version_or_tag}",
)
# Start resolution from root
await _resolve_recursive_async(
root_artifact_id,
project_name,
package_name,
root_version,
root_size,
None,
)
# Build response in topological order
resolved_list = [resolved_artifacts[aid] for aid in resolution_order]
total_size = sum(r.size for r in resolved_list)
return DependencyResolutionResponse(
requested={
"project": project_name,
"package": package_name,
"ref": ref,
},
resolved=resolved_list,
missing=missing_dependencies,
fetched=fetched_artifacts,
total_size=total_size,
artifact_count=len(resolved_list),
)