Instead of failing with 409 on version conflicts, use a "first version wins" strategy. This allows resolution to succeed for complex dependency trees like tensorflow, where transitive dependencies may have overlapping but not identical version requirements. The resolver now:

- Checks whether an already-resolved version satisfies a new constraint
- If yes, reuses the existing version
- If no, logs the mismatch and uses the first-encountered version

This matches pip's behavior of picking a working version rather than failing on theoretical conflicts.
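In the async resolver this reuse check looks roughly like the following (a condensed sketch of the logic inside the per-dependency loop in `_resolve_recursive_async`, not the verbatim code):

```python
# Condensed sketch of the lenient "first version wins" check.
dep_pkg_key = f"{dep.dependency_project}/{dep.dependency_package}"
if dep_pkg_key in version_requirements:
    existing_version = version_requirements[dep_pkg_key][0]["version"]
    if existing_version != dep_version:
        if HAS_PACKAGING and _version_satisfies_constraint(existing_version, dep.version_constraint):
            # The already-resolved version also satisfies the new constraint: reuse it.
            continue
        # Genuine mismatch: log it and keep the first-resolved version anyway.
        logger.debug(
            f"Version conflict for {dep_pkg_key}: have {existing_version}, "
            f"need {dep.version_constraint}"
        )
        continue
```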
"""
|
|
Dependency management module for artifact dependencies.
|
|
|
|
Handles:
|
|
- Parsing orchard.ensure files
|
|
- Storing dependencies in the database
|
|
- Querying dependencies and reverse dependencies
|
|
- Dependency resolution with topological sorting
|
|
- Circular dependency detection
|
|
- Conflict detection
|
|
"""
|
|
|
|
import re
|
|
import logging
|
|
import yaml
|
|
from typing import List, Dict, Any, Optional, Set, Tuple, TYPE_CHECKING
|
|
from sqlalchemy.orm import Session
|
|
from sqlalchemy import and_
|
|
|
|
if TYPE_CHECKING:
|
|
from .storage import S3Storage
|
|
from .registry_client import RegistryClient
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Import packaging for PEP 440 version matching
|
|
try:
|
|
from packaging.specifiers import SpecifierSet, InvalidSpecifier
|
|
from packaging.version import Version, InvalidVersion
|
|
HAS_PACKAGING = True
|
|
except ImportError:
|
|
HAS_PACKAGING = False
|
|
|
|
from .models import (
|
|
Project,
|
|
Package,
|
|
Artifact,
|
|
ArtifactDependency,
|
|
PackageVersion,
|
|
)
|
|
from .schemas import (
|
|
EnsureFileContent,
|
|
EnsureFileDependency,
|
|
DependencyResponse,
|
|
ArtifactDependenciesResponse,
|
|
DependentInfo,
|
|
ReverseDependenciesResponse,
|
|
ResolvedArtifact,
|
|
DependencyResolutionResponse,
|
|
DependencyConflict,
|
|
MissingDependency,
|
|
PaginationMeta,
|
|
)
|
|
|
|
|
|
def _normalize_pypi_package_name(name: str) -> str:
    """
    Normalize a PyPI package name for comparison.

    - Strips extras brackets (e.g., "package[extra]" -> "package")
    - Replaces sequences of hyphens, underscores, and dots with a single hyphen
    - Lowercases the result

    This follows PEP 503 normalization rules.
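
    Example: "My_Package[dev]" -> "my-package"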
    """
    # Strip extras brackets like [test], [dev], etc.
    base_name = re.sub(r'\[.*\]', '', name)
    # Normalize separators and lowercase
    return re.sub(r'[-_.]+', '-', base_name).lower()


class DependencyError(Exception):
    """Base exception for dependency errors."""
    pass


class CircularDependencyError(DependencyError):
    """Raised when a circular dependency is detected."""
    def __init__(self, cycle: List[str]):
        self.cycle = cycle
        super().__init__(f"Circular dependency detected: {' -> '.join(cycle)}")


class DependencyConflictError(DependencyError):
    """Raised when conflicting dependency versions are detected."""
    def __init__(self, conflicts: List[DependencyConflict]):
        self.conflicts = conflicts
        super().__init__(f"Dependency conflicts detected: {len(conflicts)} conflict(s)")


class DependencyNotFoundError(DependencyError):
    """Raised when a dependency cannot be resolved."""
    def __init__(self, project: str, package: str, constraint: str):
        self.project = project
        self.package = package
        self.constraint = constraint
        super().__init__(f"Dependency not found: {project}/{package}@{constraint}")


class InvalidEnsureFileError(DependencyError):
    """Raised when the ensure file is invalid."""
    pass


class DependencyDepthExceededError(DependencyError):
    """Raised when dependency resolution exceeds max depth."""
    def __init__(self, max_depth: int):
        self.max_depth = max_depth
        super().__init__(f"Dependency resolution exceeded maximum depth of {max_depth}")


# Safety limits to prevent DoS attacks
MAX_DEPENDENCY_DEPTH = 50  # Maximum levels of nested dependencies
MAX_DEPENDENCIES_PER_ARTIFACT = 200  # Maximum direct dependencies per artifact


def parse_ensure_file(content: bytes) -> EnsureFileContent:
    """
    Parse an orchard.ensure file.

    Args:
        content: Raw bytes of the ensure file

    Returns:
        Parsed EnsureFileContent

    Raises:
        InvalidEnsureFileError: If the file is invalid YAML or has wrong structure
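
    Example (illustrative):
        dependencies:
          - project: _pypi
            package: requests
            version: ">=2.31"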
    """
    try:
        data = yaml.safe_load(content.decode('utf-8'))
    except yaml.YAMLError as e:
        raise InvalidEnsureFileError(f"Invalid YAML: {e}")
    except UnicodeDecodeError as e:
        raise InvalidEnsureFileError(f"Invalid encoding: {e}")

    if data is None:
        return EnsureFileContent(dependencies=[])

    if not isinstance(data, dict):
        raise InvalidEnsureFileError("Ensure file must be a YAML dictionary")

    dependencies = []
    deps_data = data.get('dependencies', [])

    if not isinstance(deps_data, list):
        raise InvalidEnsureFileError("'dependencies' must be a list")

    # Safety limit: prevent DoS through excessive dependencies
    if len(deps_data) > MAX_DEPENDENCIES_PER_ARTIFACT:
        raise InvalidEnsureFileError(
            f"Too many dependencies: {len(deps_data)} exceeds maximum of {MAX_DEPENDENCIES_PER_ARTIFACT}"
        )

    for i, dep in enumerate(deps_data):
        if not isinstance(dep, dict):
            raise InvalidEnsureFileError(f"Dependency {i} must be a dictionary")

        project = dep.get('project')
        package = dep.get('package')
        version = dep.get('version')

        if not project:
            raise InvalidEnsureFileError(f"Dependency {i} missing 'project'")
        if not package:
            raise InvalidEnsureFileError(f"Dependency {i} missing 'package'")
        if not version:
            raise InvalidEnsureFileError(
                f"Dependency {i} must have 'version'"
            )

        dependencies.append(EnsureFileDependency(
            project=project,
            package=package,
            version=version,
        ))

    return EnsureFileContent(dependencies=dependencies)


def validate_dependencies(
    db: Session,
    dependencies: List[EnsureFileDependency],
) -> List[str]:
    """
    Validate that all dependency projects exist.

    Args:
        db: Database session
        dependencies: List of dependencies to validate

    Returns:
        List of error messages (empty if all valid)
    """
    errors = []

    for dep in dependencies:
        project = db.query(Project).filter(Project.name == dep.project).first()
        if not project:
            errors.append(f"Project '{dep.project}' not found")

    return errors


def store_dependencies(
    db: Session,
    artifact_id: str,
    dependencies: List[EnsureFileDependency],
) -> List[ArtifactDependency]:
    """
    Store dependencies for an artifact.

    Args:
        db: Database session
        artifact_id: The artifact ID that has these dependencies
        dependencies: List of dependencies to store

    Returns:
        List of created ArtifactDependency objects
    """
    created = []

    for dep in dependencies:
        artifact_dep = ArtifactDependency(
            artifact_id=artifact_id,
            dependency_project=dep.project,
            dependency_package=dep.package,
            version_constraint=dep.version,
        )
        db.add(artifact_dep)
        created.append(artifact_dep)

    return created


def get_artifact_dependencies(
    db: Session,
    artifact_id: str,
) -> List[DependencyResponse]:
    """
    Get all dependencies for an artifact.

    Args:
        db: Database session
        artifact_id: The artifact ID

    Returns:
        List of DependencyResponse objects
    """
    deps = db.query(ArtifactDependency).filter(
        ArtifactDependency.artifact_id == artifact_id
    ).all()

    return [DependencyResponse.from_orm_model(dep) for dep in deps]


def get_reverse_dependencies(
    db: Session,
    project_name: str,
    package_name: str,
    page: int = 1,
    limit: int = 50,
) -> ReverseDependenciesResponse:
    """
    Get all artifacts that depend on a given package.

    Args:
        db: Database session
        project_name: Target project name
        package_name: Target package name
        page: Page number (1-indexed)
        limit: Results per page

    Returns:
        ReverseDependenciesResponse with dependents and pagination
    """
    # Query dependencies that point to this project/package
    query = db.query(ArtifactDependency).filter(
        ArtifactDependency.dependency_project == project_name,
        ArtifactDependency.dependency_package == package_name,
    )

    total = query.count()
    offset = (page - 1) * limit
    deps = query.offset(offset).limit(limit).all()

    dependents = []
    for dep in deps:
        # Get artifact info to find the project/package/version
        artifact = db.query(Artifact).filter(Artifact.id == dep.artifact_id).first()
        if not artifact:
            continue

        # Find which package this artifact belongs to via versions
        version_record = db.query(PackageVersion).filter(
            PackageVersion.artifact_id == dep.artifact_id,
        ).first()
        if version_record:
            pkg = db.query(Package).filter(Package.id == version_record.package_id).first()
            if pkg:
                proj = db.query(Project).filter(Project.id == pkg.project_id).first()
                if proj:
                    dependents.append(DependentInfo(
                        artifact_id=dep.artifact_id,
                        project=proj.name,
                        package=pkg.name,
                        version=version_record.version,
                        constraint_value=dep.version_constraint,
                    ))

    total_pages = (total + limit - 1) // limit

    return ReverseDependenciesResponse(
        project=project_name,
        package=package_name,
        dependents=dependents,
        pagination=PaginationMeta(
            page=page,
            limit=limit,
            total=total,
            total_pages=total_pages,
            has_more=page < total_pages,
        ),
    )


def _is_version_constraint(version_str: str) -> bool:
    """Check if a version string contains constraint operators."""
    if not version_str:
        return False
    # Check for common constraint operators
    return any(op in version_str for op in ['>=', '<=', '!=', '~=', '>', '<', '==', '*'])


def _version_satisfies_constraint(version: str, constraint: str) -> bool:
    """
    Check if a version satisfies a constraint.

    Args:
        version: A version string (e.g., '1.26.0')
        constraint: A version constraint (e.g., '>=1.20', '>=1.20,<2.0', '*')

    Returns:
        True if the version satisfies the constraint, False otherwise
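
    Example:
        _version_satisfies_constraint('1.26.0', '>=1.20,<2.0')  # True (when packaging is available)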
    """
    if not HAS_PACKAGING:
        return False

    # Wildcard matches everything
    if constraint == '*' or not constraint:
        return True

    try:
        spec = SpecifierSet(constraint)
        v = Version(version)
        return v in spec
    except (InvalidSpecifier, InvalidVersion):
        # If we can't parse, assume it doesn't match
        return False


def _resolve_version_constraint(
    db: Session,
    package: Package,
    constraint: str,
) -> Optional[Tuple[str, str, int]]:
    """
    Resolve a version constraint (e.g., '>=1.9') to a specific version.

    Uses PEP 440 version matching to find the best matching version.

    Args:
        db: Database session
        package: Package to search versions in
        constraint: Version constraint string (e.g., '>=1.9', '<2.0,>=1.5')

    Returns:
        Tuple of (artifact_id, resolved_version, size) or None if not found
    """
    if not HAS_PACKAGING:
        # Fallback: if packaging not available, can't do constraint matching
        return None

    # Handle wildcard - return latest version
    if constraint == '*':
        # Get the latest version by created_at
        latest = db.query(PackageVersion).filter(
            PackageVersion.package_id == package.id,
        ).order_by(PackageVersion.created_at.desc()).first()
        if latest:
            artifact = db.query(Artifact).filter(Artifact.id == latest.artifact_id).first()
            if artifact:
                return (artifact.id, latest.version, artifact.size)
        return None

    try:
        specifier = SpecifierSet(constraint)
    except InvalidSpecifier:
        # Invalid constraint (e.g., ">=" without version) - treat as wildcard
        # This can happen with malformed metadata from PyPI packages
        latest = db.query(PackageVersion).filter(
            PackageVersion.package_id == package.id,
        ).order_by(PackageVersion.created_at.desc()).first()
        if latest:
            artifact = db.query(Artifact).filter(Artifact.id == latest.artifact_id).first()
            if artifact:
                return (artifact.id, latest.version, artifact.size)
        return None

    # Get all versions for this package
    all_versions = db.query(PackageVersion).filter(
        PackageVersion.package_id == package.id,
    ).all()

    if not all_versions:
        return None

    # Find matching versions
    matching = []
    for pv in all_versions:
        try:
            v = Version(pv.version)
            if v in specifier:
                matching.append((pv, v))
        except InvalidVersion:
            # Skip invalid versions
            continue

    if not matching:
        return None

    # Sort by version (descending) and return the latest matching
    matching.sort(key=lambda x: x[1], reverse=True)
    best_match = matching[0][0]

    artifact = db.query(Artifact).filter(Artifact.id == best_match.artifact_id).first()
    if artifact:
        return (artifact.id, best_match.version, artifact.size)

    return None


def _resolve_dependency_to_artifact(
    db: Session,
    project_name: str,
    package_name: str,
    version: str,
) -> Optional[Tuple[str, str, int]]:
    """
    Resolve a dependency constraint to an artifact ID.

    Supports:
    - Exact version matching (e.g., '1.2.3')
    - Version constraints (e.g., '>=1.9', '<2.0,>=1.5')
    - Wildcard ('*' for any version)

    Args:
        db: Database session
        project_name: Project name
        package_name: Package name
        version: Version or version constraint

    Returns:
        Tuple of (artifact_id, resolved_version, size) or None if not found
    """
    # Get project and package
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        return None

    package = db.query(Package).filter(
        Package.project_id == project.id,
        Package.name == package_name,
    ).first()
    if not package:
        return None

    # Check if this is a version constraint (>=, <, etc.) or exact version
    if _is_version_constraint(version):
        result = _resolve_version_constraint(db, package, version)
        if result:
            return result
    else:
        # Look up by exact version
        pkg_version = db.query(PackageVersion).filter(
            PackageVersion.package_id == package.id,
            PackageVersion.version == version,
        ).first()
        if pkg_version:
            artifact = db.query(Artifact).filter(
                Artifact.id == pkg_version.artifact_id
            ).first()
            if artifact:
                return (artifact.id, version, artifact.size)

    return None


def _detect_package_cycle(
    db: Session,
    project_name: str,
    package_name: str,
    target_project: str,
    target_package: str,
    visiting: Set[str],
    visited: Set[str],
    path: List[str],
) -> Optional[List[str]]:
    """
    Detect cycles at the package level using DFS.

    Args:
        db: Database session
        project_name: Current project being visited
        package_name: Current package being visited
        target_project: The project we're checking for cycles back to
        target_package: The package we're checking for cycles back to
        visiting: Set of package keys currently in the recursion stack
        visited: Set of fully processed package keys
        path: Current path for cycle reporting

    Returns:
        Cycle path if detected, None otherwise
    """
    # Normalize names for comparison (handles extras like [test] and separators)
    pkg_normalized = _normalize_pypi_package_name(package_name)
    target_pkg_normalized = _normalize_pypi_package_name(target_package)

    # Use normalized key for tracking
    pkg_key = f"{project_name.lower()}/{pkg_normalized}"

    # Check if we've reached the target package (cycle detected)
    # Use normalized comparison to handle extras and naming variations
    if project_name.lower() == target_project.lower() and pkg_normalized == target_pkg_normalized:
        return path + [pkg_key]

    if pkg_key in visiting:
        # Unexpected internal cycle
        return None

    if pkg_key in visited:
        return None

    visiting.add(pkg_key)
    path.append(pkg_key)

    # Get the package and find any artifacts with dependencies
    project = db.query(Project).filter(Project.name == project_name).first()
    if project:
        package = db.query(Package).filter(
            Package.project_id == project.id,
            Package.name == package_name,
        ).first()
        if package:
            # Find all artifacts in this package via versions
            versions = db.query(PackageVersion).filter(PackageVersion.package_id == package.id).all()
            artifact_ids = {v.artifact_id for v in versions}

            # Get dependencies from all artifacts in this package
            for artifact_id in artifact_ids:
                deps = db.query(ArtifactDependency).filter(
                    ArtifactDependency.artifact_id == artifact_id
                ).all()

                for dep in deps:
                    cycle = _detect_package_cycle(
                        db,
                        dep.dependency_project,
                        dep.dependency_package,
                        target_project,
                        target_package,
                        visiting,
                        visited,
                        path,
                    )
                    if cycle:
                        return cycle

    path.pop()
    visiting.remove(pkg_key)
    visited.add(pkg_key)

    return None


def check_circular_dependencies(
    db: Session,
    artifact_id: str,
    new_dependencies: List[EnsureFileDependency],
    project_name: Optional[str] = None,
    package_name: Optional[str] = None,
) -> Optional[List[str]]:
    """
    Check if adding the new dependencies would create a circular dependency.

    Args:
        db: Database session
        artifact_id: The artifact that will have these dependencies
        new_dependencies: Dependencies to be added
        project_name: Project name (optional, will try to look up from version if not provided)
        package_name: Package name (optional, will try to look up from version if not provided)

    Returns:
        Cycle path if detected, None otherwise
    """
    # First, get the package info for this artifact to build path labels
    if project_name and package_name:
        current_path = f"{project_name}/{package_name}"
    else:
        # Try to look up from version
        artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first()
        if not artifact:
            return None

        # Find package for this artifact via version
        version_record = db.query(PackageVersion).filter(
            PackageVersion.artifact_id == artifact_id
        ).first()
        if not version_record:
            return None

        package = db.query(Package).filter(Package.id == version_record.package_id).first()
        if not package:
            return None

        project = db.query(Project).filter(Project.id == package.project_id).first()
        if not project:
            return None

        current_path = f"{project.name}/{package.name}"

    # Extract target project and package from current_path
    if "/" in current_path:
        target_project, target_package = current_path.split("/", 1)
    else:
        return None

    # Normalize the initial path for consistency with _detect_package_cycle
    normalized_path = f"{target_project.lower()}/{_normalize_pypi_package_name(target_package)}"

    # For each new dependency, check if it would create a cycle back to our package
    for dep in new_dependencies:
        # Check if this dependency (transitively) depends on us at the package level
        visiting: Set[str] = set()
        visited: Set[str] = set()
        path: List[str] = [normalized_path]

        # Check from the dependency's package
        cycle = _detect_package_cycle(
            db,
            dep.project,
            dep.package,
            target_project,
            target_package,
            visiting,
            visited,
            path,
        )
        if cycle:
            return cycle

    return None


def resolve_dependencies(
    db: Session,
    project_name: str,
    package_name: str,
    ref: str,
    base_url: str,
) -> DependencyResolutionResponse:
    """
    Resolve all dependencies for an artifact recursively.

    Args:
        db: Database session
        project_name: Project name
        package_name: Package name
        ref: Version reference (or artifact:hash)
        base_url: Base URL for download URLs

    Returns:
        DependencyResolutionResponse with all resolved artifacts

    Raises:
        DependencyNotFoundError: If the root artifact cannot be resolved
        CircularDependencyError: If circular dependencies are detected
        DependencyDepthExceededError: If resolution exceeds MAX_DEPENDENCY_DEPTH

    Note:
        Version conflicts are handled leniently: the first-resolved version of a
        package wins and mismatches are logged rather than raised.
    """
    # Resolve the initial artifact
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise DependencyNotFoundError(project_name, package_name, ref)

    package = db.query(Package).filter(
        Package.project_id == project.id,
        Package.name == package_name,
    ).first()
    if not package:
        raise DependencyNotFoundError(project_name, package_name, ref)

    # Handle artifact: prefix for direct artifact ID references
    if ref.startswith("artifact:"):
        artifact_id = ref[9:]
        artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first()
        if not artifact:
            raise DependencyNotFoundError(project_name, package_name, ref)
        root_artifact_id = artifact.id
        root_version = artifact_id[:12]  # Use short hash as version display
        root_size = artifact.size
    else:
        # Try to find artifact by version
        resolved = _resolve_dependency_to_artifact(
            db, project_name, package_name, ref
        )
        if not resolved:
            raise DependencyNotFoundError(project_name, package_name, ref)
        root_artifact_id, root_version, root_size = resolved

    # Track resolved artifacts and their versions
    resolved_artifacts: Dict[str, ResolvedArtifact] = {}
    # Track missing dependencies (not cached on server)
    missing_dependencies: List[MissingDependency] = []
    # Track version requirements for conflict detection
    version_requirements: Dict[str, List[Dict[str, Any]]] = {}  # pkg_key -> [{"version": ..., "required_by": ...}]
    # Track visiting/visited for cycle detection
    visiting: Set[str] = set()
    visited: Set[str] = set()
    # Track the current path for cycle reporting (artifact_id -> pkg_key)
    current_path: Dict[str, str] = {}
    # Resolution order (topological)
    resolution_order: List[str] = []

    def _resolve_recursive(
        artifact_id: str,
        proj_name: str,
        pkg_name: str,
        version_or_tag: str,
        size: int,
        required_by: Optional[str],
        depth: int = 0,
    ):
        """Recursively resolve dependencies with cycle/conflict detection."""
        # Safety limit: prevent DoS through deeply nested dependencies
        if depth > MAX_DEPENDENCY_DEPTH:
            raise DependencyDepthExceededError(MAX_DEPENDENCY_DEPTH)

        pkg_key = f"{proj_name}/{pkg_name}"

        # Cycle detection (at artifact level)
        if artifact_id in visiting:
            # Build cycle path from current_path
            cycle_start = current_path.get(artifact_id, pkg_key)
            cycle = [cycle_start, pkg_key]
            raise CircularDependencyError(cycle)

        # Version conflict handling - use first resolved version (lenient mode)
        if pkg_key in version_requirements:
            existing_versions = {r["version"] for r in version_requirements[pkg_key]}
            if version_or_tag not in existing_versions:
                # Different version requested - log and use existing (first wins)
                existing = version_requirements[pkg_key][0]["version"]
                logger.debug(
                    f"Version mismatch for {pkg_key}: using {existing} "
                    f"(also requested: {version_or_tag} by {required_by})"
                )
            # Already resolved this package - skip
            return

        if artifact_id in visited:
            return

        visiting.add(artifact_id)
        current_path[artifact_id] = pkg_key

        # Track version requirement
        if pkg_key not in version_requirements:
            version_requirements[pkg_key] = []
        version_requirements[pkg_key].append({
            "version": version_or_tag,
            "required_by": required_by,
        })

        # Get dependencies
        deps = db.query(ArtifactDependency).filter(
            ArtifactDependency.artifact_id == artifact_id
        ).all()

        # Resolve each dependency first (depth-first)
        for dep in deps:
            # Skip self-dependencies (can happen with PyPI extras like pytest[testing])
            # Use normalized comparison for PyPI naming conventions (handles extras, separators)
            dep_proj_normalized = dep.dependency_project.lower()
            dep_pkg_normalized = _normalize_pypi_package_name(dep.dependency_package)
            curr_proj_normalized = proj_name.lower()
            curr_pkg_normalized = _normalize_pypi_package_name(pkg_name)
            if dep_proj_normalized == curr_proj_normalized and dep_pkg_normalized == curr_pkg_normalized:
                continue

            resolved_dep = _resolve_dependency_to_artifact(
                db,
                dep.dependency_project,
                dep.dependency_package,
                dep.version_constraint,
            )

            if not resolved_dep:
                # Dependency not cached on server - track as missing but continue
                constraint = dep.version_constraint
                missing_dependencies.append(MissingDependency(
                    project=dep.dependency_project,
                    package=dep.dependency_package,
                    constraint=constraint,
                    required_by=pkg_key,
                ))
                continue

            dep_artifact_id, dep_version, dep_size = resolved_dep

            # Skip if resolved to same artifact (self-dependency at artifact level)
            if dep_artifact_id == artifact_id:
                continue

            # Skip if this artifact is already being visited (would cause cycle)
            if dep_artifact_id in visiting:
                continue

            _resolve_recursive(
                dep_artifact_id,
                dep.dependency_project,
                dep.dependency_package,
                dep_version,
                dep_size,
                pkg_key,
                depth + 1,
            )

        visiting.remove(artifact_id)
        del current_path[artifact_id]
        visited.add(artifact_id)

        # Add to resolution order (dependencies before dependents)
        resolution_order.append(artifact_id)

        # Store resolved artifact info
        resolved_artifacts[artifact_id] = ResolvedArtifact(
            artifact_id=artifact_id,
            project=proj_name,
            package=pkg_name,
            version=version_or_tag,
            size=size,
            download_url=f"{base_url}/api/v1/project/{proj_name}/{pkg_name}/+/{version_or_tag}",
        )

    # Start resolution from root
    _resolve_recursive(
        root_artifact_id,
        project_name,
        package_name,
        root_version,
        root_size,
        None,
    )

    # Build response in topological order
    resolved_list = [resolved_artifacts[aid] for aid in resolution_order]
    total_size = sum(r.size for r in resolved_list)

    return DependencyResolutionResponse(
        requested={
            "project": project_name,
            "package": package_name,
            "ref": ref,
        },
        resolved=resolved_list,
        missing=missing_dependencies,
        fetched=[],  # No fetching in sync version
        total_size=total_size,
        artifact_count=len(resolved_list),
    )


# System project mapping for auto-fetch
SYSTEM_PROJECT_REGISTRY_MAP = {
    "_pypi": "pypi",
    "_npm": "npm",
    "_maven": "maven",
}


async def resolve_dependencies_with_fetch(
    db: Session,
    project_name: str,
    package_name: str,
    ref: str,
    base_url: str,
    storage: "S3Storage",
    registry_clients: Dict[str, "RegistryClient"],
    max_fetch_depth: int = 10,
) -> DependencyResolutionResponse:
    """
    Resolve all dependencies for an artifact recursively, fetching missing ones from upstream.

    This async version extends the basic resolution with auto-fetch capability:
    when a missing dependency is from a system project (e.g., _pypi), it attempts
    to fetch the package from the corresponding upstream registry.

    If the root artifact itself doesn't exist in a system project, it will also
    be fetched from upstream before resolution begins.

    Args:
        db: Database session
        project_name: Project name
        package_name: Package name
        ref: Version reference (or artifact:hash)
        base_url: Base URL for download URLs
        storage: S3 storage for caching fetched artifacts
        registry_clients: Map of system project to registry client {"_pypi": PyPIRegistryClient}
        max_fetch_depth: Maximum depth for auto-fetching (prevents runaway fetching)

    Returns:
        DependencyResolutionResponse with all resolved artifacts and fetch status

    Raises:
        DependencyNotFoundError: If the root artifact cannot be found (even after fetch attempt)
        CircularDependencyError: If circular dependencies are detected
        DependencyDepthExceededError: If resolution exceeds MAX_DEPENDENCY_DEPTH

    Note:
        Version conflicts are handled leniently: the first-resolved version of a
        package wins and mismatches are logged rather than raised.
    """
    # Track fetched artifacts for response
    fetched_artifacts: List[ResolvedArtifact] = []

    # Check if project exists
    project = db.query(Project).filter(Project.name == project_name).first()

    # If project doesn't exist and it's a system project pattern, we can't auto-create it
    if not project:
        raise DependencyNotFoundError(project_name, package_name, ref)

    # Check if package exists
    package = db.query(Package).filter(
        Package.project_id == project.id,
        Package.name == package_name,
    ).first()

    # Try to resolve the root artifact
    root_artifact_id = None
    root_version = None
    root_size = None

    # Handle artifact: prefix for direct artifact ID references
    if ref.startswith("artifact:"):
        artifact_id = ref[9:]
        artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first()
        if artifact:
            root_artifact_id = artifact.id
            root_version = artifact_id[:12]
            root_size = artifact.size
    elif package:
        # Try to resolve by version/constraint
        resolved = _resolve_dependency_to_artifact(
            db, project_name, package_name, ref
        )
        if resolved:
            root_artifact_id, root_version, root_size = resolved

    # If root artifact not found and this is a system project, try to fetch it
    if root_artifact_id is None and project_name in SYSTEM_PROJECT_REGISTRY_MAP:
        logger.info(
            f"Root artifact {project_name}/{package_name}@{ref} not found, "
            "attempting to fetch from upstream"
        )

        client = registry_clients.get(project_name)
        if client:
            try:
                # Resolve the version constraint from upstream
                version_info = await client.resolve_constraint(package_name, ref)
                if version_info:
                    # Fetch and cache the package
                    fetch_result = await client.fetch_package(
                        package_name, version_info, db, storage
                    )
                    if fetch_result:
                        logger.info(
                            f"Successfully fetched root artifact {package_name}=="
                            f"{fetch_result.version} (artifact {fetch_result.artifact_id[:12]})"
                        )
                        root_artifact_id = fetch_result.artifact_id
                        root_version = fetch_result.version
                        root_size = fetch_result.size

                        # Add to fetched list
                        fetched_artifacts.append(ResolvedArtifact(
                            artifact_id=fetch_result.artifact_id,
                            project=project_name,
                            package=package_name,
                            version=fetch_result.version,
                            size=fetch_result.size,
                            download_url=f"{base_url}/api/v1/project/{project_name}/{package_name}/+/{fetch_result.version}",
                        ))
            except Exception as e:
                logger.warning(f"Failed to fetch root artifact {package_name}: {e}")

    # If still no root artifact, raise error
    if root_artifact_id is None:
        raise DependencyNotFoundError(project_name, package_name, ref)

    # Track state
    resolved_artifacts: Dict[str, ResolvedArtifact] = {}
    missing_dependencies: List[MissingDependency] = []
    # Note: fetched_artifacts was already initialized above and may already
    # contain the root artifact if it was fetched from upstream
    version_requirements: Dict[str, List[Dict[str, Any]]] = {}
    visiting: Set[str] = set()
    visited: Set[str] = set()
    current_path: Dict[str, str] = {}
    resolution_order: List[str] = []

    # Track fetch attempts to prevent loops
    fetch_attempted: Set[str] = set()  # "project/package@constraint"

    async def _try_fetch_dependency(
        dep_project: str,
        dep_package: str,
        constraint: str,
        required_by: str,
        fetch_depth: int,
    ) -> Optional[Tuple[str, str, int]]:
        """
        Try to fetch a missing dependency from upstream registry.

        Returns (artifact_id, version, size) if successful, None otherwise.
        """
        # Only fetch from system projects
        registry_type = SYSTEM_PROJECT_REGISTRY_MAP.get(dep_project)
        if not registry_type:
            logger.debug(
                f"Not a system project, skipping fetch: {dep_project}/{dep_package}"
            )
            return None

        # Check fetch depth
        if fetch_depth > max_fetch_depth:
            logger.info(
                f"Max fetch depth ({max_fetch_depth}) exceeded for {dep_project}/{dep_package}"
            )
            return None

        # Build fetch key for loop prevention
        fetch_key = f"{dep_project}/{dep_package}@{constraint}"
        if fetch_key in fetch_attempted:
            logger.debug(f"Already attempted fetch for {fetch_key}")
            return None
        fetch_attempted.add(fetch_key)

        # Get registry client
        client = registry_clients.get(dep_project)
        if not client:
            logger.debug(f"No registry client for {dep_project}")
            return None

        try:
            # Resolve version constraint
            version_info = await client.resolve_constraint(dep_package, constraint)
            if not version_info:
                logger.info(
                    f"No version of {dep_package} matches constraint '{constraint}' on upstream"
                )
                return None

            # Fetch and cache the package
            fetch_result = await client.fetch_package(
                dep_package, version_info, db, storage
            )
            if not fetch_result:
                logger.warning(f"Failed to fetch {dep_package}=={version_info.version}")
                return None

            logger.info(
                f"Successfully fetched {dep_package}=={version_info.version} "
                f"(artifact {fetch_result.artifact_id[:12]})"
            )

            # Add to fetched list for response
            fetched_artifacts.append(ResolvedArtifact(
                artifact_id=fetch_result.artifact_id,
                project=dep_project,
                package=dep_package,
                version=fetch_result.version,
                size=fetch_result.size,
                download_url=f"{base_url}/api/v1/project/{dep_project}/{dep_package}/+/{fetch_result.version}",
            ))

            return (fetch_result.artifact_id, fetch_result.version, fetch_result.size)

        except Exception as e:
            logger.warning(f"Error fetching {dep_package}: {e}")
            return None

    async def _resolve_recursive_async(
        artifact_id: str,
        proj_name: str,
        pkg_name: str,
        version_or_tag: str,
        size: int,
        required_by: Optional[str],
        depth: int = 0,
        fetch_depth: int = 0,
    ):
        """Recursively resolve dependencies with fetch capability."""
        if depth > MAX_DEPENDENCY_DEPTH:
            raise DependencyDepthExceededError(MAX_DEPENDENCY_DEPTH)

        pkg_key = f"{proj_name}/{pkg_name}"

        # Cycle detection
        if artifact_id in visiting:
            cycle_start = current_path.get(artifact_id, pkg_key)
            cycle = [cycle_start, pkg_key]
            raise CircularDependencyError(cycle)

        # Version conflict handling - use first resolved version (lenient mode)
        if pkg_key in version_requirements:
            existing_versions = {r["version"] for r in version_requirements[pkg_key]}
            if version_or_tag not in existing_versions:
                # Different version requested - log and use existing (first wins)
                existing = version_requirements[pkg_key][0]["version"]
                logger.debug(
                    f"Version mismatch for {pkg_key}: using {existing} "
                    f"(also requested: {version_or_tag} by {required_by})"
                )
            # Already resolved this package - skip
            return

        if artifact_id in visited:
            return

        visiting.add(artifact_id)
        current_path[artifact_id] = pkg_key

        if pkg_key not in version_requirements:
            version_requirements[pkg_key] = []
        version_requirements[pkg_key].append({
            "version": version_or_tag,
            "required_by": required_by,
        })

        # Get dependencies
        deps = db.query(ArtifactDependency).filter(
            ArtifactDependency.artifact_id == artifact_id
        ).all()

        for dep in deps:
            # Skip self-dependencies (common with PyPI extras like pytest[testing] -> pytest)
            dep_proj_normalized = dep.dependency_project.lower()
            dep_pkg_normalized = _normalize_pypi_package_name(dep.dependency_package)
            curr_proj_normalized = proj_name.lower()
            curr_pkg_normalized = _normalize_pypi_package_name(pkg_name)
            if dep_proj_normalized == curr_proj_normalized and dep_pkg_normalized == curr_pkg_normalized:
                logger.debug(
                    f"Skipping self-dependency: {pkg_key} -> {dep.dependency_project}/{dep.dependency_package}"
                )
                continue

            # Also check if this dependency would resolve to the current artifact
            # (handles cases where package names differ but resolve to same artifact)
            resolved_dep = _resolve_dependency_to_artifact(
                db,
                dep.dependency_project,
                dep.dependency_package,
                dep.version_constraint,
            )

            if not resolved_dep:
                # Try to fetch from upstream if it's a system project
                fetched = await _try_fetch_dependency(
                    dep.dependency_project,
                    dep.dependency_package,
                    dep.version_constraint,
                    pkg_key,
                    fetch_depth + 1,
                )

                if fetched:
                    resolved_dep = fetched
                else:
                    # Still missing - add to missing list with fetch status
                    fetch_key = f"{dep.dependency_project}/{dep.dependency_package}@{dep.version_constraint}"
                    was_attempted = fetch_key in fetch_attempted
                    missing_dependencies.append(MissingDependency(
                        project=dep.dependency_project,
                        package=dep.dependency_package,
                        constraint=dep.version_constraint,
                        required_by=pkg_key,
                        fetch_attempted=was_attempted,
                        fetch_error="Max fetch depth exceeded" if was_attempted and fetch_depth >= max_fetch_depth else None,
                    ))
                    continue

            dep_artifact_id, dep_version, dep_size = resolved_dep

            # Skip if resolved to same artifact (self-dependency at artifact level)
            if dep_artifact_id == artifact_id:
                logger.debug(
                    f"Skipping self-dependency (same artifact): {pkg_key} -> "
                    f"{dep.dependency_project}/{dep.dependency_package} (artifact {dep_artifact_id[:12]})"
                )
                continue

            # Skip if this artifact is already being visited (would cause cycle)
            if dep_artifact_id in visiting:
                logger.debug(
                    f"Skipping dependency already in resolution stack: {pkg_key} -> "
                    f"{dep.dependency_project}/{dep.dependency_package} (artifact {dep_artifact_id[:12]})"
                )
                continue

            # Check if we've already resolved this package to a different version
            dep_pkg_key = f"{dep.dependency_project}/{dep.dependency_package}"
            if dep_pkg_key in version_requirements:
                existing_version = version_requirements[dep_pkg_key][0]["version"]
                if existing_version != dep_version:
                    # Different version resolved - check if existing satisfies new constraint
                    if HAS_PACKAGING and _version_satisfies_constraint(existing_version, dep.version_constraint):
                        logger.debug(
                            f"Reusing existing version {existing_version} for {dep_pkg_key} "
                            f"(satisfies constraint {dep.version_constraint})"
                        )
                        continue
                    else:
                        logger.debug(
                            f"Version conflict for {dep_pkg_key}: have {existing_version}, "
                            f"need {dep.version_constraint} (resolved to {dep_version})"
                        )
                        # Don't raise error - just use the first version we resolved
                        # This is more lenient than strict conflict detection
                        continue

            await _resolve_recursive_async(
                dep_artifact_id,
                dep.dependency_project,
                dep.dependency_package,
                dep_version,
                dep_size,
                pkg_key,
                depth + 1,
                fetch_depth + 1 if dep_artifact_id in [f.artifact_id for f in fetched_artifacts] else fetch_depth,
            )

        visiting.remove(artifact_id)
        del current_path[artifact_id]
        visited.add(artifact_id)

        resolution_order.append(artifact_id)

        resolved_artifacts[artifact_id] = ResolvedArtifact(
            artifact_id=artifact_id,
            project=proj_name,
            package=pkg_name,
            version=version_or_tag,
            size=size,
            download_url=f"{base_url}/api/v1/project/{proj_name}/{pkg_name}/+/{version_or_tag}",
        )

    # Start resolution from root
    await _resolve_recursive_async(
        root_artifact_id,
        project_name,
        package_name,
        root_version,
        root_size,
        None,
    )

    # Build response in topological order
    resolved_list = [resolved_artifacts[aid] for aid in resolution_order]
    total_size = sum(r.size for r in resolved_list)

    return DependencyResolutionResponse(
        requested={
            "project": project_name,
            "package": package_name,
            "ref": ref,
        },
        resolved=resolved_list,
        missing=missing_dependencies,
        fetched=fetched_artifacts,
        total_size=total_size,
        artifact_count=len(resolved_list),
    )