""" Dependency management module for artifact dependencies. Handles: - Parsing orchard.ensure files - Storing dependencies in the database - Querying dependencies and reverse dependencies - Dependency resolution with topological sorting - Circular dependency detection - Conflict detection """ import re import logging import yaml from typing import List, Dict, Any, Optional, Set, Tuple, TYPE_CHECKING from sqlalchemy.orm import Session from sqlalchemy import and_ if TYPE_CHECKING: from .storage import S3Storage from .registry_client import RegistryClient logger = logging.getLogger(__name__) # Import packaging for PEP 440 version matching try: from packaging.specifiers import SpecifierSet, InvalidSpecifier from packaging.version import Version, InvalidVersion HAS_PACKAGING = True except ImportError: HAS_PACKAGING = False from .models import ( Project, Package, Artifact, ArtifactDependency, PackageVersion, ) from .schemas import ( EnsureFileContent, EnsureFileDependency, DependencyResponse, ArtifactDependenciesResponse, DependentInfo, ReverseDependenciesResponse, ResolvedArtifact, DependencyResolutionResponse, DependencyConflict, MissingDependency, PaginationMeta, ) def _normalize_pypi_package_name(name: str) -> str: """ Normalize a PyPI package name for comparison. - Strips extras brackets (e.g., "package[extra]" -> "package") - Replaces sequences of hyphens, underscores, and dots with a single hyphen - Lowercases the result This follows PEP 503 normalization rules. """ # Strip extras brackets like [test], [dev], etc. base_name = re.sub(r'\[.*\]', '', name) # Normalize separators and lowercase return re.sub(r'[-_.]+', '-', base_name).lower() class DependencyError(Exception): """Base exception for dependency errors.""" pass class CircularDependencyError(DependencyError): """Raised when a circular dependency is detected.""" def __init__(self, cycle: List[str]): self.cycle = cycle super().__init__(f"Circular dependency detected: {' -> '.join(cycle)}") class DependencyConflictError(DependencyError): """Raised when conflicting dependency versions are detected.""" def __init__(self, conflicts: List[DependencyConflict]): self.conflicts = conflicts super().__init__(f"Dependency conflicts detected: {len(conflicts)} conflict(s)") class DependencyNotFoundError(DependencyError): """Raised when a dependency cannot be resolved.""" def __init__(self, project: str, package: str, constraint: str): self.project = project self.package = package self.constraint = constraint super().__init__(f"Dependency not found: {project}/{package}@{constraint}") class InvalidEnsureFileError(DependencyError): """Raised when the ensure file is invalid.""" pass class DependencyDepthExceededError(DependencyError): """Raised when dependency resolution exceeds max depth.""" def __init__(self, max_depth: int): self.max_depth = max_depth super().__init__(f"Dependency resolution exceeded maximum depth of {max_depth}") class TooManyArtifactsError(DependencyError): """Raised when dependency resolution resolves too many artifacts.""" def __init__(self, max_artifacts: int): self.max_artifacts = max_artifacts super().__init__(f"Dependency resolution exceeded maximum of {max_artifacts} artifacts") # Safety limits to prevent DoS attacks MAX_DEPENDENCY_DEPTH = 100 # Maximum levels of nested dependencies MAX_DEPENDENCIES_PER_ARTIFACT = 200 # Maximum direct dependencies per artifact MAX_TOTAL_ARTIFACTS = 1000 # Maximum total artifacts in resolution to prevent memory issues def parse_ensure_file(content: bytes) -> EnsureFileContent: """ Parse an 

class DependencyError(Exception):
    """Base exception for dependency errors."""
    pass


class CircularDependencyError(DependencyError):
    """Raised when a circular dependency is detected."""
    def __init__(self, cycle: List[str]):
        self.cycle = cycle
        super().__init__(f"Circular dependency detected: {' -> '.join(cycle)}")


class DependencyConflictError(DependencyError):
    """Raised when conflicting dependency versions are detected."""
    def __init__(self, conflicts: List[DependencyConflict]):
        self.conflicts = conflicts
        super().__init__(f"Dependency conflicts detected: {len(conflicts)} conflict(s)")


class DependencyNotFoundError(DependencyError):
    """Raised when a dependency cannot be resolved."""
    def __init__(self, project: str, package: str, constraint: str):
        self.project = project
        self.package = package
        self.constraint = constraint
        super().__init__(f"Dependency not found: {project}/{package}@{constraint}")


class InvalidEnsureFileError(DependencyError):
    """Raised when the ensure file is invalid."""
    pass


class DependencyDepthExceededError(DependencyError):
    """Raised when dependency resolution exceeds max depth."""
    def __init__(self, max_depth: int):
        self.max_depth = max_depth
        super().__init__(f"Dependency resolution exceeded maximum depth of {max_depth}")


class TooManyArtifactsError(DependencyError):
    """Raised when dependency resolution resolves too many artifacts."""
    def __init__(self, max_artifacts: int):
        self.max_artifacts = max_artifacts
        super().__init__(f"Dependency resolution exceeded maximum of {max_artifacts} artifacts")


# Safety limits to prevent DoS attacks
MAX_DEPENDENCY_DEPTH = 100  # Maximum levels of nested dependencies
MAX_DEPENDENCIES_PER_ARTIFACT = 200  # Maximum direct dependencies per artifact
MAX_TOTAL_ARTIFACTS = 1000  # Maximum total artifacts in resolution to prevent memory issues


def parse_ensure_file(content: bytes) -> EnsureFileContent:
    """
    Parse an orchard.ensure file.

    Args:
        content: Raw bytes of the ensure file

    Returns:
        Parsed EnsureFileContent

    Raises:
        InvalidEnsureFileError: If the file is invalid YAML or has wrong structure
    """
    try:
        data = yaml.safe_load(content.decode('utf-8'))
    except yaml.YAMLError as e:
        raise InvalidEnsureFileError(f"Invalid YAML: {e}")
    except UnicodeDecodeError as e:
        raise InvalidEnsureFileError(f"Invalid encoding: {e}")

    if data is None:
        return EnsureFileContent(dependencies=[])

    if not isinstance(data, dict):
        raise InvalidEnsureFileError("Ensure file must be a YAML dictionary")

    dependencies = []
    deps_data = data.get('dependencies', [])

    if not isinstance(deps_data, list):
        raise InvalidEnsureFileError("'dependencies' must be a list")

    # Safety limit: prevent DoS through excessive dependencies
    if len(deps_data) > MAX_DEPENDENCIES_PER_ARTIFACT:
        raise InvalidEnsureFileError(
            f"Too many dependencies: {len(deps_data)} exceeds maximum of {MAX_DEPENDENCIES_PER_ARTIFACT}"
        )

    for i, dep in enumerate(deps_data):
        if not isinstance(dep, dict):
            raise InvalidEnsureFileError(f"Dependency {i} must be a dictionary")

        project = dep.get('project')
        package = dep.get('package')
        version = dep.get('version')

        if not project:
            raise InvalidEnsureFileError(f"Dependency {i} missing 'project'")
        if not package:
            raise InvalidEnsureFileError(f"Dependency {i} missing 'package'")
        if not version:
            raise InvalidEnsureFileError(
                f"Dependency {i} must have 'version'"
            )

        dependencies.append(EnsureFileDependency(
            project=project,
            package=package,
            version=version,
        ))

    return EnsureFileContent(dependencies=dependencies)
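# A minimal orchard.ensure file accepted by parse_ensure_file above
# (project and package names are illustrative):
#
#   dependencies:
#     - project: _pypi
#       package: requests
#       version: ">=2.28,<3.0"
#     - project: myproject
#       package: mylib
#       version: "1.2.3"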

def validate_dependencies(
    db: Session,
    dependencies: List[EnsureFileDependency],
) -> List[str]:
    """
    Validate that all dependency projects exist.

    Args:
        db: Database session
        dependencies: List of dependencies to validate

    Returns:
        List of error messages (empty if all valid)
    """
    errors = []
    for dep in dependencies:
        project = db.query(Project).filter(Project.name == dep.project).first()
        if not project:
            errors.append(f"Project '{dep.project}' not found")
    return errors


def store_dependencies(
    db: Session,
    artifact_id: str,
    dependencies: List[EnsureFileDependency],
) -> List[ArtifactDependency]:
    """
    Store dependencies for an artifact.

    Args:
        db: Database session
        artifact_id: The artifact ID that has these dependencies
        dependencies: List of dependencies to store

    Returns:
        List of created ArtifactDependency objects
    """
    created = []
    for dep in dependencies:
        artifact_dep = ArtifactDependency(
            artifact_id=artifact_id,
            dependency_project=dep.project,
            dependency_package=dep.package,
            version_constraint=dep.version,
        )
        db.add(artifact_dep)
        created.append(artifact_dep)
    return created


def get_artifact_dependencies(
    db: Session,
    artifact_id: str,
) -> List[DependencyResponse]:
    """
    Get all dependencies for an artifact.

    Args:
        db: Database session
        artifact_id: The artifact ID

    Returns:
        List of DependencyResponse objects
    """
    deps = db.query(ArtifactDependency).filter(
        ArtifactDependency.artifact_id == artifact_id
    ).all()
    return [DependencyResponse.from_orm_model(dep) for dep in deps]


def get_reverse_dependencies(
    db: Session,
    project_name: str,
    package_name: str,
    page: int = 1,
    limit: int = 50,
) -> ReverseDependenciesResponse:
    """
    Get all artifacts that depend on a given package.

    Args:
        db: Database session
        project_name: Target project name
        package_name: Target package name
        page: Page number (1-indexed)
        limit: Results per page

    Returns:
        ReverseDependenciesResponse with dependents and pagination
    """
    # Query dependencies that point to this project/package
    query = db.query(ArtifactDependency).filter(
        ArtifactDependency.dependency_project == project_name,
        ArtifactDependency.dependency_package == package_name,
    )

    total = query.count()
    offset = (page - 1) * limit
    deps = query.offset(offset).limit(limit).all()

    dependents = []
    for dep in deps:
        # Get artifact info to find the project/package/version
        artifact = db.query(Artifact).filter(Artifact.id == dep.artifact_id).first()
        if not artifact:
            continue

        # Find which package this artifact belongs to via versions
        version_record = db.query(PackageVersion).filter(
            PackageVersion.artifact_id == dep.artifact_id,
        ).first()
        if version_record:
            pkg = db.query(Package).filter(Package.id == version_record.package_id).first()
            if pkg:
                proj = db.query(Project).filter(Project.id == pkg.project_id).first()
                if proj:
                    dependents.append(DependentInfo(
                        artifact_id=dep.artifact_id,
                        project=proj.name,
                        package=pkg.name,
                        version=version_record.version,
                        constraint_value=dep.version_constraint,
                    ))

    total_pages = (total + limit - 1) // limit

    return ReverseDependenciesResponse(
        project=project_name,
        package=package_name,
        dependents=dependents,
        pagination=PaginationMeta(
            page=page,
            limit=limit,
            total=total,
            total_pages=total_pages,
            has_more=page < total_pages,
        ),
    )

def _is_version_constraint(version_str: str) -> bool:
    """Check if a version string contains constraint operators."""
    if not version_str:
        return False
    # Check for common constraint operators
    return any(op in version_str for op in ['>=', '<=', '!=', '~=', '>', '<', '==', '*'])


def _version_satisfies_constraint(version: str, constraint: str) -> bool:
    """
    Check if a version satisfies a constraint.

    Args:
        version: A version string (e.g., '1.26.0')
        constraint: A version constraint (e.g., '>=1.20', '>=1.20,<2.0', '*')

    Returns:
        True if the version satisfies the constraint, False otherwise
    """
    if not HAS_PACKAGING:
        return False

    # Wildcard matches everything
    if constraint == '*' or not constraint:
        return True

    try:
        spec = SpecifierSet(constraint)
        v = Version(version)
        return v in spec
    except (InvalidSpecifier, InvalidVersion):
        # If we can't parse, assume it doesn't match
        return False
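# For example, when the packaging library is available:
#   _version_satisfies_constraint("1.26.0", ">=1.20,<2.0") -> True
#   _version_satisfies_constraint("2.1.0", ">=1.20,<2.0")  -> False
#   _version_satisfies_constraint("1.26.0", "*")           -> True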

def _resolve_version_constraint(
    db: Session,
    package: Package,
    constraint: str,
) -> Optional[Tuple[str, str, int]]:
    """
    Resolve a version constraint (e.g., '>=1.9') to a specific version.

    Uses PEP 440 version matching to find the best matching version.

    Args:
        db: Database session
        package: Package to search versions in
        constraint: Version constraint string (e.g., '>=1.9', '<2.0,>=1.5')

    Returns:
        Tuple of (artifact_id, resolved_version, size) or None if not found
    """
    if not HAS_PACKAGING:
        # Fallback: if packaging not available, can't do constraint matching
        return None

    # Handle wildcard - return latest version
    if constraint == '*':
        # Get the latest version by created_at
        latest = db.query(PackageVersion).filter(
            PackageVersion.package_id == package.id,
        ).order_by(PackageVersion.created_at.desc()).first()
        if latest:
            artifact = db.query(Artifact).filter(Artifact.id == latest.artifact_id).first()
            if artifact:
                return (artifact.id, latest.version, artifact.size)
        return None

    try:
        specifier = SpecifierSet(constraint)
    except InvalidSpecifier:
        # Invalid constraint (e.g., ">=" without version) - treat as wildcard
        # This can happen with malformed metadata from PyPI packages
        latest = db.query(PackageVersion).filter(
            PackageVersion.package_id == package.id,
        ).order_by(PackageVersion.created_at.desc()).first()
        if latest:
            artifact = db.query(Artifact).filter(Artifact.id == latest.artifact_id).first()
            if artifact:
                return (artifact.id, latest.version, artifact.size)
        return None

    # Get all versions for this package
    all_versions = db.query(PackageVersion).filter(
        PackageVersion.package_id == package.id,
    ).all()

    if not all_versions:
        return None

    # Find matching versions
    matching = []
    for pv in all_versions:
        try:
            v = Version(pv.version)
            if v in specifier:
                matching.append((pv, v))
        except InvalidVersion:
            # Skip invalid versions
            continue

    if not matching:
        return None

    # Sort by version (descending) and return the latest matching
    matching.sort(key=lambda x: x[1], reverse=True)
    best_match = matching[0][0]

    artifact = db.query(Artifact).filter(Artifact.id == best_match.artifact_id).first()
    if artifact:
        return (artifact.id, best_match.version, artifact.size)
    return None


def _resolve_dependency_to_artifact(
    db: Session,
    project_name: str,
    package_name: str,
    version: str,
) -> Optional[Tuple[str, str, int]]:
    """
    Resolve a dependency constraint to an artifact ID.

    Supports:
    - Exact version matching (e.g., '1.2.3')
    - Version constraints (e.g., '>=1.9', '<2.0,>=1.5')
    - Wildcard ('*' for any version)

    Args:
        db: Database session
        project_name: Project name
        package_name: Package name
        version: Version or version constraint

    Returns:
        Tuple of (artifact_id, resolved_version, size) or None if not found
    """
    # Get project and package
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        return None

    package = db.query(Package).filter(
        Package.project_id == project.id,
        Package.name == package_name,
    ).first()
    if not package:
        return None

    # Check if this is a version constraint (>=, <, etc.) or exact version
    if _is_version_constraint(version):
        result = _resolve_version_constraint(db, package, version)
        if result:
            return result
    else:
        # Look up by exact version
        pkg_version = db.query(PackageVersion).filter(
            PackageVersion.package_id == package.id,
            PackageVersion.version == version,
        ).first()
        if pkg_version:
            artifact = db.query(Artifact).filter(
                Artifact.id == pkg_version.artifact_id
            ).first()
            if artifact:
                return (artifact.id, version, artifact.size)

    return None
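# How the two lookups combine (return values below are hypothetical):
#   _resolve_dependency_to_artifact(db, "_pypi", "numpy", ">=1.20")
#       -> ("<artifact-id>", "1.26.0", 16777216)   # best cached match per PEP 440
#   _resolve_dependency_to_artifact(db, "_pypi", "numpy", "1.26.0")
#       -> exact-version lookup via PackageVersion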

def _detect_package_cycle(
    db: Session,
    project_name: str,
    package_name: str,
    target_project: str,
    target_package: str,
    visiting: Set[str],
    visited: Set[str],
    path: List[str],
) -> Optional[List[str]]:
    """
    Detect cycles at the package level using DFS.

    Args:
        db: Database session
        project_name: Current project being visited
        package_name: Current package being visited
        target_project: The project we're checking for cycles back to
        target_package: The package we're checking for cycles back to
        visiting: Set of package keys currently in the recursion stack
        visited: Set of fully processed package keys
        path: Current path for cycle reporting

    Returns:
        Cycle path if detected, None otherwise
    """
    # Normalize names for comparison (handles extras like [test] and separators)
    pkg_normalized = _normalize_pypi_package_name(package_name)
    target_pkg_normalized = _normalize_pypi_package_name(target_package)

    # Use normalized key for tracking
    pkg_key = f"{project_name.lower()}/{pkg_normalized}"

    # Check if we've reached the target package (cycle detected)
    # Use normalized comparison to handle extras and naming variations
    if project_name.lower() == target_project.lower() and pkg_normalized == target_pkg_normalized:
        return path + [pkg_key]

    if pkg_key in visiting:
        # Unexpected internal cycle
        return None
    if pkg_key in visited:
        return None

    visiting.add(pkg_key)
    path.append(pkg_key)

    # Get the package and find any artifacts with dependencies
    project = db.query(Project).filter(Project.name == project_name).first()
    if project:
        package = db.query(Package).filter(
            Package.project_id == project.id,
            Package.name == package_name,
        ).first()
        if package:
            # Find all artifacts in this package via versions
            versions = db.query(PackageVersion).filter(PackageVersion.package_id == package.id).all()
            artifact_ids = {v.artifact_id for v in versions}

            # Get dependencies from all artifacts in this package
            for artifact_id in artifact_ids:
                deps = db.query(ArtifactDependency).filter(
                    ArtifactDependency.artifact_id == artifact_id
                ).all()
                for dep in deps:
                    cycle = _detect_package_cycle(
                        db,
                        dep.dependency_project,
                        dep.dependency_package,
                        target_project,
                        target_package,
                        visiting,
                        visited,
                        path,
                    )
                    if cycle:
                        return cycle

    path.pop()
    visiting.remove(pkg_key)
    visited.add(pkg_key)
    return None


def check_circular_dependencies(
    db: Session,
    artifact_id: str,
    new_dependencies: List[EnsureFileDependency],
    project_name: Optional[str] = None,
    package_name: Optional[str] = None,
) -> Optional[List[str]]:
    """
    Check if adding the new dependencies would create a circular dependency.

    Args:
        db: Database session
        artifact_id: The artifact that will have these dependencies
        new_dependencies: Dependencies to be added
        project_name: Project name (optional, will try to look up from version if not provided)
        package_name: Package name (optional, will try to look up from version if not provided)

    Returns:
        Cycle path if detected, None otherwise
    """
    # First, get the package info for this artifact to build path labels
    if project_name and package_name:
        current_path = f"{project_name}/{package_name}"
    else:
        # Try to look up from version
        artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first()
        if not artifact:
            return None

        # Find package for this artifact via version
        version_record = db.query(PackageVersion).filter(
            PackageVersion.artifact_id == artifact_id
        ).first()
        if not version_record:
            return None

        package = db.query(Package).filter(Package.id == version_record.package_id).first()
        if not package:
            return None

        project = db.query(Project).filter(Project.id == package.project_id).first()
        if not project:
            return None

        current_path = f"{project.name}/{package.name}"

    # Extract target project and package from current_path
    if "/" in current_path:
        target_project, target_package = current_path.split("/", 1)
    else:
        return None

    # Normalize the initial path for consistency with _detect_package_cycle
    normalized_path = f"{target_project.lower()}/{_normalize_pypi_package_name(target_package)}"

    # For each new dependency, check if it would create a cycle back to our package
    for dep in new_dependencies:
        # Check if this dependency (transitively) depends on us at the package level
        visiting: Set[str] = set()
        visited: Set[str] = set()
        path: List[str] = [normalized_path]

        # Check from the dependency's package
        cycle = _detect_package_cycle(
            db,
            dep.project,
            dep.package,
            target_project,
            target_package,
            visiting,
            visited,
            path,
        )
        if cycle:
            return cycle

    return None

def resolve_dependencies(
    db: Session,
    project_name: str,
    package_name: str,
    ref: str,
    base_url: str,
) -> DependencyResolutionResponse:
    """
    Resolve all dependencies for an artifact recursively.

    Args:
        db: Database session
        project_name: Project name
        package_name: Package name
        ref: Version reference (or artifact:hash)
        base_url: Base URL for download URLs

    Returns:
        DependencyResolutionResponse with all resolved artifacts

    Raises:
        DependencyNotFoundError: If the requested root artifact cannot be resolved
        CircularDependencyError: If circular dependencies are detected
        DependencyDepthExceededError: If resolution exceeds MAX_DEPENDENCY_DEPTH
        TooManyArtifactsError: If resolution exceeds MAX_TOTAL_ARTIFACTS

    Note:
        Missing transitive dependencies are reported in the response rather than
        raised, and version conflicts are resolved leniently (the first resolved
        version of a package wins).
    """
    # Resolve the initial artifact
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise DependencyNotFoundError(project_name, package_name, ref)

    package = db.query(Package).filter(
        Package.project_id == project.id,
        Package.name == package_name,
    ).first()
    if not package:
        raise DependencyNotFoundError(project_name, package_name, ref)

    # Handle artifact: prefix for direct artifact ID references
    if ref.startswith("artifact:"):
        artifact_id = ref[9:]
        artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first()
        if not artifact:
            raise DependencyNotFoundError(project_name, package_name, ref)
        root_artifact_id = artifact.id
        root_version = artifact_id[:12]  # Use short hash as version display
        root_size = artifact.size
    else:
        # Try to find artifact by version
        resolved = _resolve_dependency_to_artifact(
            db, project_name, package_name, ref
        )
        if not resolved:
            raise DependencyNotFoundError(project_name, package_name, ref)
        root_artifact_id, root_version, root_size = resolved

    # Track resolved artifacts and their versions
    resolved_artifacts: Dict[str, ResolvedArtifact] = {}
    # Track missing dependencies (not cached on server)
    missing_dependencies: List[MissingDependency] = []
    # Track version requirements for conflict detection
    version_requirements: Dict[str, List[Dict[str, Any]]] = {}  # pkg_key -> [(version, required_by)]
    # Track visiting/visited for cycle detection
    visiting: Set[str] = set()
    visited: Set[str] = set()
    # Track the current path for cycle reporting (artifact_id -> pkg_key)
    current_path: Dict[str, str] = {}
    # Resolution order (topological)
    resolution_order: List[str] = []
    # Track resolution path for debugging
    resolution_path_sync: List[str] = []

    def _resolve_recursive(
        artifact_id: str,
        proj_name: str,
        pkg_name: str,
        version_or_tag: str,
        size: int,
        required_by: Optional[str],
        depth: int = 0,
    ):
        """Recursively resolve dependencies with cycle/conflict detection."""
        pkg_key = f"{proj_name}/{pkg_name}"

        # Safety limit: prevent DoS through deeply nested dependencies
        if depth > MAX_DEPENDENCY_DEPTH:
            logger.error(
                f"Dependency depth exceeded at {pkg_key} (depth={depth}). "
                f"Resolution path: {' -> '.join(resolution_path_sync[-20:])}"
            )
            raise DependencyDepthExceededError(MAX_DEPENDENCY_DEPTH)

        # Cycle detection (at artifact level)
        if artifact_id in visiting:
            # Build cycle path from current_path
            cycle_start = current_path.get(artifact_id, pkg_key)
            cycle = [cycle_start, pkg_key]
            raise CircularDependencyError(cycle)

        # Version conflict handling - use first resolved version (lenient mode)
        if pkg_key in version_requirements:
            existing_versions = {r["version"] for r in version_requirements[pkg_key]}
            if version_or_tag not in existing_versions:
                # Different version requested - log and use existing (first wins)
                existing = version_requirements[pkg_key][0]["version"]
                logger.debug(
                    f"Version mismatch for {pkg_key}: using {existing} "
                    f"(also requested: {version_or_tag} by {required_by})"
                )
            # Already resolved this package - skip
            return

        if artifact_id in visited:
            return

        # Track path for debugging (only after early-return checks)
        resolution_path_sync.append(f"{pkg_key}@{version_or_tag}")

        visiting.add(artifact_id)
        current_path[artifact_id] = pkg_key

        # Track version requirement
        if pkg_key not in version_requirements:
            version_requirements[pkg_key] = []
        version_requirements[pkg_key].append({
            "version": version_or_tag,
            "required_by": required_by,
        })

        # Get dependencies
        deps = db.query(ArtifactDependency).filter(
            ArtifactDependency.artifact_id == artifact_id
        ).all()

        # Resolve each dependency first (depth-first)
        for dep in deps:
            # Skip self-dependencies (can happen with PyPI extras like pytest[testing])
            # Use normalized comparison for PyPI naming conventions (handles extras, separators)
            dep_proj_normalized = dep.dependency_project.lower()
            dep_pkg_normalized = _normalize_pypi_package_name(dep.dependency_package)
            curr_proj_normalized = proj_name.lower()
            curr_pkg_normalized = _normalize_pypi_package_name(pkg_name)
            if dep_proj_normalized == curr_proj_normalized and dep_pkg_normalized == curr_pkg_normalized:
                continue

            resolved_dep = _resolve_dependency_to_artifact(
                db,
                dep.dependency_project,
                dep.dependency_package,
                dep.version_constraint,
            )
            if not resolved_dep:
                # Dependency not cached on server - track as missing but continue
                constraint = dep.version_constraint
                missing_dependencies.append(MissingDependency(
                    project=dep.dependency_project,
                    package=dep.dependency_package,
                    constraint=constraint,
                    required_by=pkg_key,
                ))
                continue

            dep_artifact_id, dep_version, dep_size = resolved_dep

            # Skip if resolved to same artifact (self-dependency at artifact level)
            if dep_artifact_id == artifact_id:
                continue

            # Skip if this artifact is already being visited (would cause cycle)
            if dep_artifact_id in visiting:
                continue

            _resolve_recursive(
                dep_artifact_id,
                dep.dependency_project,
                dep.dependency_package,
                dep_version,
                dep_size,
                pkg_key,
                depth + 1,
            )

        visiting.remove(artifact_id)
        del current_path[artifact_id]
        visited.add(artifact_id)
        resolution_path_sync.pop()

        # Check total artifacts limit
        if len(resolution_order) >= MAX_TOTAL_ARTIFACTS:
            raise TooManyArtifactsError(MAX_TOTAL_ARTIFACTS)

        # Add to resolution order (dependencies before dependents)
        resolution_order.append(artifact_id)

        # Store resolved artifact info
        resolved_artifacts[artifact_id] = ResolvedArtifact(
            artifact_id=artifact_id,
            project=proj_name,
            package=pkg_name,
            version=version_or_tag,
            size=size,
            download_url=f"{base_url}/api/v1/project/{proj_name}/{pkg_name}/+/{version_or_tag}",
        )

    # Start resolution from root
    _resolve_recursive(
        root_artifact_id,
        project_name,
        package_name,
        root_version,
        root_size,
        None,
    )

    # Build response in topological order
    resolved_list = [resolved_artifacts[aid] for aid in resolution_order]
    total_size = sum(r.size for r in resolved_list)

    return DependencyResolutionResponse(
        requested={
            "project": project_name,
            "package": package_name,
            "ref": ref,
        },
        resolved=resolved_list,
        missing=missing_dependencies,
        fetched=[],  # No fetching in sync version
        total_size=total_size,
        artifact_count=len(resolved_list),
    )

# System project mapping for auto-fetch
SYSTEM_PROJECT_REGISTRY_MAP = {
    "_pypi": "pypi",
    "_npm": "npm",
    "_maven": "maven",
}


async def resolve_dependencies_with_fetch(
    db: Session,
    project_name: str,
    package_name: str,
    ref: str,
    base_url: str,
    storage: "S3Storage",
    registry_clients: Dict[str, "RegistryClient"],
) -> DependencyResolutionResponse:
    """
    Resolve all dependencies for an artifact recursively, fetching missing ones from upstream.

    This async version extends the basic resolution with auto-fetch capability:
    when a missing dependency is from a system project (e.g., _pypi), it attempts
    to fetch the package from the corresponding upstream registry.

    If the root artifact itself doesn't exist in a system project, it will also be
    fetched from upstream before resolution begins.

    Args:
        db: Database session
        project_name: Project name
        package_name: Package name
        ref: Version reference (or artifact:hash)
        base_url: Base URL for download URLs
        storage: S3 storage for caching fetched artifacts
        registry_clients: Map of system project to registry client {"_pypi": PyPIRegistryClient}

    Returns:
        DependencyResolutionResponse with all resolved artifacts and fetch status

    Raises:
        DependencyNotFoundError: If the root artifact cannot be found (even after fetch attempt)
        CircularDependencyError: If circular dependencies are detected
    """
    # Track fetched artifacts for response
    fetched_artifacts: List[ResolvedArtifact] = []

    # Check if project exists
    project = db.query(Project).filter(Project.name == project_name).first()
    # If project doesn't exist and it's a system project pattern, we can't auto-create it
    if not project:
        raise DependencyNotFoundError(project_name, package_name, ref)

    # Check if package exists
    package = db.query(Package).filter(
        Package.project_id == project.id,
        Package.name == package_name,
    ).first()

    # Try to resolve the root artifact
    root_artifact_id = None
    root_version = None
    root_size = None

    # Handle artifact: prefix for direct artifact ID references
    if ref.startswith("artifact:"):
        artifact_id = ref[9:]
        artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first()
        if artifact:
            root_artifact_id = artifact.id
            root_version = artifact_id[:12]
            root_size = artifact.size
    elif package:
        # Try to resolve by version/constraint
        resolved = _resolve_dependency_to_artifact(
            db, project_name, package_name, ref
        )
        if resolved:
            root_artifact_id, root_version, root_size = resolved

    # If root artifact not found and this is a system project, try to fetch it
    if root_artifact_id is None and project_name in SYSTEM_PROJECT_REGISTRY_MAP:
        logger.info(
            f"Root artifact {project_name}/{package_name}@{ref} not found, "
            "attempting to fetch from upstream"
        )
        client = registry_clients.get(project_name)
        if client:
            try:
                # Resolve the version constraint from upstream
                version_info = await client.resolve_constraint(package_name, ref)
                if version_info:
                    # Fetch and cache the package
                    fetch_result = await client.fetch_package(
                        package_name, version_info, db, storage
                    )
                    if fetch_result:
                        logger.info(
                            f"Successfully fetched root artifact {package_name}=="
                            f"{fetch_result.version} (artifact {fetch_result.artifact_id[:12]})"
                        )
                        root_artifact_id = fetch_result.artifact_id
                        root_version = fetch_result.version
                        root_size = fetch_result.size
                        # Add to fetched list
                        fetched_artifacts.append(ResolvedArtifact(
                            artifact_id=fetch_result.artifact_id,
                            project=project_name,
                            package=package_name,
                            version=fetch_result.version,
                            size=fetch_result.size,
                            download_url=f"{base_url}/api/v1/project/{project_name}/{package_name}/+/{fetch_result.version}",
                        ))
            except Exception as e:
                logger.warning(f"Failed to fetch root artifact {package_name}: {e}")

    # If still no root artifact, raise error
    if root_artifact_id is None:
        raise DependencyNotFoundError(project_name, package_name, ref)

    # Track state
    resolved_artifacts: Dict[str, ResolvedArtifact] = {}
    missing_dependencies: List[MissingDependency] = []
    # Note: fetched_artifacts was initialized above and may already contain the
    # root artifact if it was fetched from upstream
    version_requirements: Dict[str, List[Dict[str, Any]]] = {}
    visiting: Set[str] = set()
    visited: Set[str] = set()
    current_path: Dict[str, str] = {}
    resolution_order: List[str] = []

    # Track fetch attempts to prevent loops
    fetch_attempted: Set[str] = set()  # "project/package@constraint"

    async def _try_fetch_dependency(
        dep_project: str,
        dep_package: str,
        constraint: str,
        required_by: str,
    ) -> Optional[Tuple[str, str, int]]:
        """
        Try to fetch a missing dependency from upstream registry.

        Returns (artifact_id, version, size) if successful, None otherwise.
        """
        # Only fetch from system projects
        registry_type = SYSTEM_PROJECT_REGISTRY_MAP.get(dep_project)
        if not registry_type:
            logger.debug(
                f"Not a system project, skipping fetch: {dep_project}/{dep_package}"
            )
            return None

        # Build fetch key for loop prevention
        fetch_key = f"{dep_project}/{dep_package}@{constraint}"
        if fetch_key in fetch_attempted:
            logger.debug(f"Already attempted fetch for {fetch_key}")
            return None
        fetch_attempted.add(fetch_key)

        # Get registry client
        client = registry_clients.get(dep_project)
        if not client:
            logger.debug(f"No registry client for {dep_project}")
            return None

        try:
            # Resolve version constraint
            version_info = await client.resolve_constraint(dep_package, constraint)
            if not version_info:
                logger.info(
                    f"No version of {dep_package} matches constraint '{constraint}' on upstream"
                )
                return None

            # Fetch and cache the package
            fetch_result = await client.fetch_package(
                dep_package, version_info, db, storage
            )
            if not fetch_result:
                logger.warning(f"Failed to fetch {dep_package}=={version_info.version}")
                return None

            logger.info(
                f"Successfully fetched {dep_package}=={version_info.version} "
                f"(artifact {fetch_result.artifact_id[:12]})"
            )

            # Add to fetched list for response
            fetched_artifacts.append(ResolvedArtifact(
                artifact_id=fetch_result.artifact_id,
                project=dep_project,
                package=dep_package,
                version=fetch_result.version,
                size=fetch_result.size,
                download_url=f"{base_url}/api/v1/project/{dep_project}/{dep_package}/+/{fetch_result.version}",
            ))

            return (fetch_result.artifact_id, fetch_result.version, fetch_result.size)
        except Exception as e:
            logger.warning(f"Error fetching {dep_package}: {e}")
            return None
" f"Resolution path: {' -> '.join(resolution_path[-20:])}" ) raise DependencyDepthExceededError(MAX_DEPENDENCY_DEPTH) # Cycle detection if artifact_id in visiting: cycle_start = current_path.get(artifact_id, pkg_key) cycle = [cycle_start, pkg_key] raise CircularDependencyError(cycle) # Version conflict handling - use first resolved version (lenient mode) if pkg_key in version_requirements: existing_versions = {r["version"] for r in version_requirements[pkg_key]} if version_or_tag not in existing_versions: # Different version requested - log and use existing (first wins) existing = version_requirements[pkg_key][0]["version"] logger.debug( f"Version mismatch for {pkg_key}: using {existing} " f"(also requested: {version_or_tag} by {required_by})" ) # Already resolved this package - skip return if artifact_id in visited: return # Track path for debugging (only after early-return checks) resolution_path.append(f"{pkg_key}@{version_or_tag}") visiting.add(artifact_id) current_path[artifact_id] = pkg_key if pkg_key not in version_requirements: version_requirements[pkg_key] = [] version_requirements[pkg_key].append({ "version": version_or_tag, "required_by": required_by, }) # Get dependencies deps = db.query(ArtifactDependency).filter( ArtifactDependency.artifact_id == artifact_id ).all() for dep in deps: # Skip self-dependencies (common with PyPI extras like pytest[testing] -> pytest) dep_proj_normalized = dep.dependency_project.lower() dep_pkg_normalized = _normalize_pypi_package_name(dep.dependency_package) curr_proj_normalized = proj_name.lower() curr_pkg_normalized = _normalize_pypi_package_name(pkg_name) if dep_proj_normalized == curr_proj_normalized and dep_pkg_normalized == curr_pkg_normalized: logger.debug( f"Skipping self-dependency: {pkg_key} -> {dep.dependency_project}/{dep.dependency_package}" ) continue # Also check if this dependency would resolve to the current artifact # (handles cases where package names differ but resolve to same artifact) resolved_dep = _resolve_dependency_to_artifact( db, dep.dependency_project, dep.dependency_package, dep.version_constraint, ) if not resolved_dep: # Try to fetch from upstream if it's a system project fetched = await _try_fetch_dependency( dep.dependency_project, dep.dependency_package, dep.version_constraint, pkg_key, ) if fetched: resolved_dep = fetched else: # Still missing - add to missing list with fetch status fetch_key = f"{dep.dependency_project}/{dep.dependency_package}@{dep.version_constraint}" was_attempted = fetch_key in fetch_attempted missing_dependencies.append(MissingDependency( project=dep.dependency_project, package=dep.dependency_package, constraint=dep.version_constraint, required_by=pkg_key, fetch_attempted=was_attempted, )) continue dep_artifact_id, dep_version, dep_size = resolved_dep # Skip if resolved to same artifact (self-dependency at artifact level) if dep_artifact_id == artifact_id: logger.debug( f"Skipping self-dependency (same artifact): {pkg_key} -> " f"{dep.dependency_project}/{dep.dependency_package} (artifact {dep_artifact_id[:12]})" ) continue # Skip if this artifact is already being visited (would cause cycle) if dep_artifact_id in visiting: logger.debug( f"Skipping dependency already in resolution stack: {pkg_key} -> " f"{dep.dependency_project}/{dep.dependency_package} (artifact {dep_artifact_id[:12]})" ) continue # Check if we've already resolved this package to a different version dep_pkg_key = f"{dep.dependency_project}/{dep.dependency_package}" if dep_pkg_key in version_requirements: existing_version 
= version_requirements[dep_pkg_key][0]["version"] if existing_version != dep_version: # Different version resolved - check if existing satisfies new constraint if HAS_PACKAGING and _version_satisfies_constraint(existing_version, dep.version_constraint): logger.debug( f"Reusing existing version {existing_version} for {dep_pkg_key} " f"(satisfies constraint {dep.version_constraint})" ) continue else: logger.debug( f"Version conflict for {dep_pkg_key}: have {existing_version}, " f"need {dep.version_constraint} (resolved to {dep_version})" ) # Don't raise error - just use the first version we resolved # This is more lenient than strict conflict detection continue await _resolve_recursive_async( dep_artifact_id, dep.dependency_project, dep.dependency_package, dep_version, dep_size, pkg_key, depth + 1, ) visiting.remove(artifact_id) del current_path[artifact_id] visited.add(artifact_id) resolution_path.pop() # Check total artifacts limit if len(resolution_order) >= MAX_TOTAL_ARTIFACTS: raise TooManyArtifactsError(MAX_TOTAL_ARTIFACTS) resolution_order.append(artifact_id) resolved_artifacts[artifact_id] = ResolvedArtifact( artifact_id=artifact_id, project=proj_name, package=pkg_name, version=version_or_tag, size=size, download_url=f"{base_url}/api/v1/project/{proj_name}/{pkg_name}/+/{version_or_tag}", ) # Start resolution from root await _resolve_recursive_async( root_artifact_id, project_name, package_name, root_version, root_size, None, ) # Build response in topological order resolved_list = [resolved_artifacts[aid] for aid in resolution_order] total_size = sum(r.size for r in resolved_list) return DependencyResolutionResponse( requested={ "project": project_name, "package": package_name, "ref": ref, }, resolved=resolved_list, missing=missing_dependencies, fetched=fetched_artifacts, total_size=total_size, artifact_count=len(resolved_list), )
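
# Sketch of calling the async variant from handler code (the client
# constructor arguments and storage object shown here are illustrative):
#
#   registry_clients = {"_pypi": PyPIRegistryClient(...)}
#   result = await resolve_dependencies_with_fetch(
#       db, "_pypi", "requests", ">=2.28", base_url, storage, registry_clients,
#   )
#   # result.fetched lists artifacts pulled from upstream during resolution;
#   # result.missing lists anything that still could not be resolved.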