Fix httpx.Timeout configuration in PyPI proxy

Mondo Diaz
2026-02-05 10:31:04 -06:00
parent 11c5aee0f1
commit dfa089376a
76 changed files with 9384 additions and 4407 deletions

View File

@@ -0,0 +1,262 @@
"""
Redis-backed caching service with category-aware TTL and invalidation.
Provides:
- Immutable caching for artifact data (hermetic builds)
- TTL-based caching for discovery data
- Event-driven invalidation for config changes
- Graceful fallback when Redis unavailable
"""
import logging
from enum import Enum
from typing import Optional
from .config import Settings
logger = logging.getLogger(__name__)
class CacheCategory(Enum):
"""
Cache categories with different TTL and invalidation rules.
Immutable (cache forever):
- ARTIFACT_METADATA: Artifact info by SHA256
- ARTIFACT_DEPENDENCIES: Extracted deps by SHA256
- DEPENDENCY_RESOLUTION: Resolution results by input hash
Mutable (TTL + event invalidation):
- UPSTREAM_SOURCES: Upstream config, invalidate on DB change
- PACKAGE_INDEX: PyPI/npm index pages, TTL only
- PACKAGE_VERSIONS: Version listings, TTL only
"""
# Immutable - cache forever (hermetic builds)
ARTIFACT_METADATA = "artifact"
ARTIFACT_DEPENDENCIES = "deps"
DEPENDENCY_RESOLUTION = "resolve"
# Mutable - TTL + event invalidation
UPSTREAM_SOURCES = "upstream"
PACKAGE_INDEX = "index"
PACKAGE_VERSIONS = "versions"
def get_category_ttl(category: CacheCategory, settings: Settings) -> Optional[int]:
"""
Get TTL for a cache category.
Returns:
TTL in seconds, or None for no expiry (immutable).
"""
ttl_map = {
# Immutable - no TTL
CacheCategory.ARTIFACT_METADATA: None,
CacheCategory.ARTIFACT_DEPENDENCIES: None,
CacheCategory.DEPENDENCY_RESOLUTION: None,
# Mutable - configurable TTL
CacheCategory.UPSTREAM_SOURCES: settings.cache_ttl_upstream,
CacheCategory.PACKAGE_INDEX: settings.cache_ttl_index,
CacheCategory.PACKAGE_VERSIONS: settings.cache_ttl_versions,
}
return ttl_map.get(category)
class CacheService:
"""
Redis-backed caching with category-aware TTL.
Key format: orchard:{category}:{protocol}:{identifier}
Example: orchard:deps:pypi:abc123def456
When Redis is disabled or unavailable, operations gracefully
return None/no-op to allow the application to function without caching.
"""
def __init__(self, settings: Settings):
self._settings = settings
self._enabled = settings.redis_enabled
self._redis: Optional["redis.asyncio.Redis"] = None
self._started = False
async def startup(self) -> None:
"""Initialize Redis connection. Called by FastAPI lifespan."""
if self._started:
return
if not self._enabled:
logger.info("CacheService disabled (redis_enabled=False)")
self._started = True
return
try:
import redis.asyncio as redis
logger.info(
f"Connecting to Redis at {self._settings.redis_host}:"
f"{self._settings.redis_port}/{self._settings.redis_db}"
)
self._redis = redis.Redis(
host=self._settings.redis_host,
port=self._settings.redis_port,
db=self._settings.redis_db,
password=self._settings.redis_password,
decode_responses=False, # We handle bytes
)
# Test connection
await self._redis.ping()
logger.info("CacheService connected to Redis")
except ImportError:
logger.warning("redis package not installed, caching disabled")
self._enabled = False
except Exception as e:
logger.warning(f"Redis connection failed, caching disabled: {e}")
self._enabled = False
self._redis = None
self._started = True
async def shutdown(self) -> None:
"""Close Redis connection. Called by FastAPI lifespan."""
if not self._started:
return
if self._redis:
await self._redis.aclose()
self._redis = None
self._started = False
logger.info("CacheService shutdown complete")
@staticmethod
def _make_key(category: CacheCategory, protocol: str, identifier: str) -> str:
"""Build namespaced cache key."""
return f"orchard:{category.value}:{protocol}:{identifier}"
async def get(
self,
category: CacheCategory,
key: str,
protocol: str = "default",
) -> Optional[bytes]:
"""
Get cached value.
Args:
category: Cache category for TTL rules
key: Unique identifier within category
protocol: Protocol namespace (pypi, npm, etc.)
Returns:
Cached bytes or None if not found/disabled.
"""
if not self._enabled or not self._redis:
return None
try:
full_key = self._make_key(category, protocol, key)
return await self._redis.get(full_key)
except Exception as e:
logger.warning(f"Cache get failed for {key}: {e}")
return None
async def set(
self,
category: CacheCategory,
key: str,
value: bytes,
protocol: str = "default",
) -> None:
"""
Set cached value with category-appropriate TTL.
Args:
category: Cache category for TTL rules
key: Unique identifier within category
value: Bytes to cache
protocol: Protocol namespace (pypi, npm, etc.)
"""
if not self._enabled or not self._redis:
return
try:
full_key = self._make_key(category, protocol, key)
ttl = get_category_ttl(category, self._settings)
if ttl is None:
await self._redis.set(full_key, value)
else:
await self._redis.setex(full_key, ttl, value)
except Exception as e:
logger.warning(f"Cache set failed for {key}: {e}")
async def delete(
self,
category: CacheCategory,
key: str,
protocol: str = "default",
) -> None:
"""Delete a specific cache entry."""
if not self._enabled or not self._redis:
return
try:
full_key = self._make_key(category, protocol, key)
await self._redis.delete(full_key)
except Exception as e:
logger.warning(f"Cache delete failed for {key}: {e}")
async def invalidate_pattern(
self,
category: CacheCategory,
pattern: str = "*",
protocol: str = "default",
) -> int:
"""
Invalidate all entries matching pattern.
Args:
category: Cache category
pattern: Glob pattern for keys (default "*" = all in category)
protocol: Protocol namespace
Returns:
Number of keys deleted.
"""
if not self._enabled or not self._redis:
return 0
try:
full_pattern = self._make_key(category, protocol, pattern)
keys = []
async for key in self._redis.scan_iter(match=full_pattern):
keys.append(key)
if keys:
return await self._redis.delete(*keys)
return 0
except Exception as e:
logger.warning(f"Cache invalidate failed for pattern {pattern}: {e}")
return 0
async def ping(self) -> bool:
"""Check if Redis is connected and responding."""
if not self._enabled or not self._redis:
return False
try:
await self._redis.ping()
return True
except Exception:
return False
@property
def enabled(self) -> bool:
"""Check if caching is enabled."""
return self._enabled
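
A minimal usage sketch (not part of this diff), assuming the service is exposed as app.state.cache the way main.py wires it below; fetch_index_from_upstream is a hypothetical helper:

# Hypothetical caller: serve a PyPI index page through the cache.
from fastapi import Request
from .cache_service import CacheCategory, CacheService  # this module

async def get_index_page(request: Request, package: str) -> bytes:
    cache: CacheService = request.app.state.cache
    cached = await cache.get(CacheCategory.PACKAGE_INDEX, package, protocol="pypi")
    if cached is not None:
        return cached  # hit: served from Redis within cache_ttl_index
    body = await fetch_index_from_upstream(package)  # hypothetical upstream fetch
    await cache.set(CacheCategory.PACKAGE_INDEX, package, body, protocol="pypi")
    return body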

View File

@@ -22,8 +22,8 @@ class Settings(BaseSettings):
database_sslmode: str = "disable"
# Database connection pool settings
database_pool_size: int = 5 # Number of connections to keep open
database_max_overflow: int = 10 # Max additional connections beyond pool_size
database_pool_size: int = 20 # Number of connections to keep open
database_max_overflow: int = 30 # Max additional connections beyond pool_size
database_pool_timeout: int = 30 # Seconds to wait for a connection from pool
database_pool_recycle: int = (
1800 # Recycle connections after this many seconds (30 min)
@@ -51,6 +51,26 @@ class Settings(BaseSettings):
presigned_url_expiry: int = (
3600 # Presigned URL expiry in seconds (default: 1 hour)
)
pypi_download_mode: str = "redirect" # "redirect" (to S3) or "proxy" (stream through Orchard)
# HTTP Client pool settings
http_max_connections: int = 100 # Max connections per pool
http_max_keepalive: int = 20 # Keep-alive connections
http_connect_timeout: float = 30.0 # Connection timeout seconds
http_read_timeout: float = 60.0 # Read timeout seconds
http_worker_threads: int = 32 # Thread pool for blocking ops
# Redis cache settings
redis_host: str = "localhost"
redis_port: int = 6379
redis_db: int = 0
redis_password: Optional[str] = None
redis_enabled: bool = True # Set False to disable caching
# Cache TTL settings (seconds, 0 = no expiry)
cache_ttl_index: int = 300 # Package index pages: 5 min
cache_ttl_versions: int = 300 # Version listings: 5 min
cache_ttl_upstream: int = 3600 # Upstream source config: 1 hour
# Logging settings
log_level: str = "INFO" # DEBUG, INFO, WARNING, ERROR, CRITICAL
@@ -64,6 +84,15 @@ class Settings(BaseSettings):
# Global cache settings override (None = use DB value, True/False = override DB)
cache_auto_create_system_projects: Optional[bool] = None # Override auto_create_system_projects
# PyPI Cache Worker settings
pypi_cache_workers: int = 5 # Number of concurrent cache workers
pypi_cache_max_depth: int = 10 # Maximum recursion depth for dependency caching
pypi_cache_max_attempts: int = 3 # Maximum retry attempts for failed cache tasks
# Auto-fetch configuration for dependency resolution
auto_fetch_dependencies: bool = False # Server default for auto_fetch parameter
auto_fetch_timeout: int = 300 # Total timeout for auto-fetch resolution in seconds
# JWT Authentication settings (optional, for external identity providers)
jwt_enabled: bool = False # Enable JWT token validation
jwt_secret: str = "" # Secret key for HS256, or leave empty for RS256 with JWKS
@@ -88,6 +117,24 @@ class Settings(BaseSettings):
def is_production(self) -> bool:
return self.env.lower() == "production"
@property
def PORT(self) -> int:
"""Alias for server_port for compatibility."""
return self.server_port
# Uppercase aliases for PyPI cache settings (for backward compatibility)
@property
def PYPI_CACHE_WORKERS(self) -> int:
return self.pypi_cache_workers
@property
def PYPI_CACHE_MAX_DEPTH(self) -> int:
return self.pypi_cache_max_depth
@property
def PYPI_CACHE_MAX_ATTEMPTS(self) -> int:
return self.pypi_cache_max_attempts
class Config:
env_prefix = "ORCHARD_"
case_sensitive = False
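
Because the settings class uses env_prefix = "ORCHARD_" with case_sensitive = False, the new pool and cache knobs can be overridden from the environment. A quick illustrative sketch (module path and values are assumptions):

# Illustrative only: environment overrides picked up by the pydantic Settings class.
import os
from app.config import Settings  # assumed import path for this file

os.environ["ORCHARD_REDIS_HOST"] = "redis.internal"      # -> settings.redis_host
os.environ["ORCHARD_REDIS_ENABLED"] = "false"            # -> settings.redis_enabled (disables caching)
os.environ["ORCHARD_HTTP_MAX_CONNECTIONS"] = "200"       # -> settings.http_max_connections
os.environ["ORCHARD_CACHE_TTL_INDEX"] = "600"            # -> settings.cache_ttl_index (10 min)

settings = Settings()  # reads the ORCHARD_-prefixed variables above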

View File

@@ -220,17 +220,7 @@ def _run_migrations():
CREATE UNIQUE INDEX idx_packages_project_name ON packages(project_id, name);
END IF;
IF NOT EXISTS (
SELECT 1 FROM pg_indexes WHERE indexname = 'idx_tags_package_name'
) THEN
CREATE UNIQUE INDEX idx_tags_package_name ON tags(package_id, name);
END IF;
IF NOT EXISTS (
SELECT 1 FROM pg_indexes WHERE indexname = 'idx_tags_package_created_at'
) THEN
CREATE INDEX idx_tags_package_created_at ON tags(package_id, created_at);
END IF;
-- Tag indexes removed: tags table no longer exists (tag system removed in migration 024)
END $$;
""",
),
@@ -287,27 +277,8 @@ def _run_migrations():
Migration(
name="008_create_tags_ref_count_triggers",
sql="""
DO $$
BEGIN
DROP TRIGGER IF EXISTS tags_ref_count_insert_trigger ON tags;
CREATE TRIGGER tags_ref_count_insert_trigger
AFTER INSERT ON tags
FOR EACH ROW
EXECUTE FUNCTION increment_artifact_ref_count();
DROP TRIGGER IF EXISTS tags_ref_count_delete_trigger ON tags;
CREATE TRIGGER tags_ref_count_delete_trigger
AFTER DELETE ON tags
FOR EACH ROW
EXECUTE FUNCTION decrement_artifact_ref_count();
DROP TRIGGER IF EXISTS tags_ref_count_update_trigger ON tags;
CREATE TRIGGER tags_ref_count_update_trigger
AFTER UPDATE ON tags
FOR EACH ROW
WHEN (OLD.artifact_id IS DISTINCT FROM NEW.artifact_id)
EXECUTE FUNCTION update_artifact_ref_count();
END $$;
-- Tags table removed: triggers no longer needed (tag system removed)
DO $$ BEGIN NULL; END $$;
""",
),
Migration(
@@ -354,9 +325,11 @@ def _run_migrations():
Migration(
name="011_migrate_semver_tags_to_versions",
sql=r"""
-- Migrate semver tags to versions (only if both tables exist - for existing databases)
DO $$
BEGIN
IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'package_versions') THEN
IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'package_versions')
AND EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'tags') THEN
INSERT INTO package_versions (id, package_id, artifact_id, version, version_source, created_by, created_at)
SELECT
gen_random_uuid(),
@@ -565,6 +538,62 @@ def _run_migrations():
WHERE name IN ('npm-public', 'pypi-public', 'maven-central', 'docker-hub');
""",
),
Migration(
name="024_remove_tags",
sql="""
-- Remove tag system, keeping only versions for artifact references
DO $$
BEGIN
-- Drop triggers on tags table (if they exist)
DROP TRIGGER IF EXISTS tags_ref_count_insert_trigger ON tags;
DROP TRIGGER IF EXISTS tags_ref_count_delete_trigger ON tags;
DROP TRIGGER IF EXISTS tags_ref_count_update_trigger ON tags;
DROP TRIGGER IF EXISTS tags_updated_at_trigger ON tags;
DROP TRIGGER IF EXISTS tag_changes_trigger ON tags;
-- Drop the tag change tracking function
DROP FUNCTION IF EXISTS track_tag_changes();
-- Remove tag_constraint from artifact_dependencies
IF EXISTS (
SELECT 1 FROM information_schema.table_constraints
WHERE constraint_name = 'check_constraint_type'
AND table_name = 'artifact_dependencies'
) THEN
ALTER TABLE artifact_dependencies DROP CONSTRAINT check_constraint_type;
END IF;
-- Remove the tag_constraint column if it exists
IF EXISTS (
SELECT 1 FROM information_schema.columns
WHERE table_name = 'artifact_dependencies' AND column_name = 'tag_constraint'
) THEN
ALTER TABLE artifact_dependencies DROP COLUMN tag_constraint;
END IF;
-- Make version_constraint NOT NULL
UPDATE artifact_dependencies SET version_constraint = '*' WHERE version_constraint IS NULL;
ALTER TABLE artifact_dependencies ALTER COLUMN version_constraint SET NOT NULL;
-- Drop tag_history table first (depends on tags)
DROP TABLE IF EXISTS tag_history;
-- Drop tags table
DROP TABLE IF EXISTS tags;
-- Rename uploads.tag_name to version if it exists and version doesn't
IF EXISTS (
SELECT 1 FROM information_schema.columns
WHERE table_name = 'uploads' AND column_name = 'tag_name'
) AND NOT EXISTS (
SELECT 1 FROM information_schema.columns
WHERE table_name = 'uploads' AND column_name = 'version'
) THEN
ALTER TABLE uploads RENAME COLUMN tag_name TO version;
END IF;
END $$;
""",
),
]
with engine.connect() as conn:

175
backend/app/db_utils.py Normal file
View File

@@ -0,0 +1,175 @@
"""
Database utilities for optimized artifact operations.
Provides batch operations to eliminate N+1 queries.
"""
import logging
from typing import Optional
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlalchemy.orm import Session
from .models import Artifact, ArtifactDependency, CachedUrl
logger = logging.getLogger(__name__)
class ArtifactRepository:
"""
Optimized database operations for artifact storage.
Key optimizations:
- Atomic upserts using ON CONFLICT
- Batch inserts for dependencies
- Joined queries to avoid N+1
"""
def __init__(self, db: Session):
self.db = db
@staticmethod
def _format_dependency_values(
artifact_id: str,
dependencies: list[tuple[str, str, str]],
) -> list[dict]:
"""
Format dependencies for batch insert.
Args:
artifact_id: SHA256 of the artifact
dependencies: List of (project, package, version_constraint)
Returns:
List of dicts ready for bulk insert.
"""
return [
{
"artifact_id": artifact_id,
"dependency_project": proj,
"dependency_package": pkg,
"version_constraint": ver,
}
for proj, pkg, ver in dependencies
]
def get_or_create_artifact(
self,
sha256: str,
size: int,
filename: str,
content_type: Optional[str] = None,
created_by: str = "system",
s3_key: Optional[str] = None,
) -> tuple[Artifact, bool]:
"""
Get existing artifact or create new one atomically.
Uses INSERT ... ON CONFLICT DO UPDATE to handle races.
If artifact exists, increments ref_count.
Args:
sha256: Content hash (primary key)
size: File size in bytes
filename: Original filename
content_type: MIME type
created_by: User who created the artifact
s3_key: S3 storage key (defaults to standard path)
Returns:
(artifact, created) tuple where created is True for new artifacts.
"""
if s3_key is None:
s3_key = f"fruits/{sha256[:2]}/{sha256[2:4]}/{sha256}"
stmt = pg_insert(Artifact).values(
id=sha256,
size=size,
original_name=filename,
content_type=content_type,
ref_count=1,
created_by=created_by,
s3_key=s3_key,
).on_conflict_do_update(
index_elements=['id'],
set_={'ref_count': Artifact.ref_count + 1}
).returning(Artifact)
result = self.db.execute(stmt)
artifact = result.scalar_one()
# Check if this was an insert or update by comparing ref_count
# ref_count=1 means new, >1 means existing
created = artifact.ref_count == 1
return artifact, created
def batch_upsert_dependencies(
self,
artifact_id: str,
dependencies: list[tuple[str, str, str]],
) -> int:
"""
Insert dependencies in a single batch operation.
Uses ON CONFLICT DO NOTHING to skip duplicates.
Args:
artifact_id: SHA256 of the artifact
dependencies: List of (project, package, version_constraint)
Returns:
Number of dependencies inserted.
"""
if not dependencies:
return 0
values = self._format_dependency_values(artifact_id, dependencies)
stmt = pg_insert(ArtifactDependency).values(values)
stmt = stmt.on_conflict_do_nothing(
index_elements=['artifact_id', 'dependency_project', 'dependency_package']
)
result = self.db.execute(stmt)
return result.rowcount
def get_cached_url_with_artifact(
self,
url_hash: str,
) -> Optional[tuple[CachedUrl, Artifact]]:
"""
Get cached URL and its artifact in a single query.
Args:
url_hash: SHA256 of the URL
Returns:
(CachedUrl, Artifact) tuple or None if not found.
"""
result = (
self.db.query(CachedUrl, Artifact)
.join(Artifact, CachedUrl.artifact_id == Artifact.id)
.filter(CachedUrl.url_hash == url_hash)
.first()
)
return result
def get_artifact_dependencies(
self,
artifact_id: str,
) -> list[ArtifactDependency]:
"""
Get all dependencies for an artifact in a single query.
Args:
artifact_id: SHA256 of the artifact
Returns:
List of ArtifactDependency objects.
"""
return (
self.db.query(ArtifactDependency)
.filter(ArtifactDependency.artifact_id == artifact_id)
.all()
)
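
A minimal sketch of how a caller might use these batch helpers (not part of this diff); the hash, filenames, and dependency list are placeholders, and db is assumed to be an open SQLAlchemy Session:

# Hypothetical caller: store an artifact and its dependencies without N+1 queries.
from sqlalchemy.orm import Session
from .db_utils import ArtifactRepository  # this module

def store_upload(db: Session, sha256: str) -> None:
    repo = ArtifactRepository(db)
    artifact, created = repo.get_or_create_artifact(
        sha256=sha256,
        size=1024,
        filename="example-1.0.0-py3-none-any.whl",  # placeholder
        content_type="application/zip",
        created_by="ci-bot",
    )
    # One batched INSERT ... ON CONFLICT DO NOTHING covers every dependency row.
    repo.batch_upsert_dependencies(
        artifact.id,
        [("pypi", "requests", ">=2.31"), ("pypi", "urllib3", "*")],
    )
    db.commit()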

File diff suppressed because it is too large

179
backend/app/http_client.py Normal file
View File

@@ -0,0 +1,179 @@
"""
HTTP client manager with connection pooling and lifecycle management.
Provides:
- Shared connection pools for upstream requests
- Per-upstream client isolation when needed
- Thread pool for blocking I/O operations
- FastAPI lifespan integration
"""
import asyncio
import logging
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Callable, Optional
import httpx
from .config import Settings
logger = logging.getLogger(__name__)
class HttpClientManager:
"""
Manages httpx.AsyncClient pools with FastAPI lifespan integration.
Features:
- Default shared pool for general requests
- Per-upstream pools for sources needing specific config/auth
- Dedicated thread pool for blocking operations
- Graceful shutdown
"""
def __init__(self, settings: Settings):
self.max_connections = settings.http_max_connections
self.max_keepalive = settings.http_max_keepalive
self.connect_timeout = settings.http_connect_timeout
self.read_timeout = settings.http_read_timeout
self.worker_threads = settings.http_worker_threads
self._default_client: Optional[httpx.AsyncClient] = None
self._upstream_clients: dict[str, httpx.AsyncClient] = {}
self._executor: Optional[ThreadPoolExecutor] = None
self._started = False
async def startup(self) -> None:
"""Initialize clients and thread pool. Called by FastAPI lifespan."""
if self._started:
return
logger.info(
f"Starting HttpClientManager: max_connections={self.max_connections}, "
f"worker_threads={self.worker_threads}"
)
# Create connection limits
limits = httpx.Limits(
max_connections=self.max_connections,
max_keepalive_connections=self.max_keepalive,
)
# Create timeout config
timeout = httpx.Timeout(
connect=self.connect_timeout,
read=self.read_timeout,
write=self.read_timeout,
pool=self.connect_timeout,
)
# Create default client
self._default_client = httpx.AsyncClient(
limits=limits,
timeout=timeout,
follow_redirects=False, # Handle redirects manually for auth
)
# Create thread pool for blocking operations
self._executor = ThreadPoolExecutor(
max_workers=self.worker_threads,
thread_name_prefix="orchard-blocking-",
)
self._started = True
logger.info("HttpClientManager started")
async def shutdown(self) -> None:
"""Close all clients and thread pool. Called by FastAPI lifespan."""
if not self._started:
return
logger.info("Shutting down HttpClientManager")
# Close default client
if self._default_client:
await self._default_client.aclose()
self._default_client = None
# Close upstream-specific clients
for name, client in self._upstream_clients.items():
logger.debug(f"Closing upstream client: {name}")
await client.aclose()
self._upstream_clients.clear()
# Shutdown thread pool
if self._executor:
self._executor.shutdown(wait=True)
self._executor = None
self._started = False
logger.info("HttpClientManager shutdown complete")
def get_client(self, upstream_name: Optional[str] = None) -> httpx.AsyncClient:
"""
Get HTTP client for making requests.
Args:
upstream_name: Optional upstream source name for dedicated pool.
If None, returns the default shared client.
Returns:
httpx.AsyncClient configured for the request.
Raises:
RuntimeError: If manager not started.
"""
if not self._started or not self._default_client:
raise RuntimeError("HttpClientManager not started. Call startup() first.")
if upstream_name and upstream_name in self._upstream_clients:
return self._upstream_clients[upstream_name]
return self._default_client
async def run_blocking(self, func: Callable[..., Any], *args: Any) -> Any:
"""
Run a blocking function in the thread pool.
Use this for:
- File I/O operations
- Archive extraction (zipfile, tarfile)
- Hash computation on large data
Args:
func: Synchronous function to execute
*args: Arguments to pass to the function
Returns:
The function's return value.
"""
if not self._executor:
raise RuntimeError("HttpClientManager not started. Call startup() first.")
loop = asyncio.get_running_loop()
return await loop.run_in_executor(self._executor, func, *args)
@property
def active_connections(self) -> int:
"""Get approximate number of active connections (for health checks)."""
if not self._default_client:
return 0
# httpx doesn't expose this directly; return the configured pool size as an approximation
return self.max_connections
@property
def pool_size(self) -> int:
"""Get configured pool size."""
return self.max_connections
@property
def executor_active(self) -> int:
"""Get number of active thread pool workers."""
if not self._executor:
return 0
return len(self._executor._threads)
@property
def executor_max(self) -> int:
"""Get max thread pool workers."""
return self.worker_threads
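
A usage sketch (not part of this diff), assuming manager is the HttpClientManager that main.py stores on app.state.http_client; the URL and destination path are placeholders:

# Hypothetical caller: pooled upstream request plus blocking work in the thread pool.
import zipfile
from .http_client import HttpClientManager  # this module

async def download_and_list(manager: HttpClientManager, url: str, dest: str) -> list[str]:
    client = manager.get_client()  # shared pooled httpx.AsyncClient
    resp = await client.get(url)
    resp.raise_for_status()
    with open(dest, "wb") as fh:
        fh.write(resp.content)
    # zipfile is blocking, so run it on the manager's executor instead of the event loop.
    return await manager.run_blocking(lambda: zipfile.ZipFile(dest).namelist())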

View File

@@ -15,6 +15,8 @@ from .pypi_proxy import router as pypi_router
from .seed import seed_database
from .auth import create_default_admin
from .rate_limit import limiter
from .http_client import HttpClientManager
from .cache_service import CacheService
settings = get_settings()
logging.basicConfig(level=logging.INFO)
@@ -38,6 +40,17 @@ async def lifespan(app: FastAPI):
finally:
db.close()
# Initialize infrastructure services
logger.info("Initializing infrastructure services...")
app.state.http_client = HttpClientManager(settings)
await app.state.http_client.startup()
app.state.cache = CacheService(settings)
await app.state.cache.startup()
logger.info("Infrastructure services ready")
# Seed test data in development mode
if settings.is_development:
logger.info(f"Running in {settings.env} mode - checking for seed data")
@@ -50,7 +63,12 @@ async def lifespan(app: FastAPI):
logger.info(f"Running in {settings.env} mode - skipping seed data")
yield
# Shutdown: cleanup if needed
# Shutdown infrastructure services
logger.info("Shutting down infrastructure services...")
await app.state.http_client.shutdown()
await app.state.cache.shutdown()
logger.info("Shutdown complete")
app = FastAPI(

View File

@@ -71,7 +71,6 @@ class Package(Base):
)
project = relationship("Project", back_populates="packages")
tags = relationship("Tag", back_populates="package", cascade="all, delete-orphan")
uploads = relationship(
"Upload", back_populates="package", cascade="all, delete-orphan"
)
@@ -120,7 +119,6 @@ class Artifact(Base):
ref_count = Column(Integer, default=1)
s3_key = Column(String(1024), nullable=False)
tags = relationship("Tag", back_populates="artifact")
uploads = relationship("Upload", back_populates="artifact")
versions = relationship("PackageVersion", back_populates="artifact")
dependencies = relationship(
@@ -151,65 +149,6 @@ class Artifact(Base):
)
class Tag(Base):
__tablename__ = "tags"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
package_id = Column(
UUID(as_uuid=True),
ForeignKey("packages.id", ondelete="CASCADE"),
nullable=False,
)
name = Column(String(255), nullable=False)
artifact_id = Column(String(64), ForeignKey("artifacts.id"), nullable=False)
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
updated_at = Column(
DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow
)
created_by = Column(String(255), nullable=False)
package = relationship("Package", back_populates="tags")
artifact = relationship("Artifact", back_populates="tags")
history = relationship(
"TagHistory", back_populates="tag", cascade="all, delete-orphan"
)
__table_args__ = (
Index("idx_tags_package_id", "package_id"),
Index("idx_tags_artifact_id", "artifact_id"),
Index(
"idx_tags_package_name", "package_id", "name", unique=True
), # Composite unique index
Index(
"idx_tags_package_created_at", "package_id", "created_at"
), # For recent tags queries
)
class TagHistory(Base):
__tablename__ = "tag_history"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
tag_id = Column(
UUID(as_uuid=True), ForeignKey("tags.id", ondelete="CASCADE"), nullable=False
)
old_artifact_id = Column(String(64), ForeignKey("artifacts.id"))
new_artifact_id = Column(String(64), ForeignKey("artifacts.id"), nullable=False)
change_type = Column(String(20), nullable=False, default="update")
changed_at = Column(DateTime(timezone=True), default=datetime.utcnow)
changed_by = Column(String(255), nullable=False)
tag = relationship("Tag", back_populates="history")
__table_args__ = (
Index("idx_tag_history_tag_id", "tag_id"),
Index("idx_tag_history_changed_at", "changed_at"),
CheckConstraint(
"change_type IN ('create', 'update', 'delete')", name="check_change_type"
),
)
class PackageVersion(Base):
"""Immutable version record for a package-artifact relationship.
@@ -249,7 +188,7 @@ class Upload(Base):
artifact_id = Column(String(64), ForeignKey("artifacts.id"), nullable=False)
package_id = Column(UUID(as_uuid=True), ForeignKey("packages.id"), nullable=False)
original_name = Column(String(1024))
tag_name = Column(String(255)) # Tag assigned during upload
version = Column(String(255)) # Version assigned during upload
user_agent = Column(String(512)) # Client identification
duration_ms = Column(Integer) # Upload timing in milliseconds
deduplicated = Column(Boolean, default=False) # Whether artifact was deduplicated
@@ -524,8 +463,8 @@ class PackageHistory(Base):
class ArtifactDependency(Base):
"""Dependency declared by an artifact on another package.
Each artifact can declare dependencies on other packages, specifying either
an exact version or a tag. This enables recursive dependency resolution.
Each artifact can declare dependencies on other packages, specifying a version.
This enables recursive dependency resolution.
"""
__tablename__ = "artifact_dependencies"
@@ -538,20 +477,13 @@ class ArtifactDependency(Base):
)
dependency_project = Column(String(255), nullable=False)
dependency_package = Column(String(255), nullable=False)
version_constraint = Column(String(255), nullable=True)
tag_constraint = Column(String(255), nullable=True)
version_constraint = Column(String(255), nullable=False)
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
# Relationship to the artifact that declares this dependency
artifact = relationship("Artifact", back_populates="dependencies")
__table_args__ = (
# Exactly one of version_constraint or tag_constraint must be set
CheckConstraint(
"(version_constraint IS NOT NULL AND tag_constraint IS NULL) OR "
"(version_constraint IS NULL AND tag_constraint IS NOT NULL)",
name="check_constraint_type",
),
# Each artifact can only depend on a specific project/package once
Index(
"idx_artifact_dependencies_artifact_id",

View File

@@ -12,7 +12,6 @@ from .models import (
Project,
Package,
Artifact,
Tag,
Upload,
PackageVersion,
ArtifactDependency,
@@ -60,7 +59,6 @@ def purge_seed_data(db: Session) -> dict:
results = {
"dependencies_deleted": 0,
"tags_deleted": 0,
"versions_deleted": 0,
"uploads_deleted": 0,
"artifacts_deleted": 0,
@@ -103,15 +101,7 @@ def purge_seed_data(db: Session) -> dict:
results["dependencies_deleted"] = count
logger.info(f"Deleted {count} artifact dependencies")
# 2. Delete tags
if seed_package_ids:
count = db.query(Tag).filter(Tag.package_id.in_(seed_package_ids)).delete(
synchronize_session=False
)
results["tags_deleted"] = count
logger.info(f"Deleted {count} tags")
# 3. Delete package versions
# 2. Delete package versions
if seed_package_ids:
count = db.query(PackageVersion).filter(
PackageVersion.package_id.in_(seed_package_ids)
@@ -119,7 +109,7 @@ def purge_seed_data(db: Session) -> dict:
results["versions_deleted"] = count
logger.info(f"Deleted {count} package versions")
# 4. Delete uploads
# 3. Delete uploads
if seed_package_ids:
count = db.query(Upload).filter(Upload.package_id.in_(seed_package_ids)).delete(
synchronize_session=False
@@ -127,7 +117,7 @@ def purge_seed_data(db: Session) -> dict:
results["uploads_deleted"] = count
logger.info(f"Deleted {count} uploads")
# 5. Delete S3 objects for seed artifacts
# 4. Delete S3 objects for seed artifacts
if seed_artifact_ids:
seed_artifacts = db.query(Artifact).filter(Artifact.id.in_(seed_artifact_ids)).all()
for artifact in seed_artifacts:
@@ -139,8 +129,8 @@ def purge_seed_data(db: Session) -> dict:
logger.warning(f"Failed to delete S3 object {artifact.s3_key}: {e}")
logger.info(f"Deleted {results['s3_objects_deleted']} S3 objects")
# 6. Delete artifacts (only those with ref_count that would be 0 after our deletions)
# Since we deleted all tags/versions pointing to these artifacts, we can delete them
# 5. Delete artifacts (only those with ref_count that would be 0 after our deletions)
# Since we deleted all versions pointing to these artifacts, we can delete them
if seed_artifact_ids:
count = db.query(Artifact).filter(Artifact.id.in_(seed_artifact_ids)).delete(
synchronize_session=False
@@ -148,7 +138,7 @@ def purge_seed_data(db: Session) -> dict:
results["artifacts_deleted"] = count
logger.info(f"Deleted {count} artifacts")
# 7. Delete packages
# 6. Delete packages
if seed_package_ids:
count = db.query(Package).filter(Package.id.in_(seed_package_ids)).delete(
synchronize_session=False
@@ -156,7 +146,7 @@ def purge_seed_data(db: Session) -> dict:
results["packages_deleted"] = count
logger.info(f"Deleted {count} packages")
# 8. Delete access permissions for seed projects
# 7. Delete access permissions for seed projects
if seed_project_ids:
count = db.query(AccessPermission).filter(
AccessPermission.project_id.in_(seed_project_ids)
@@ -164,14 +154,14 @@ def purge_seed_data(db: Session) -> dict:
results["permissions_deleted"] = count
logger.info(f"Deleted {count} access permissions")
# 9. Delete seed projects
# 8. Delete seed projects
count = db.query(Project).filter(Project.name.in_(SEED_PROJECT_NAMES)).delete(
synchronize_session=False
)
results["projects_deleted"] = count
logger.info(f"Deleted {count} projects")
# 10. Find and delete seed team
# 9. Find and delete seed team
seed_team = db.query(Team).filter(Team.slug == SEED_TEAM_SLUG).first()
if seed_team:
# Delete team memberships first
@@ -186,7 +176,7 @@ def purge_seed_data(db: Session) -> dict:
results["teams_deleted"] = 1
logger.info(f"Deleted team: {SEED_TEAM_SLUG}")
# 11. Delete seed users (but NOT admin)
# 10. Delete seed users (but NOT admin)
seed_users = db.query(User).filter(User.username.in_(SEED_USERNAMES)).all()
for user in seed_users:
# Delete any remaining team memberships for this user

File diff suppressed because it is too large
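
The pypi_proxy.py change named in the commit title appears to sit inside one of these collapsed diffs, so it is not shown here. For reference, these are the two valid httpx.Timeout forms this commit uses elsewhere (httpx accepts either a default value with per-phase overrides, or all four phases given explicitly):

# Patterns used in registry_client.py and http_client.py respectively.
import httpx

# Default timeout plus a connect override (registry_client.py style).
api_timeout = httpx.Timeout(60.0, connect=30.0)

# All four phases set explicitly, so no default is required (http_client.py style).
pool_timeout = httpx.Timeout(connect=30.0, read=60.0, write=60.0, pool=30.0)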

View File

@@ -0,0 +1,426 @@
"""
Registry client abstraction for upstream package registries.
Provides a pluggable interface for fetching packages from upstream registries
(PyPI, npm, Maven, etc.) during dependency resolution with auto-fetch enabled.
"""
import hashlib
import logging
import os
import re
import tempfile
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import List, Optional, TYPE_CHECKING
from urllib.parse import urljoin, urlparse
import httpx
from packaging.specifiers import SpecifierSet, InvalidSpecifier
from packaging.version import Version, InvalidVersion
from sqlalchemy.orm import Session
if TYPE_CHECKING:
from .storage import S3Storage
from .http_client import HttpClientManager
logger = logging.getLogger(__name__)
@dataclass
class VersionInfo:
"""Information about a package version from an upstream registry."""
version: str
download_url: str
filename: str
sha256: Optional[str] = None
size: Optional[int] = None
content_type: Optional[str] = None
@dataclass
class FetchResult:
"""Result of fetching a package from upstream."""
artifact_id: str # SHA256 hash
size: int
version: str
filename: str
already_cached: bool = False
class RegistryClient(ABC):
"""Abstract base class for upstream registry clients."""
@property
@abstractmethod
def source_type(self) -> str:
"""Return the source type this client handles (e.g., 'pypi', 'npm')."""
pass
@abstractmethod
async def get_available_versions(self, package_name: str) -> List[str]:
"""
Get all available versions of a package from upstream.
Args:
package_name: The normalized package name
Returns:
List of version strings, sorted from oldest to newest
"""
pass
@abstractmethod
async def resolve_constraint(
self, package_name: str, constraint: str
) -> Optional[VersionInfo]:
"""
Find the best version matching a constraint.
Args:
package_name: The normalized package name
constraint: Version constraint (e.g., '>=1.9', '<2.0,>=1.5', '*')
Returns:
VersionInfo with download URL, or None if no matching version found
"""
pass
@abstractmethod
async def fetch_package(
self,
package_name: str,
version_info: VersionInfo,
db: Session,
storage: "S3Storage",
) -> Optional[FetchResult]:
"""
Fetch and cache a package from upstream.
Args:
package_name: The normalized package name
version_info: Version details including download URL
db: Database session for creating records
storage: S3 storage for caching the artifact
Returns:
FetchResult with artifact_id, or None if fetch failed
"""
pass
class PyPIRegistryClient(RegistryClient):
"""PyPI registry client using the JSON API."""
# Timeout configuration for PyPI requests
CONNECT_TIMEOUT = 30.0
READ_TIMEOUT = 60.0
DOWNLOAD_TIMEOUT = 300.0 # Longer timeout for file downloads
def __init__(
self,
http_client: httpx.AsyncClient,
upstream_sources: List,
pypi_api_url: str = "https://pypi.org/pypi",
):
"""
Initialize PyPI registry client.
Args:
http_client: Shared async HTTP client
upstream_sources: List of configured upstream sources for auth
pypi_api_url: Base URL for PyPI JSON API
"""
self.client = http_client
self.sources = upstream_sources
self.api_url = pypi_api_url
@property
def source_type(self) -> str:
return "pypi"
def _normalize_package_name(self, name: str) -> str:
"""Normalize a PyPI package name per PEP 503."""
return re.sub(r"[-_.]+", "-", name).lower()
def _get_auth_headers(self) -> dict:
"""Get authentication headers from configured sources."""
headers = {"User-Agent": "Orchard-Registry-Client/1.0"}
if self.sources:
source = self.sources[0]
if hasattr(source, "auth_type"):
if source.auth_type == "bearer":
password = (
source.get_password()
if hasattr(source, "get_password")
else getattr(source, "password", None)
)
if password:
headers["Authorization"] = f"Bearer {password}"
elif source.auth_type == "api_key":
custom_headers = (
source.get_headers()
if hasattr(source, "get_headers")
else {}
)
if custom_headers:
headers.update(custom_headers)
return headers
def _get_basic_auth(self) -> Optional[tuple]:
"""Get basic auth credentials if configured."""
if self.sources:
source = self.sources[0]
if hasattr(source, "auth_type") and source.auth_type == "basic":
username = getattr(source, "username", None)
if username:
password = (
source.get_password()
if hasattr(source, "get_password")
else getattr(source, "password", "")
)
return (username, password or "")
return None
async def get_available_versions(self, package_name: str) -> List[str]:
"""Get all available versions from PyPI JSON API."""
normalized = self._normalize_package_name(package_name)
url = f"{self.api_url}/{normalized}/json"
headers = self._get_auth_headers()
auth = self._get_basic_auth()
timeout = httpx.Timeout(self.READ_TIMEOUT, connect=self.CONNECT_TIMEOUT)
try:
response = await self.client.get(
url, headers=headers, auth=auth, timeout=timeout
)
if response.status_code == 404:
logger.debug(f"Package {normalized} not found on PyPI")
return []
if response.status_code != 200:
logger.warning(
f"PyPI API returned {response.status_code} for {normalized}"
)
return []
data = response.json()
releases = data.get("releases", {})
# Filter to valid versions and sort
versions = []
for v in releases.keys():
try:
Version(v)
versions.append(v)
except InvalidVersion:
continue
versions.sort(key=lambda x: Version(x))
return versions
except httpx.RequestError as e:
logger.warning(f"Failed to query PyPI for {normalized}: {e}")
return []
except Exception as e:
logger.warning(f"Error parsing PyPI response for {normalized}: {e}")
return []
async def resolve_constraint(
self, package_name: str, constraint: str
) -> Optional[VersionInfo]:
"""Find best version matching constraint from PyPI."""
normalized = self._normalize_package_name(package_name)
url = f"{self.api_url}/{normalized}/json"
headers = self._get_auth_headers()
auth = self._get_basic_auth()
timeout = httpx.Timeout(self.READ_TIMEOUT, connect=self.CONNECT_TIMEOUT)
try:
response = await self.client.get(
url, headers=headers, auth=auth, timeout=timeout
)
if response.status_code == 404:
logger.debug(f"Package {normalized} not found on PyPI")
return None
if response.status_code != 200:
logger.warning(
f"PyPI API returned {response.status_code} for {normalized}"
)
return None
data = response.json()
releases = data.get("releases", {})
# Handle wildcard - return latest version
if constraint == "*":
latest_version = data.get("info", {}).get("version")
if latest_version and latest_version in releases:
return self._get_version_info(
normalized, latest_version, releases[latest_version]
)
return None
# Parse constraint
# If constraint looks like a bare version (no operator), treat as exact match
# e.g., "2025.10.5" -> "==2025.10.5"
effective_constraint = constraint
if constraint and constraint[0].isdigit():
effective_constraint = f"=={constraint}"
logger.debug(
f"Bare version '{constraint}' for {normalized}, "
f"treating as exact match '{effective_constraint}'"
)
try:
specifier = SpecifierSet(effective_constraint)
except InvalidSpecifier:
# Invalid constraint - treat as wildcard
logger.warning(
f"Invalid version constraint '{constraint}' for {normalized}, "
"treating as wildcard"
)
latest_version = data.get("info", {}).get("version")
if latest_version and latest_version in releases:
return self._get_version_info(
normalized, latest_version, releases[latest_version]
)
return None
# Find matching versions
matching = []
for v_str, files in releases.items():
if not files: # Skip versions with no files
continue
try:
v = Version(v_str)
if v in specifier:
matching.append((v_str, v, files))
except InvalidVersion:
continue
if not matching:
logger.debug(
f"No versions of {normalized} match constraint '{constraint}'"
)
return None
# Sort by version and return highest match
matching.sort(key=lambda x: x[1], reverse=True)
best_version, _, best_files = matching[0]
return self._get_version_info(normalized, best_version, best_files)
except httpx.RequestError as e:
logger.warning(f"Failed to query PyPI for {normalized}: {e}")
return None
except Exception as e:
logger.warning(f"Error resolving {normalized}@{constraint}: {e}")
return None
def _get_version_info(
self, package_name: str, version: str, files: List[dict]
) -> Optional[VersionInfo]:
"""Extract download info from PyPI release files."""
if not files:
return None
# Prefer wheel over sdist
wheel_file = None
sdist_file = None
for f in files:
filename = f.get("filename", "")
if filename.endswith(".whl"):
# Prefer platform-agnostic wheels
if "py3-none-any" in filename or wheel_file is None:
wheel_file = f
elif filename.endswith(".tar.gz") and sdist_file is None:
sdist_file = f
selected = wheel_file or sdist_file
if not selected:
# Fall back to first available file
selected = files[0]
return VersionInfo(
version=version,
download_url=selected.get("url", ""),
filename=selected.get("filename", ""),
sha256=selected.get("digests", {}).get("sha256"),
size=selected.get("size"),
content_type="application/zip"
if selected.get("filename", "").endswith(".whl")
else "application/gzip",
)
async def fetch_package(
self,
package_name: str,
version_info: VersionInfo,
db: Session,
storage: "S3Storage",
) -> Optional[FetchResult]:
"""Fetch and cache a PyPI package."""
# Import here to avoid circular imports
from .pypi_proxy import fetch_and_cache_pypi_package
normalized = self._normalize_package_name(package_name)
logger.info(
f"Fetching {normalized}=={version_info.version} from upstream PyPI"
)
result = await fetch_and_cache_pypi_package(
db=db,
storage=storage,
http_client=self.client,
package_name=normalized,
filename=version_info.filename,
download_url=version_info.download_url,
expected_sha256=version_info.sha256,
)
if result is None:
return None
return FetchResult(
artifact_id=result["artifact_id"],
size=result["size"],
version=version_info.version,
filename=version_info.filename,
already_cached=result.get("already_cached", False),
)
def get_registry_client(
source_type: str,
http_client: httpx.AsyncClient,
upstream_sources: List,
) -> Optional[RegistryClient]:
"""
Factory function to get a registry client for a source type.
Args:
source_type: The registry type ('pypi', 'npm', etc.)
http_client: Shared async HTTP client
upstream_sources: List of configured upstream sources
Returns:
RegistryClient for the source type, or None if not supported
"""
if source_type == "pypi":
# Filter to PyPI sources
pypi_sources = [s for s in upstream_sources if getattr(s, "source_type", "") == "pypi"]
return PyPIRegistryClient(http_client, pypi_sources)
# Future: Add npm, maven, etc.
logger.debug(f"No registry client available for source type: {source_type}")
return None
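
A sketch of the intended auto-fetch flow (not part of this diff); the caller is assumed to supply the pooled HTTP client, DB session, S3 storage, and configured upstream sources:

# Hypothetical caller: resolve and cache one dependency from upstream PyPI.
async def auto_fetch_dependency(package, constraint, client, db, storage, sources):
    registry = get_registry_client("pypi", client, sources)
    if registry is None:
        return None  # source type not supported
    version_info = await registry.resolve_constraint(package, constraint)
    if version_info is None:
        return None  # nothing upstream satisfies the constraint
    # Downloads via pypi_proxy.fetch_and_cache_pypi_package and records the artifact.
    return await registry.fetch_package(package, version_info, db, storage)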

View File

@@ -9,7 +9,6 @@ from .base import BaseRepository
from .project import ProjectRepository
from .package import PackageRepository
from .artifact import ArtifactRepository
from .tag import TagRepository
from .upload import UploadRepository
__all__ = [
@@ -17,6 +16,5 @@ __all__ = [
"ProjectRepository",
"PackageRepository",
"ArtifactRepository",
"TagRepository",
"UploadRepository",
]

View File

@@ -8,7 +8,7 @@ from sqlalchemy import func, or_
from uuid import UUID
from .base import BaseRepository
from ..models import Artifact, Tag, Upload, Package, Project
from ..models import Artifact, PackageVersion, Upload, Package, Project
class ArtifactRepository(BaseRepository[Artifact]):
@@ -77,14 +77,14 @@ class ArtifactRepository(BaseRepository[Artifact]):
.all()
)
def get_artifacts_without_tags(self, limit: int = 100) -> List[Artifact]:
"""Get artifacts that have no tags pointing to them."""
# Subquery to find artifact IDs that have tags
tagged_artifacts = self.db.query(Tag.artifact_id).distinct().subquery()
def get_artifacts_without_versions(self, limit: int = 100) -> List[Artifact]:
"""Get artifacts that have no versions pointing to them."""
# Subquery to find artifact IDs that have versions
versioned_artifacts = self.db.query(PackageVersion.artifact_id).distinct().subquery()
return (
self.db.query(Artifact)
.filter(~Artifact.id.in_(tagged_artifacts))
.filter(~Artifact.id.in_(versioned_artifacts))
.limit(limit)
.all()
)
@@ -115,34 +115,34 @@ class ArtifactRepository(BaseRepository[Artifact]):
return artifacts, total
def get_referencing_tags(self, artifact_id: str) -> List[Tuple[Tag, Package, Project]]:
"""Get all tags referencing this artifact with package and project info."""
def get_referencing_versions(self, artifact_id: str) -> List[Tuple[PackageVersion, Package, Project]]:
"""Get all versions referencing this artifact with package and project info."""
return (
self.db.query(Tag, Package, Project)
.join(Package, Tag.package_id == Package.id)
self.db.query(PackageVersion, Package, Project)
.join(Package, PackageVersion.package_id == Package.id)
.join(Project, Package.project_id == Project.id)
.filter(Tag.artifact_id == artifact_id)
.filter(PackageVersion.artifact_id == artifact_id)
.all()
)
def search(self, query_str: str, limit: int = 10) -> List[Tuple[Tag, Artifact, str, str]]:
def search(self, query_str: str, limit: int = 10) -> List[Tuple[PackageVersion, Artifact, str, str]]:
"""
Search artifacts by tag name or original filename.
Returns (tag, artifact, package_name, project_name) tuples.
Search artifacts by version or original filename.
Returns (version, artifact, package_name, project_name) tuples.
"""
search_lower = query_str.lower()
return (
self.db.query(Tag, Artifact, Package.name, Project.name)
.join(Artifact, Tag.artifact_id == Artifact.id)
.join(Package, Tag.package_id == Package.id)
self.db.query(PackageVersion, Artifact, Package.name, Project.name)
.join(Artifact, PackageVersion.artifact_id == Artifact.id)
.join(Package, PackageVersion.package_id == Package.id)
.join(Project, Package.project_id == Project.id)
.filter(
or_(
func.lower(Tag.name).contains(search_lower),
func.lower(PackageVersion.version).contains(search_lower),
func.lower(Artifact.original_name).contains(search_lower)
)
)
.order_by(Tag.name)
.order_by(PackageVersion.version)
.limit(limit)
.all()
)

View File

@@ -8,7 +8,7 @@ from sqlalchemy import func, or_, asc, desc
from uuid import UUID
from .base import BaseRepository
from ..models import Package, Project, Tag, Upload, Artifact
from ..models import Package, Project, PackageVersion, Upload, Artifact
class PackageRepository(BaseRepository[Package]):
@@ -136,10 +136,10 @@ class PackageRepository(BaseRepository[Package]):
return self.update(package, **updates)
def get_stats(self, package_id: UUID) -> dict:
"""Get package statistics (tag count, artifact count, total size)."""
tag_count = (
self.db.query(func.count(Tag.id))
.filter(Tag.package_id == package_id)
"""Get package statistics (version count, artifact count, total size)."""
version_count = (
self.db.query(func.count(PackageVersion.id))
.filter(PackageVersion.package_id == package_id)
.scalar() or 0
)
@@ -154,7 +154,7 @@ class PackageRepository(BaseRepository[Package]):
)
return {
"tag_count": tag_count,
"version_count": version_count,
"artifact_count": artifact_stats[0] if artifact_stats else 0,
"total_size": artifact_stats[1] if artifact_stats else 0,
}

View File

@@ -1,168 +0,0 @@
"""
Tag repository for data access operations.
"""
from typing import Optional, List, Tuple
from sqlalchemy.orm import Session
from sqlalchemy import func, or_, asc, desc
from uuid import UUID
from .base import BaseRepository
from ..models import Tag, TagHistory, Artifact, Package, Project
class TagRepository(BaseRepository[Tag]):
"""Repository for Tag entity operations."""
model = Tag
def get_by_name(self, package_id: UUID, name: str) -> Optional[Tag]:
"""Get tag by name within a package."""
return (
self.db.query(Tag)
.filter(Tag.package_id == package_id, Tag.name == name)
.first()
)
def get_with_artifact(self, package_id: UUID, name: str) -> Optional[Tuple[Tag, Artifact]]:
"""Get tag with its artifact."""
return (
self.db.query(Tag, Artifact)
.join(Artifact, Tag.artifact_id == Artifact.id)
.filter(Tag.package_id == package_id, Tag.name == name)
.first()
)
def exists_by_name(self, package_id: UUID, name: str) -> bool:
"""Check if tag with name exists in package."""
return self.db.query(
self.db.query(Tag)
.filter(Tag.package_id == package_id, Tag.name == name)
.exists()
).scalar()
def list_by_package(
self,
package_id: UUID,
page: int = 1,
limit: int = 20,
search: Optional[str] = None,
sort: str = "name",
order: str = "asc",
) -> Tuple[List[Tuple[Tag, Artifact]], int]:
"""
List tags in a package with artifact metadata.
Returns tuple of ((tag, artifact) tuples, total_count).
"""
query = (
self.db.query(Tag, Artifact)
.join(Artifact, Tag.artifact_id == Artifact.id)
.filter(Tag.package_id == package_id)
)
# Apply search filter (tag name or artifact original filename)
if search:
search_lower = search.lower()
query = query.filter(
or_(
func.lower(Tag.name).contains(search_lower),
func.lower(Artifact.original_name).contains(search_lower)
)
)
# Get total count
total = query.count()
# Apply sorting
sort_columns = {
"name": Tag.name,
"created_at": Tag.created_at,
}
sort_column = sort_columns.get(sort, Tag.name)
if order == "desc":
query = query.order_by(desc(sort_column))
else:
query = query.order_by(asc(sort_column))
# Apply pagination
offset = (page - 1) * limit
results = query.offset(offset).limit(limit).all()
return results, total
def create_tag(
self,
package_id: UUID,
name: str,
artifact_id: str,
created_by: str,
) -> Tag:
"""Create a new tag."""
return self.create(
package_id=package_id,
name=name,
artifact_id=artifact_id,
created_by=created_by,
)
def update_artifact(
self,
tag: Tag,
new_artifact_id: str,
changed_by: str,
record_history: bool = True,
) -> Tag:
"""
Update tag to point to a different artifact.
Optionally records change in tag history.
"""
old_artifact_id = tag.artifact_id
if record_history and old_artifact_id != new_artifact_id:
history = TagHistory(
tag_id=tag.id,
old_artifact_id=old_artifact_id,
new_artifact_id=new_artifact_id,
changed_by=changed_by,
)
self.db.add(history)
tag.artifact_id = new_artifact_id
tag.created_by = changed_by
self.db.flush()
return tag
def get_history(self, tag_id: UUID) -> List[TagHistory]:
"""Get tag change history."""
return (
self.db.query(TagHistory)
.filter(TagHistory.tag_id == tag_id)
.order_by(TagHistory.changed_at.desc())
.all()
)
def get_latest_in_package(self, package_id: UUID) -> Optional[Tag]:
"""Get the most recently created/updated tag in a package."""
return (
self.db.query(Tag)
.filter(Tag.package_id == package_id)
.order_by(Tag.created_at.desc())
.first()
)
def get_by_artifact(self, artifact_id: str) -> List[Tag]:
"""Get all tags pointing to an artifact."""
return (
self.db.query(Tag)
.filter(Tag.artifact_id == artifact_id)
.all()
)
def count_by_artifact(self, artifact_id: str) -> int:
"""Count tags pointing to an artifact."""
return (
self.db.query(func.count(Tag.id))
.filter(Tag.artifact_id == artifact_id)
.scalar() or 0
)

File diff suppressed because it is too large

View File

@@ -33,6 +33,7 @@ class ProjectResponse(BaseModel):
name: str
description: Optional[str]
is_public: bool
is_system: bool = False
created_at: datetime
updated_at: datetime
created_by: str
@@ -113,14 +114,6 @@ class PackageUpdate(BaseModel):
platform: Optional[str] = None
class TagSummary(BaseModel):
"""Lightweight tag info for embedding in package responses"""
name: str
artifact_id: str
created_at: datetime
class PackageDetailResponse(BaseModel):
"""Package with aggregated metadata"""
@@ -133,13 +126,9 @@ class PackageDetailResponse(BaseModel):
created_at: datetime
updated_at: datetime
# Aggregated fields
tag_count: int = 0
artifact_count: int = 0
total_size: int = 0
latest_tag: Optional[str] = None
latest_upload_at: Optional[datetime] = None
# Recent tags (limit 5)
recent_tags: List[TagSummary] = []
class Config:
from_attributes = True
@@ -164,79 +153,6 @@ class ArtifactResponse(BaseModel):
from_attributes = True
# Tag schemas
class TagCreate(BaseModel):
name: str
artifact_id: str
class TagResponse(BaseModel):
id: UUID
package_id: UUID
name: str
artifact_id: str
created_at: datetime
created_by: str
version: Optional[str] = None # Version of the artifact this tag points to
class Config:
from_attributes = True
class TagDetailResponse(BaseModel):
"""Tag with embedded artifact metadata"""
id: UUID
package_id: UUID
name: str
artifact_id: str
created_at: datetime
created_by: str
version: Optional[str] = None # Version of the artifact this tag points to
# Artifact metadata
artifact_size: int
artifact_content_type: Optional[str]
artifact_original_name: Optional[str]
artifact_created_at: datetime
artifact_format_metadata: Optional[Dict[str, Any]] = None
class Config:
from_attributes = True
class TagHistoryResponse(BaseModel):
"""History entry for tag changes"""
id: UUID
tag_id: UUID
old_artifact_id: Optional[str]
new_artifact_id: str
changed_at: datetime
changed_by: str
class Config:
from_attributes = True
class TagHistoryDetailResponse(BaseModel):
"""Tag history with artifact metadata for each version"""
id: UUID
tag_id: UUID
tag_name: str
old_artifact_id: Optional[str]
new_artifact_id: str
changed_at: datetime
changed_by: str
# Artifact metadata for new artifact
artifact_size: int
artifact_original_name: Optional[str]
artifact_content_type: Optional[str]
class Config:
from_attributes = True
# Audit log schemas
class AuditLogResponse(BaseModel):
"""Audit log entry response"""
@@ -263,7 +179,7 @@ class UploadHistoryResponse(BaseModel):
package_name: str
project_name: str
original_name: Optional[str]
tag_name: Optional[str]
version: Optional[str]
uploaded_at: datetime
uploaded_by: str
source_ip: Optional[str]
@@ -294,10 +210,10 @@ class ArtifactProvenanceResponse(BaseModel):
# Usage statistics
upload_count: int
# References
packages: List[Dict[str, Any]] # List of {project_name, package_name, tag_names}
tags: List[
packages: List[Dict[str, Any]] # List of {project_name, package_name, versions}
versions: List[
Dict[str, Any]
] # List of {project_name, package_name, tag_name, created_at}
] # List of {project_name, package_name, version, created_at}
# Upload history
uploads: List[Dict[str, Any]] # List of upload events
@@ -305,18 +221,8 @@ class ArtifactProvenanceResponse(BaseModel):
from_attributes = True
class ArtifactTagInfo(BaseModel):
"""Tag info for embedding in artifact responses"""
id: UUID
name: str
package_id: UUID
package_name: str
project_name: str
class ArtifactDetailResponse(BaseModel):
"""Artifact with list of tags/packages referencing it"""
"""Artifact with metadata"""
id: str
sha256: str # Explicit SHA256 field (same as id)
@@ -330,14 +236,14 @@ class ArtifactDetailResponse(BaseModel):
created_by: str
ref_count: int
format_metadata: Optional[Dict[str, Any]] = None
tags: List[ArtifactTagInfo] = []
versions: List[Dict[str, Any]] = [] # List of {version, package_name, project_name}
class Config:
from_attributes = True
class PackageArtifactResponse(BaseModel):
"""Artifact with tags for package artifact listing"""
"""Artifact for package artifact listing"""
id: str
sha256: str # Explicit SHA256 field (same as id)
@@ -350,7 +256,7 @@ class PackageArtifactResponse(BaseModel):
created_at: datetime
created_by: str
format_metadata: Optional[Dict[str, Any]] = None
tags: List[str] = [] # Tag names pointing to this artifact
version: Optional[str] = None # Version from PackageVersion if exists
class Config:
from_attributes = True
@@ -368,28 +274,9 @@ class GlobalArtifactResponse(BaseModel):
created_by: str
format_metadata: Optional[Dict[str, Any]] = None
ref_count: int = 0
# Context from tags/packages
# Context from versions/packages
projects: List[str] = [] # List of project names containing this artifact
packages: List[str] = [] # List of "project/package" paths
tags: List[str] = [] # List of "project/package:tag" references
class Config:
from_attributes = True
class GlobalTagResponse(BaseModel):
"""Tag with project/package context for global listing"""
id: UUID
name: str
artifact_id: str
created_at: datetime
created_by: str
project_name: str
package_name: str
artifact_size: Optional[int] = None
artifact_content_type: Optional[str] = None
version: Optional[str] = None # Version of the artifact this tag points to
class Config:
from_attributes = True
@@ -402,7 +289,6 @@ class UploadResponse(BaseModel):
size: int
project: str
package: str
tag: Optional[str]
version: Optional[str] = None # Version assigned to this artifact
version_source: Optional[str] = None # How version was determined: 'explicit', 'filename', 'metadata'
checksum_md5: Optional[str] = None
@@ -429,7 +315,6 @@ class ResumableUploadInitRequest(BaseModel):
filename: str
content_type: Optional[str] = None
size: int
tag: Optional[str] = None
version: Optional[str] = None # Explicit version (auto-detected if not provided)
@field_validator("expected_hash")
@@ -464,7 +349,7 @@ class ResumableUploadPartResponse(BaseModel):
class ResumableUploadCompleteRequest(BaseModel):
"""Request to complete a resumable upload"""
tag: Optional[str] = None
pass
class ResumableUploadCompleteResponse(BaseModel):
@@ -474,7 +359,6 @@ class ResumableUploadCompleteResponse(BaseModel):
size: int
project: str
package: str
tag: Optional[str]
class ResumableUploadStatusResponse(BaseModel):
@@ -527,7 +411,6 @@ class PackageVersionResponse(BaseModel):
size: Optional[int] = None
content_type: Optional[str] = None
original_name: Optional[str] = None
tags: List[str] = [] # Tag names pointing to this artifact
class Config:
from_attributes = True
@@ -569,11 +452,10 @@ class SearchResultPackage(BaseModel):
class SearchResultArtifact(BaseModel):
"""Artifact/tag result for global search"""
"""Artifact result for global search"""
tag_id: UUID
tag_name: str
artifact_id: str
version: Optional[str]
package_id: UUID
package_name: str
project_name: str
@@ -611,6 +493,8 @@ class HealthResponse(BaseModel):
version: str = "1.0.0"
storage_healthy: Optional[bool] = None
database_healthy: Optional[bool] = None
http_pool: Optional[Dict[str, Any]] = None
cache: Optional[Dict[str, Any]] = None
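For orientation, a health payload carrying the two new optional fields might look like the sketch below; the nested dictionary shapes are assumptions, not defined by the schema.

# Illustrative health payload; the nested dict shapes are assumptions.
health = {
    "version": "1.0.0",
    "storage_healthy": True,
    "database_healthy": True,
    "http_pool": {"active": 2, "idle": 8},
    "cache": {"enabled": True, "hits": 1532, "misses": 87},
}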
# Garbage collection schemas
@@ -686,7 +570,7 @@ class ProjectStatsResponse(BaseModel):
project_id: str
project_name: str
package_count: int
tag_count: int
version_count: int
artifact_count: int
total_size_bytes: int
upload_count: int
@@ -701,7 +585,7 @@ class PackageStatsResponse(BaseModel):
package_id: str
package_name: str
project_name: str
tag_count: int
version_count: int
artifact_count: int
total_size_bytes: int
upload_count: int
@@ -718,9 +602,9 @@ class ArtifactStatsResponse(BaseModel):
size: int
ref_count: int
storage_savings: int # (ref_count - 1) * size
tags: List[Dict[str, Any]] # Tags referencing this artifact
projects: List[str] # Projects using this artifact
packages: List[str] # Packages using this artifact
versions: List[Dict[str, Any]] = [] # List of {version, package_name, project_name}
first_uploaded: Optional[datetime] = None
last_referenced: Optional[datetime] = None
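For example, a 100 MB artifact with ref_count 3 reports storage_savings of 200 MB: (3 - 1) * 100 MB, i.e. two duplicate copies that deduplication avoided storing.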
@@ -929,20 +813,7 @@ class DependencyCreate(BaseModel):
"""Schema for creating a dependency"""
project: str
package: str
version: Optional[str] = None
tag: Optional[str] = None
@field_validator('version', 'tag')
@classmethod
def validate_constraint(cls, v, info):
return v
def model_post_init(self, __context):
"""Validate that exactly one of version or tag is set"""
if self.version is None and self.tag is None:
raise ValueError("Either 'version' or 'tag' must be specified")
if self.version is not None and self.tag is not None:
raise ValueError("Cannot specify both 'version' and 'tag'")
version: str
class DependencyResponse(BaseModel):
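A minimal sketch of the simplified dependency payload (field names are taken from the schema above; the usage itself is illustrative):

# Illustrative usage of the version-only schema; the old tag constraint is gone.
dep = DependencyCreate(
    project="frontend-libs",
    package="design-tokens",
    version="1.0.0",
)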
@@ -951,8 +822,7 @@ class DependencyResponse(BaseModel):
artifact_id: str
project: str
package: str
version: Optional[str] = None
tag: Optional[str] = None
version: str
created_at: datetime
class Config:
@@ -967,7 +837,6 @@ class DependencyResponse(BaseModel):
project=dep.dependency_project,
package=dep.dependency_package,
version=dep.version_constraint,
tag=dep.tag_constraint,
created_at=dep.created_at,
)
@@ -984,7 +853,6 @@ class DependentInfo(BaseModel):
project: str
package: str
version: Optional[str] = None
constraint_type: str # 'version' or 'tag'
constraint_value: str
@@ -1000,20 +868,7 @@ class EnsureFileDependency(BaseModel):
"""Dependency entry from orchard.ensure file"""
project: str
package: str
version: Optional[str] = None
tag: Optional[str] = None
@field_validator('version', 'tag')
@classmethod
def validate_constraint(cls, v, info):
return v
def model_post_init(self, __context):
"""Validate that exactly one of version or tag is set"""
if self.version is None and self.tag is None:
raise ValueError("Either 'version' or 'tag' must be specified")
if self.version is not None and self.tag is not None:
raise ValueError("Cannot specify both 'version' and 'tag'")
version: str
class EnsureFileContent(BaseModel):
@@ -1027,15 +882,26 @@ class ResolvedArtifact(BaseModel):
project: str
package: str
version: Optional[str] = None
tag: Optional[str] = None
size: int
download_url: str
class MissingDependency(BaseModel):
"""A dependency that could not be resolved (not cached on server)"""
project: str
package: str
constraint: Optional[str] = None
required_by: Optional[str] = None
fetch_attempted: bool = False # True if auto-fetch was attempted
fetch_error: Optional[str] = None # Error message if fetch failed
class DependencyResolutionResponse(BaseModel):
"""Response from dependency resolution endpoint"""
requested: Dict[str, str] # project, package, ref
resolved: List[ResolvedArtifact]
missing: List[MissingDependency] = []
fetched: List[ResolvedArtifact] = [] # Artifacts fetched from upstream during resolution
total_size: int
artifact_count: int
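A hedged sketch of how a client might read this response; the field names come from the schemas above, everything else is illustrative:

# Illustrative consumer of a resolution response.
def summarize(resp: DependencyResolutionResponse) -> str:
    fetched = [f"{a.project}/{a.package}" for a in resp.fetched]
    missing = [f"{m.project}/{m.package}" for m in resp.missing]
    return (
        f"{resp.artifact_count} artifacts, {resp.total_size} bytes; "
        f"fetched from upstream: {fetched or 'none'}; "
        f"missing: {missing or 'none'}"
    )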
@@ -1044,7 +910,7 @@ class DependencyConflict(BaseModel):
"""Details about a dependency conflict"""
project: str
package: str
requirements: List[Dict[str, Any]] # version/tag and required_by info
requirements: List[Dict[str, Any]] # version and required_by info
class DependencyConflictError(BaseModel):
@@ -1378,10 +1244,10 @@ class CacheRequest(BaseModel):
url: str
source_type: str
package_name: Optional[str] = None # Auto-derived from URL if not provided
tag: Optional[str] = None # Auto-derived from URL if not provided
version: Optional[str] = None # Auto-derived from URL if not provided
user_project: Optional[str] = None # Cross-reference to user project
user_package: Optional[str] = None
user_tag: Optional[str] = None
user_version: Optional[str] = None
expected_hash: Optional[str] = None # Verify downloaded content
@field_validator('url')
@@ -1428,8 +1294,8 @@ class CacheResponse(BaseModel):
source_name: Optional[str]
system_project: str
system_package: str
system_tag: Optional[str]
user_reference: Optional[str] = None # e.g., "my-app/npm-deps:lodash-4.17.21"
system_version: Optional[str]
user_reference: Optional[str] = None # e.g., "my-app/npm-deps/+/4.17.21"
class CacheResolveRequest(BaseModel):
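The user_reference comment above switches from the old "project/package:tag" form to a version path. A small, hypothetical helper illustrating the new format (not part of the codebase):

# Hypothetical helper; format taken from the comment above.
def build_user_reference(project: str, package: str, version: str) -> str:
    return f"{project}/{package}/+/{version}"

assert build_user_reference("my-app", "npm-deps", "4.17.21") == "my-app/npm-deps/+/4.17.21"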
@@ -1443,7 +1309,7 @@ class CacheResolveRequest(BaseModel):
version: str
user_project: Optional[str] = None
user_package: Optional[str] = None
user_tag: Optional[str] = None
user_version: Optional[str] = None
@field_validator('source_type')
@classmethod

View File

@@ -5,7 +5,7 @@ import hashlib
import logging
from sqlalchemy.orm import Session
from .models import Project, Package, Artifact, Tag, Upload, PackageVersion, ArtifactDependency, Team, TeamMembership, User
from .models import Project, Package, Artifact, Upload, PackageVersion, ArtifactDependency, Team, TeamMembership, User
from .storage import get_storage
from .auth import hash_password
@@ -125,14 +125,14 @@ TEST_ARTIFACTS = [
]
# Dependencies to create (source artifact -> dependency)
# Format: (source_project, source_package, source_version, dep_project, dep_package, version_constraint, tag_constraint)
# Format: (source_project, source_package, source_version, dep_project, dep_package, version_constraint)
TEST_DEPENDENCIES = [
# ui-components v1.1.0 depends on design-tokens v1.0.0
("frontend-libs", "ui-components", "1.1.0", "frontend-libs", "design-tokens", "1.0.0", None),
("frontend-libs", "ui-components", "1.1.0", "frontend-libs", "design-tokens", "1.0.0"),
# auth-lib v1.0.0 depends on common-utils v2.0.0
("backend-services", "auth-lib", "1.0.0", "backend-services", "common-utils", "2.0.0", None),
# auth-lib v1.0.0 also depends on design-tokens (stable tag)
("backend-services", "auth-lib", "1.0.0", "frontend-libs", "design-tokens", None, "latest"),
("backend-services", "auth-lib", "1.0.0", "backend-services", "common-utils", "2.0.0"),
# auth-lib v1.0.0 also depends on design-tokens v1.0.0
("backend-services", "auth-lib", "1.0.0", "frontend-libs", "design-tokens", "1.0.0"),
]
@@ -252,9 +252,8 @@ def seed_database(db: Session) -> None:
logger.info(f"Created {len(project_map)} projects and {len(package_map)} packages (assigned to {demo_team.slug})")
# Create artifacts, tags, and versions
# Create artifacts and versions
artifact_count = 0
tag_count = 0
version_count = 0
for artifact_data in TEST_ARTIFACTS:
@@ -316,23 +315,12 @@ def seed_database(db: Session) -> None:
db.add(version)
version_count += 1
# Create tags
for tag_name in artifact_data["tags"]:
tag = Tag(
package_id=package.id,
name=tag_name,
artifact_id=sha256_hash,
created_by=team_owner_username,
)
db.add(tag)
tag_count += 1
db.flush()
# Create dependencies
dependency_count = 0
for dep_data in TEST_DEPENDENCIES:
src_project, src_package, src_version, dep_project, dep_package, version_constraint, tag_constraint = dep_data
src_project, src_package, src_version, dep_project, dep_package, version_constraint = dep_data
# Find the source artifact by looking up its version
src_pkg = package_map.get((src_project, src_package))
@@ -356,11 +344,10 @@ def seed_database(db: Session) -> None:
dependency_project=dep_project,
dependency_package=dep_package,
version_constraint=version_constraint,
tag_constraint=tag_constraint,
)
db.add(dependency)
dependency_count += 1
db.commit()
logger.info(f"Created {artifact_count} artifacts, {tag_count} tags, {version_count} versions, and {dependency_count} dependencies")
logger.info(f"Created {artifact_count} artifacts, {version_count} versions, and {dependency_count} dependencies")
logger.info("Database seeding complete")

View File

@@ -6,9 +6,8 @@ from typing import List, Optional, Tuple
from sqlalchemy.orm import Session
import logging
from ..models import Artifact, Tag
from ..models import Artifact, PackageVersion
from ..repositories.artifact import ArtifactRepository
from ..repositories.tag import TagRepository
from ..storage import S3Storage
logger = logging.getLogger(__name__)
@@ -21,8 +20,8 @@ class ArtifactCleanupService:
Reference counting rules:
- ref_count starts at 1 when artifact is first uploaded
- ref_count increments when the same artifact is uploaded again (deduplication)
- ref_count decrements when a tag is deleted or updated to point elsewhere
- ref_count decrements when a package is deleted (for each tag pointing to artifact)
- ref_count decrements when a version is deleted or updated to point elsewhere
- ref_count decrements when a package is deleted (for each version pointing to artifact)
- When ref_count reaches 0, artifact is a candidate for deletion from S3
"""
@@ -30,12 +29,11 @@ class ArtifactCleanupService:
self.db = db
self.storage = storage
self.artifact_repo = ArtifactRepository(db)
self.tag_repo = TagRepository(db)
def on_tag_deleted(self, artifact_id: str) -> Artifact:
def on_version_deleted(self, artifact_id: str) -> Artifact:
"""
Called when a tag is deleted.
Decrements ref_count for the artifact the tag was pointing to.
Called when a version is deleted.
Decrements ref_count for the artifact the version was pointing to.
"""
artifact = self.artifact_repo.get_by_sha256(artifact_id)
if artifact:
@@ -45,11 +43,11 @@ class ArtifactCleanupService:
)
return artifact
def on_tag_updated(
def on_version_updated(
self, old_artifact_id: str, new_artifact_id: str
) -> Tuple[Optional[Artifact], Optional[Artifact]]:
"""
Called when a tag is updated to point to a different artifact.
Called when a version is updated to point to a different artifact.
Decrements ref_count for old artifact, increments for new (if different).
Returns (old_artifact, new_artifact) tuple.
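The method body is not shown in this hunk; a hedged sketch of the swap the docstring describes (increment_ref_count is assumed to exist alongside decrement_ref_count):

# Hypothetical sketch inferred from the docstring; not the actual implementation.
old_artifact = self.artifact_repo.get_by_sha256(old_artifact_id)
new_artifact = self.artifact_repo.get_by_sha256(new_artifact_id)
if old_artifact_id != new_artifact_id:
    if old_artifact:
        self.artifact_repo.decrement_ref_count(old_artifact)
    if new_artifact:
        self.artifact_repo.increment_ref_count(new_artifact)  # assumed repository method
return old_artifact, new_artifact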
@@ -79,21 +77,21 @@ class ArtifactCleanupService:
def on_package_deleted(self, package_id) -> List[str]:
"""
Called when a package is deleted.
Decrements ref_count for all artifacts that had tags in the package.
Decrements ref_count for all artifacts that had versions in the package.
Returns list of artifact IDs that were affected.
"""
# Get all tags in the package before deletion
tags = self.db.query(Tag).filter(Tag.package_id == package_id).all()
# Get all versions in the package before deletion
versions = self.db.query(PackageVersion).filter(PackageVersion.package_id == package_id).all()
affected_artifacts = []
for tag in tags:
artifact = self.artifact_repo.get_by_sha256(tag.artifact_id)
for version in versions:
artifact = self.artifact_repo.get_by_sha256(version.artifact_id)
if artifact:
self.artifact_repo.decrement_ref_count(artifact)
affected_artifacts.append(tag.artifact_id)
affected_artifacts.append(version.artifact_id)
logger.info(
f"Decremented ref_count for artifact {tag.artifact_id} (package delete)"
f"Decremented ref_count for artifact {version.artifact_id} (package delete)"
)
return affected_artifacts
@@ -152,7 +150,7 @@ class ArtifactCleanupService:
def verify_ref_counts(self, fix: bool = False) -> List[dict]:
"""
Verify that ref_counts match actual tag references.
Verify that ref_counts match actual version references.
Args:
fix: If True, fix any mismatched ref_counts
@@ -162,28 +160,28 @@ class ArtifactCleanupService:
"""
from sqlalchemy import func
# Get actual tag counts per artifact
tag_counts = (
self.db.query(Tag.artifact_id, func.count(Tag.id).label("tag_count"))
.group_by(Tag.artifact_id)
# Get actual version counts per artifact
version_counts = (
self.db.query(PackageVersion.artifact_id, func.count(PackageVersion.id).label("version_count"))
.group_by(PackageVersion.artifact_id)
.all()
)
tag_count_map = {artifact_id: count for artifact_id, count in tag_counts}
version_count_map = {artifact_id: count for artifact_id, count in version_counts}
# Check all artifacts
artifacts = self.db.query(Artifact).all()
mismatches = []
for artifact in artifacts:
actual_count = tag_count_map.get(artifact.id, 0)
actual_count = version_count_map.get(artifact.id, 0)
# ref_count should be at least 1 (initial upload) + additional uploads
# But tags are the primary reference, so we check against tag count
# But versions are the primary reference, so we check against version count
if artifact.ref_count < actual_count:
mismatch = {
"artifact_id": artifact.id,
"stored_ref_count": artifact.ref_count,
"actual_tag_count": actual_count,
"actual_version_count": actual_count,
}
mismatches.append(mismatch)