49 Commits

Author SHA1 Message Date
Mondo Diaz
d274f3f375 Add robust PyPI dependency caching with task queue
Replace unbounded thread spawning with a managed worker pool:
- New pypi_cache_tasks table tracks caching jobs
- Thread pool with 5 workers (configurable via ORCHARD_PYPI_CACHE_WORKERS)
- Automatic retries with exponential backoff (30s, 60s, then fail)
- Deduplication to prevent duplicate caching attempts

New API endpoints for visibility and control:
- GET /pypi/cache/status - queue health summary
- GET /pypi/cache/failed - list failed tasks with errors
- POST /pypi/cache/retry/{package} - retry single package
- POST /pypi/cache/retry-all - retry all failed packages

This fixes silent failures in background dependency caching where
packages would fail to cache without any tracking or retry mechanism.
2026-02-02 11:16:02 -06:00
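A minimal sketch of the worker-pool pattern this commit describes. The 5-worker default and the 30s/60s backoff schedule come from the message; `cache_package` and the in-memory dedup set are illustrative stand-ins for the real caching call and the pypi_cache_tasks table.

```python
import queue
import threading
import time

BACKOFFS = [30, 60]  # seconds before retries 2 and 3; fail after the last

def cache_package(package: str) -> None:
    """Illustrative stand-in for the real caching call."""
    print(f"caching {package}")

task_queue: queue.Queue = queue.Queue()
pending: set = set()  # dedup: at most one queued task per package
pending_lock = threading.Lock()

def enqueue(package: str) -> None:
    with pending_lock:
        if package in pending:  # skip duplicate caching attempts
            return
        pending.add(package)
    task_queue.put(package)

def worker() -> None:
    while True:
        package = task_queue.get()
        for attempt in range(len(BACKOFFS) + 1):
            try:
                cache_package(package)
                break
            except Exception:
                if attempt < len(BACKOFFS):
                    time.sleep(BACKOFFS[attempt])
                # else: attempts exhausted; the real code marks the task failed
        with pending_lock:
            pending.discard(package)
        task_queue.task_done()

for _ in range(5):  # configurable via ORCHARD_PYPI_CACHE_WORKERS
    threading.Thread(target=worker, daemon=True).start()
```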
Mondo Diaz
490b05438d Add design doc for PyPI cache robustness improvements 2026-02-02 11:06:51 -06:00
Mondo Diaz
3c2ab70ef0 Fix proactive dependency caching HTTPS redirect issue
When background threads fetch from our own proxy using the request's
base_url, the URL comes back with an http:// scheme, but ingress requires
https://. The resulting 308 redirect dropped the trailing slash, causing
requests to hit the frontend catch-all route instead of /pypi/simple/.

Force HTTPS explicitly in the background caching function to avoid
the redirect entirely.
2026-01-30 18:59:31 -06:00
Mondo Diaz
109a593f83 Add debug logging for proactive caching regex failures 2026-01-30 18:43:09 -06:00
Mondo Diaz
1d727b3f8c Fix proactive caching regex to match both hyphens and underscores
PEP 503 normalizes package names to use hyphens, but wheel filenames
may use underscores (e.g., typing_extensions-4.0.0-py3-none-any.whl).

Convert the search pattern to match either separator.
2026-01-30 18:25:30 -06:00
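A sketch of the separator-tolerant match described above; `name_pattern` is an illustrative helper, not the project's actual function.

```python
import re

def name_pattern(package: str) -> "re.Pattern[str]":
    # PEP 503 names use hyphens, but wheel filenames may use underscores,
    # so treat the two separators as interchangeable when matching.
    escaped = re.escape(package).replace(r"\-", "[-_]")
    return re.compile(rf"^{escaped}-", re.IGNORECASE)

pat = name_pattern("typing-extensions")
assert pat.match("typing_extensions-4.0.0-py3-none-any.whl")
assert pat.match("typing-extensions-4.0.0.tar.gz")
```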
Mondo Diaz
47aa0afe91 Fix proactive caching failing on HTTP->HTTPS redirects
The background dependency caching was getting 308 redirects because
request.base_url returns http:// but the ingress redirects to https://.

Enable follow_redirects=True in httpx client to handle this.
2026-01-30 18:11:08 -06:00
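For reference, httpx defaults to `follow_redirects=False`, which is why the 308 surfaced as a failure. A minimal sketch (hypothetical URL):

```python
import httpx

# With follow_redirects=True the client transparently re-issues the
# request to the https:// Location given by the ingress 308.
with httpx.Client(follow_redirects=True) as client:
    resp = client.get("http://orchard.example/pypi/simple/requests/")
```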
Mondo Diaz
f992fc540e Add proactive dependency caching for PyPI packages
When a PyPI package is cached, its dependencies are now automatically
fetched in background threads. This ensures the entire dependency tree
is cached even if pip already has some packages installed locally.

Features:
- Background threads fetch each dependency without blocking the response
- Uses our own proxy endpoint to cache, which recursively caches transitive deps
- Max depth of 10 to prevent infinite loops
- Daemon threads so they don't block process shutdown
2026-01-30 17:45:30 -06:00
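A rough sketch of the background fetch loop this commit describes, assuming a `base_url` pointing at our own proxy; the function names are illustrative.

```python
import threading

import httpx

MAX_DEPTH = 10  # cap recursion over the dependency tree

def fetch_dependency(base_url: str, package: str, depth: int = 0) -> None:
    if depth >= MAX_DEPTH:
        return
    # Requesting our own simple index caches the package, which in turn
    # schedules the same background fetch for its transitive dependencies.
    httpx.get(f"{base_url}/pypi/simple/{package}/", follow_redirects=True)

def cache_dependencies_in_background(base_url: str, deps: list) -> None:
    for dep in deps:
        t = threading.Thread(target=fetch_dependency, args=(base_url, dep))
        t.daemon = True  # daemon threads don't block process shutdown
        t.start()
```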
Mondo Diaz
044a6c1d27 Fix duplicate dependency constraint causing 500 errors
- Deduplicate dependencies by package name before inserting
- Some packages (like anyio) list the same dep (trio) multiple times with
  different version constraints for different extras
- The unique constraint on (artifact_id, project, package) rejected these
- Also removed debug logging from dependencies.py
2026-01-30 17:43:49 -06:00
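A minimal sketch of the deduplication step, keeping the first constraint seen per package name; the tuple shape is illustrative.

```python
def dedupe_by_name(reqs):
    # anyio lists trio twice (different extras); keep one row per package
    # so the (artifact_id, project, package) unique constraint holds.
    seen = {}
    for name, constraint in reqs:
        seen.setdefault(name, constraint)
    return list(seen.items())

deps = [("trio", ">=0.16"), ("trio", ">=0.22"), ("idna", ">=2.8")]
assert dedupe_by_name(deps) == [("trio", ">=0.16"), ("idna", ">=2.8")]
```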
Mondo Diaz
62c77dc16d Add detailed debug logging to _resolve_dependency_to_artifact 2026-01-30 17:29:19 -06:00
Mondo Diaz
7c05360eed Add debug logging to resolve_dependencies 2026-01-30 17:21:04 -06:00
Mondo Diaz
76878279e9 Add backfill script for PyPI package dependencies
Script extracts Requires-Dist metadata from cached PyPI packages
and stores them in artifact_dependencies table.

Usage:
  docker exec <container> python -m backend.scripts.backfill_pypi_dependencies
  docker exec <container> python -m backend.scripts.backfill_pypi_dependencies --dry-run
2026-01-30 15:38:45 -06:00
Mondo Diaz
e1b01abf9b Add PEP 440 version constraint matching for dependency resolution
- Parse version constraints like >=1.9, <2.0 using packaging library
- Find the latest version that satisfies the constraint
- Support wildcard (*) to get latest version
- Fall back to exact version and tag matching
2026-01-30 15:34:19 -06:00
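A sketch of constraint matching with the packaging library named above; `best_match` is illustrative, not the project's actual helper.

```python
from packaging.specifiers import SpecifierSet
from packaging.version import Version

def best_match(available, constraint):
    """Latest available version satisfying a PEP 440 constraint ('*' = any)."""
    spec = SpecifierSet("" if constraint == "*" else constraint)
    candidates = [v for v in available if Version(v) in spec]
    return max(candidates, key=Version) if candidates else None

assert best_match(["1.8", "1.9", "1.10", "2.0"], ">=1.9,<2.0") == "1.10"
assert best_match(["1.8", "2.0"], "*") == "2.0"
```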
Mondo Diaz
d07936b666 Fix ensure file modal z-index when opened from deps modal 2026-01-30 15:32:06 -06:00
Mondo Diaz
47b3eb439d Extract and store dependencies from PyPI packages
- Add functions to parse Requires-Dist metadata from wheel and sdist files
- Store extracted dependencies in artifact_dependencies table
- Fix streaming response for cached artifacts (proper tuple unpacking)
- Fix version uniqueness check to use version string instead of artifact_id
- Skip creating versions for .metadata files
2026-01-30 15:14:52 -06:00
Mondo Diaz
c5f75e4fd6 Add is_system to all ProjectResponse constructions in routes 2026-01-30 13:34:26 -06:00
Mondo Diaz
ff31379649 Fix: ensure existing _pypi project gets is_system=true 2026-01-30 13:33:31 -06:00
Mondo Diaz
424b1e5770 Add is_system field to ProjectResponse schema 2026-01-30 13:11:11 -06:00
Mondo Diaz
7b5b0c78d8 Hide Tags and Latest columns for system projects in package table 2026-01-30 12:55:28 -06:00
Mondo Diaz
924826f07a Improve system project UX and make dependencies a modal
- Hide tag count stat for system projects (show "versions" instead of "artifacts")
- Hide "Latest" tag stat for system projects
- Change "Create/Update Tag" to only show for non-system projects
- Add "View Artifact ID" menu option with modal showing the SHA256 hash
- Move dependencies section to a modal (opened via "View Dependencies" menu)
- Add deps-modal and artifact-id-modal CSS styles
2026-01-30 12:36:40 -06:00
Mondo Diaz
fe6c6c52d2 Fix PyPI proxy UX and package stats calculation
- Fix artifact_count and total_size calculation to use Tags instead of
  Uploads, so PyPI cached packages show their stats correctly
- Fix PackagePage dropdown menu positioning (use fixed position with backdrop)
- Add system project detection for projects starting with "_"
- Show Version as primary column for system projects, hide Tag column
- Hide upload button for system projects (they're cache-only)
- Rename section header to "Versions" for system projects
- Fix test_projects_sort_by_name to exclude system projects from sort comparison
2026-01-30 12:16:05 -06:00
Mondo Diaz
701e11ce83 Hide format filter and column for system projects
System projects like _pypi only contain packages of one format,
so the format filter dropdown and column are redundant.
2026-01-30 11:55:09 -06:00
Mondo Diaz
ff9e02606e Hide Settings and New Package buttons for system projects
System projects should be system-controlled only. Users should not
be able to create packages or change settings on system cache projects.
2026-01-30 11:54:02 -06:00
Mondo Diaz
f3afdd3bbf Improve PyPI proxy and Package page UX
PyPI proxy improvements:
- Set package format to "pypi" instead of "generic"
- Extract version from filename and create PackageVersion record
- Support .whl, .tar.gz, and .zip filename formats

Package page UX overhaul:
- Move upload to header button with modal
- Simplify table: combine Tag/Version, remove Type and Artifact ID columns
- Add row action menu (⋯) with: Copy ID, Ensure File, Create Tag, Dependencies
- Remove cluttered "Download by Artifact ID" and "Create/Update Tag" sections
- Add modals for upload and create tag actions
- Cleaner, more scannable table layout
2026-01-30 11:52:37 -06:00
Mondo Diaz
4b73196664 Show team name instead of individual user in Owner column
Projects owned by teams now display the team name in the Owner column
for better organizational continuity when team members change.
Falls back to created_by if no team is assigned.
2026-01-30 11:25:01 -06:00
Mondo Diaz
7ef66745f1 Add "(coming soon)" label for unsupported upstream source types
Only pypi and generic are currently supported. Other types now show
"(coming soon)" in both the dropdown and the sources table.
2026-01-30 11:03:44 -06:00
Mondo Diaz
2dc7fe5a7b Fix PyPI proxy: use correct storage method and make project public
- Use storage.get_stream(s3_key) instead of non-existent get_artifact_stream()
- Make _pypi project public (is_public=True) so cached packages are visible
2026-01-30 10:59:50 -06:00
Mondo Diaz
534e4b964f Fix Project and Tag model fields in PyPI proxy
Use correct model fields:
- Project: is_public, is_system, created_by (not visibility)
- Tag: add required created_by field
2026-01-30 10:29:25 -06:00
Mondo Diaz
757e43fc34 Fix Artifact model field names in PyPI proxy
Use correct Artifact model fields:
- original_name instead of filename
- Add required created_by and s3_key fields
- Include checksum fields from storage result
2026-01-30 09:58:15 -06:00
Mondo Diaz
d78092de55 Fix PyPI proxy to use correct storage.store() method
The code was calling storage.store_artifact() which doesn't exist.
Changed to use storage.store() which handles content-addressable
storage with automatic deduplication.
2026-01-30 09:41:34 -06:00
Mondo Diaz
0fa991f536 Allow full path in PyPI upstream source URL
Users can now configure the full path including /simple in their
upstream source URL (e.g., https://example.com/api/pypi/repo/simple)
instead of having the code append /simple/ automatically.

This matches pip's --index-url format, making configuration more
intuitive and copy/paste friendly.
2026-01-30 09:24:05 -06:00
Mondo Diaz
00fb2729e4 Fix test_rewrite_relative_links assertion to expect correct URL
The test was checking for the wrong URL pattern. When urljoin resolves
../../packages/ab/cd/... relative to /api/pypi/pypi-remote/simple/requests/,
it correctly produces /api/pypi/pypi-remote/packages/ab/cd/... (not
/api/pypi/packages/...).
2026-01-30 08:51:30 -06:00
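The resolution is plain `urljoin` behavior; the host and filename below are illustrative.

```python
from urllib.parse import urljoin

base = "https://orchard.example/api/pypi/pypi-remote/simple/requests/"
# Each ../ climbs one path segment: out of requests/, then out of simple/.
resolved = urljoin(base, "../../packages/ab/cd/requests-2.28.0.tar.gz")
assert resolved == (
    "https://orchard.example/api/pypi/pypi-remote/packages/ab/cd/requests-2.28.0.tar.gz"
)
```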
Mondo Diaz
8ae4d7a685 Improve PyPI proxy test assertions for all status codes
Tests now verify the correct response for each scenario:
- 200: HTML content-type
- 404: "not found" error message
- 503: "No PyPI upstream sources configured" error message
2026-01-29 19:35:20 -06:00
Mondo Diaz
4b887d1aad Fix PyPI proxy tests to work with or without upstream sources
- Tests now accept 200/404/503 responses since upstream sources may or
  may not be configured in the test environment
- Added upstream_base_url parameter to _rewrite_package_links test
- Added test for relative URL resolution (Artifactory-style URLs)
2026-01-29 19:34:33 -06:00
Mondo Diaz
4dc54ace8a Fix HTTPS scheme detection behind reverse proxy
When behind a reverse proxy that terminates SSL, the server sees HTTP
requests internally. Added _get_base_url() helper that respects the
X-Forwarded-Proto header to generate correct external HTTPS URLs.

This fixes links in the PyPI simple index showing http:// instead of
https:// when accessed via HTTPS through a load balancer.
2026-01-29 18:02:21 -06:00
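A sketch of such a helper, assuming the Starlette/FastAPI `Request` used elsewhere in these commits; the real `_get_base_url()` may differ.

```python
from starlette.requests import Request

def _get_base_url(request: Request) -> str:
    # Behind an SSL-terminating proxy the app sees http://, but the
    # client-facing scheme arrives in X-Forwarded-Proto.
    scheme = request.headers.get("x-forwarded-proto", request.url.scheme)
    return f"{scheme}://{request.url.netloc}"
```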
Mondo Diaz
64bfd3902f Fix relative URL handling in PyPI proxy
Artifactory and other registries may return relative URLs in their
Simple API responses (e.g., ../../packages/...). The proxy now resolves
these to absolute URLs using urljoin() before encoding them in the
upstream parameter.

This fixes package downloads failing when the upstream registry uses
relative URLs in its package index.
2026-01-29 18:01:19 -06:00
Mondo Diaz
bdfed77cb1 Remove dead code from pypi_proxy.py
- Remove unused imports (UpstreamClient, UpstreamClientConfig,
  UpstreamHTTPError, UpstreamConnectionError, UpstreamTimeoutError)
- Simplify matched_source selection logic, removing dead conditional
  that always evaluated to True due to 'or True'
2026-01-29 16:42:53 -06:00
Mondo Diaz
140f6c926a Fix httpx.Timeout configuration in PyPI proxy
httpx.Timeout requires either a default value or all four parameters.
Changed to httpx.Timeout(default, connect=X) format.
2026-01-29 16:40:06 -06:00
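The two forms, for reference:

```python
import httpx

# httpx.Timeout(connect=5.0) alone raises ValueError: a default is
# required unless connect/read/write/pool are all given explicitly.
timeout = httpx.Timeout(30.0, connect=5.0)  # 30s default, 5s connect
client = httpx.Client(timeout=timeout)
```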
Mondo Diaz
dcd405679a Merge branch 'feature/transparent-proxy' into 'main'
Add transparent PyPI proxy and improve upstream sources UI

Closes #108

See merge request esv/bsf/bsf-integration/orchard/orchard-mvp!56
2026-01-29 16:12:57 -06:00
Mondo Diaz
97498b2f86 Add transparent PyPI proxy and improve upstream sources UI 2026-01-29 16:12:57 -06:00
Mondo Diaz
e8cf2462b7 Merge branch 'fix/upstream-caching-bugs-2' into 'main'
Simplify cache management UI and improve test status display (#107)

See merge request esv/bsf/bsf-integration/orchard/orchard-mvp!55
2026-01-29 14:25:19 -06:00
Mondo Diaz
038ad4ed1b Simplify cache management UI and improve test status display (#107) 2026-01-29 14:25:19 -06:00
Mondo Diaz
858b45d434 Merge branch 'fix/purge-seed-data-user-id' into 'main'
Fix purge_seed_data type mismatch for access_permissions.user_id (#107)

See merge request esv/bsf/bsf-integration/orchard/orchard-mvp!54
2026-01-29 13:48:21 -06:00
Mondo Diaz
95470b2bf6 Fix purge_seed_data type mismatch for access_permissions.user_id (#107) 2026-01-29 13:48:21 -06:00
Mondo Diaz
c512d85f9e Merge branch 'fix/upstream-caching-bugs' into 'main'
Remove public internet features and fix upstream source UI (#107)

See merge request esv/bsf/bsf-integration/orchard/orchard-mvp!53
2026-01-29 13:26:29 -06:00
Mondo Diaz
82f67539bd Remove public internet features and fix upstream source UI (#107) 2026-01-29 13:26:28 -06:00
Mondo Diaz
e93e7e7021 Merge branch 'feature/proxy-schema' into 'main'
Add upstream caching infrastructure and refactor CI pipeline

See merge request esv/bsf/bsf-integration/orchard/orchard-mvp!52
2026-01-29 11:55:15 -06:00
Mondo Diaz
1d51c856b0 Add upstream caching infrastructure and refactor CI pipeline 2026-01-29 11:55:15 -06:00
Mondo Diaz
c92895ffe9 Merge branch 'fix/migration-rollback' into 'main'
Add rollback after failed migration to allow subsequent migrations to run

See merge request esv/bsf/bsf-integration/orchard/orchard-mvp!51
2026-01-28 15:23:51 -06:00
Mondo Diaz
b147af43d2 Add rollback after failed migration to allow subsequent migrations to run 2026-01-28 15:23:51 -06:00
40 changed files with 10996 additions and 734 deletions

.gitlab-ci.yml

@@ -11,13 +11,6 @@ variables:
# Environment URLs (used by deploy and test jobs)
STAGE_URL: https://orchard-stage.common.global.bsf.tools
PROD_URL: https://orchard.common.global.bsf.tools
# Stage environment AWS resources (used by reset job)
STAGE_RDS_HOST: orchard-stage.cluster-cvw3jzjkozoc.us-gov-west-1.rds.amazonaws.com
STAGE_RDS_DBNAME: postgres
STAGE_SECRET_ARN: "arn:aws-us-gov:secretsmanager:us-gov-west-1:052673043337:secret:rds!cluster-a573672b-1a38-4665-a654-1b7df37b5297-IaeFQL"
STAGE_AUTH_SECRET_ARN: "arn:aws-us-gov:secretsmanager:us-gov-west-1:052673043337:secret:orchard-stage-creds-SMqvQx"
STAGE_S3_BUCKET: orchard-artifacts-stage
AWS_REGION: us-gov-west-1
# Shared pip cache directory
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.pip-cache"
@@ -95,10 +88,18 @@ cve_sbom_analysis:
when: never
- when: on_success
# Override release job to wait for stage integration tests before creating tag
# Disable prosper_setup for tag pipelines since no build/analysis jobs run
# (image is already built when commit was on main, and deploy uses helm directly)
prosper_setup:
rules:
- if: '$CI_COMMIT_TAG'
when: never
- when: on_success
# Override release job to wait for stage deployment and smoke tests before creating tag
# This ensures the tag (which triggers prod deploy) is only created after stage passes
release:
needs: [integration_test_stage, changelog]
needs: [smoke_test_stage, changelog]
# Full integration test suite template (for feature/stage deployments)
# Runs the complete pytest integration test suite against the deployed environment
@@ -200,108 +201,6 @@ release:
sys.exit(0)
PYTEST_SCRIPT
# Reset stage template - runs from CI runner, uses CI variable for auth
# Calls the /api/v1/admin/factory-reset endpoint which handles DB and S3 cleanup
.reset_stage_template: &reset_stage_template
stage: deploy
image: deps.global.bsf.tools/docker/python:3.12-slim
timeout: 5m
retry: 1
before_script:
- pip install --index-url "$PIP_INDEX_URL" httpx
script:
- |
python - <<'RESET_SCRIPT'
import httpx
import sys
import os
import time
BASE_URL = os.environ.get("STAGE_URL", "")
ADMIN_USER = "admin"
ADMIN_PASS = os.environ.get("STAGE_ADMIN_PASSWORD", "")
MAX_RETRIES = 3
RETRY_DELAY = 5
if not BASE_URL:
print("ERROR: STAGE_URL not set")
sys.exit(1)
if not ADMIN_PASS:
print("ERROR: STAGE_ADMIN_PASSWORD not set")
sys.exit(1)
print(f"=== Resetting stage environment at {BASE_URL} ===")
def do_reset():
with httpx.Client(base_url=BASE_URL, timeout=120.0) as client:
print("Logging in as admin...")
login_response = client.post(
"/api/v1/auth/login",
json={"username": ADMIN_USER, "password": ADMIN_PASS},
)
if login_response.status_code != 200:
raise Exception(f"Login failed: {login_response.status_code} - {login_response.text}")
print("Login successful")
print("Calling factory reset endpoint...")
reset_response = client.post(
"/api/v1/admin/factory-reset",
headers={"X-Confirm-Reset": "yes-delete-all-data"},
)
if reset_response.status_code == 200:
result = reset_response.json()
print("Factory reset successful!")
print(f" Database tables dropped: {result['results']['database_tables_dropped']}")
print(f" S3 objects deleted: {result['results']['s3_objects_deleted']}")
print(f" Database reinitialized: {result['results']['database_reinitialized']}")
print(f" Seeded: {result['results']['seeded']}")
return True
else:
raise Exception(f"Factory reset failed: {reset_response.status_code} - {reset_response.text}")
for attempt in range(1, MAX_RETRIES + 1):
try:
print(f"Attempt {attempt}/{MAX_RETRIES}")
if do_reset():
sys.exit(0)
except Exception as e:
print(f"Attempt {attempt} failed: {e}")
if attempt < MAX_RETRIES:
print(f"Retrying in {RETRY_DELAY} seconds...")
time.sleep(RETRY_DELAY)
else:
print("All retry attempts failed")
sys.exit(1)
RESET_SCRIPT
rules:
- if: '$CI_COMMIT_BRANCH == "main"'
when: on_success
# Reset stage BEFORE integration tests (ensure known state)
reset_stage_pre:
<<: *reset_stage_template
needs: [deploy_stage]
# Integration tests for stage deployment
# Uses CI variable STAGE_ADMIN_PASSWORD (set in GitLab CI/CD settings)
integration_test_stage:
<<: *integration_test_template
needs: [reset_stage_pre]
variables:
ORCHARD_TEST_URL: $STAGE_URL
ORCHARD_TEST_PASSWORD: $STAGE_ADMIN_PASSWORD
rules:
- if: '$CI_COMMIT_BRANCH == "main"'
when: on_success
# Reset stage AFTER integration tests (clean slate for next run)
reset_stage:
<<: *reset_stage_template
needs: [integration_test_stage]
allow_failure: true # Don't fail pipeline if reset has issues
# Integration tests for feature deployment (full suite)
# Uses DEV_ADMIN_PASSWORD CI variable (same as deploy_feature)
integration_test_feature:
@@ -412,9 +311,88 @@ frontend_tests:
echo "Health check failed after 30 attempts"
exit 1
# Deploy to stage (main branch)
deploy_stage:
# Ephemeral test deployment in stage namespace (main branch only)
# Runs integration tests before promoting to long-running stage
deploy_test:
<<: *deploy_template
variables:
NAMESPACE: orch-stage-namespace
VALUES_FILE: helm/orchard/values-dev.yaml
BASE_URL: https://orchard-test.common.global.bsf.tools
before_script:
- kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
- *helm_setup
script:
- echo "Deploying ephemeral test environment"
- cd $CI_PROJECT_DIR
- |
helm upgrade --install orchard-test ./helm/orchard \
--namespace $NAMESPACE \
-f $VALUES_FILE \
--set image.tag=git.linux-amd64-$CI_COMMIT_SHA \
--set orchard.auth.adminPassword=$STAGE_ADMIN_PASSWORD \
--set ingress.hosts[0].host=orchard-test.common.global.bsf.tools \
--set ingress.tls[0].hosts[0]=orchard-test.common.global.bsf.tools \
--set ingress.tls[0].secretName=orchard-test-tls \
--set minioIngress.host=minio-test.common.global.bsf.tools \
--set minioIngress.tls.secretName=minio-test-tls \
--wait \
--atomic \
--timeout 10m
- kubectl rollout status deployment/orchard-test-server -n $NAMESPACE --timeout=10m
- *verify_deployment
environment:
name: test
url: https://orchard-test.common.global.bsf.tools
on_stop: cleanup_test
kubernetes:
agent: esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
rules:
- if: '$CI_COMMIT_BRANCH == "main"'
when: on_success
# Integration tests for ephemeral test deployment (main branch)
# Runs against orchard-test before promoting to long-running stage
integration_test_main:
<<: *integration_test_template
needs: [deploy_test]
variables:
ORCHARD_TEST_URL: https://orchard-test.common.global.bsf.tools
ORCHARD_TEST_PASSWORD: $STAGE_ADMIN_PASSWORD
rules:
- if: '$CI_COMMIT_BRANCH == "main"'
when: on_success
# Cleanup ephemeral test deployment after integration tests
cleanup_test:
stage: deploy
needs: [integration_test_main]
image: deps.global.bsf.tools/registry-1.docker.io/alpine/k8s:1.29.12
timeout: 5m
variables:
NAMESPACE: orch-stage-namespace
GIT_STRATEGY: none
before_script:
- kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
script:
- echo "Cleaning up ephemeral test deployment orchard-test"
- helm uninstall orchard-test --namespace $NAMESPACE || true
environment:
name: test
action: stop
kubernetes:
agent: esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
rules:
- if: '$CI_COMMIT_BRANCH == "main"'
when: on_success
allow_failure: true
# Deploy to long-running stage (main branch, after ephemeral tests pass)
deploy_stage:
stage: deploy
# Wait for ephemeral test to pass before promoting to long-running stage
needs: [cleanup_test]
image: deps.global.bsf.tools/registry-1.docker.io/alpine/k8s:1.29.12
variables:
NAMESPACE: orch-stage-namespace
VALUES_FILE: helm/orchard/values-stage.yaml
@@ -423,7 +401,7 @@ deploy_stage:
- kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
- *helm_setup
script:
- echo "Deploying to stage environment"
- echo "Deploying to long-running stage environment"
- cd $CI_PROJECT_DIR
- |
helm upgrade --install orchard-stage ./helm/orchard \
@@ -445,6 +423,16 @@ deploy_stage:
- if: '$CI_COMMIT_BRANCH == "main"'
when: on_success
# Smoke test for long-running stage (after promotion)
smoke_test_stage:
<<: *smoke_test_template
needs: [deploy_stage]
variables:
ORCHARD_TEST_URL: $STAGE_URL
rules:
- if: '$CI_COMMIT_BRANCH == "main"'
when: on_success
# Deploy feature branch to dev namespace
deploy_feature:
<<: *deploy_template

CHANGELOG.md

@@ -7,6 +7,113 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Added
- Added transparent PyPI proxy implementing PEP 503 Simple API (#108)
  - `GET /pypi/simple/` - package index (proxied from upstream)
  - `GET /pypi/simple/{package}/` - version list with rewritten download links
  - `GET /pypi/simple/{package}/{filename}` - download with automatic caching
  - Allows `pip install --index-url https://orchard.../pypi/simple/ <package>`
  - Artifacts cached on first access through configured upstream sources
- Added `POST /api/v1/cache/resolve` endpoint to cache packages by coordinates instead of URL (#108)
- Added `ORCHARD_PURGE_SEED_DATA` environment variable support to stage helm values to remove seed data from long-running deployments (#107)
- Added frontend system projects visual distinction (#105)
  - "Cache" badge for system projects in project list
  - "System Cache" badge on project detail page
  - Added `is_system` field to Project type
- Added frontend admin page for upstream sources and cache settings (#75)
  - New `/admin/cache` page accessible from user menu (admin only)
  - Upstream sources table with create/edit/delete/test connectivity
  - Cache settings section with air-gap mode and auto-create system projects toggles
  - Visual indicators for env-defined sources (locked, cannot be modified)
  - Environment variable override badges when settings are overridden
  - API client functions for all cache admin operations
- Added environment variable overrides for cache configuration (#74)
  - `ORCHARD_CACHE_ALLOW_PUBLIC_INTERNET` - Override allow_public_internet (air-gap mode)
  - `ORCHARD_CACHE_AUTO_CREATE_SYSTEM_PROJECTS` - Override auto_create_system_projects
  - `ORCHARD_UPSTREAM__{NAME}__*` - Define upstream sources via env vars
  - Env-defined sources appear in API with `source: "env"` marker
  - Env-defined sources cannot be modified/deleted via API (400 error)
  - Cache settings response includes `*_env_override` fields when overridden
  - 7 unit tests for env var parsing and configuration
- Added Global Cache Settings Admin API (#73)
  - `GET /api/v1/admin/cache-settings` - Retrieve current cache settings
  - `PUT /api/v1/admin/cache-settings` - Update cache settings (partial updates)
  - Admin-only access with audit logging
  - Controls `allow_public_internet` (air-gap mode) and `auto_create_system_projects`
  - 7 integration tests for settings management
- Added Upstream Sources Admin API for managing cache sources (#72)
  - `GET /api/v1/admin/upstream-sources` - List sources with filtering
  - `POST /api/v1/admin/upstream-sources` - Create source with auth configuration
  - `GET /api/v1/admin/upstream-sources/{id}` - Get source details
  - `PUT /api/v1/admin/upstream-sources/{id}` - Update source (partial updates)
  - `DELETE /api/v1/admin/upstream-sources/{id}` - Delete source
  - `POST /api/v1/admin/upstream-sources/{id}/test` - Test connectivity
  - Admin-only access with audit logging
  - Credentials never exposed (only has_password/has_headers flags)
  - 13 integration tests for all CRUD operations
- Added system project restrictions and management (#71)
  - System projects (`_npm`, `_pypi`, etc.) cannot be deleted (returns 403)
  - System projects cannot be made private (must remain public)
  - `GET /api/v1/system-projects` endpoint to list all system cache projects
  - 5 integration tests for system project restrictions
- Added Cache API endpoint for fetching and storing artifacts from upstream URLs (#70)
  - `POST /api/v1/cache` endpoint to cache artifacts from upstream registries
  - URL parsing helpers to extract package name/version from npm, PyPI, Maven URLs
  - Automatic system project creation (`_npm`, `_pypi`, `_maven`, etc.)
  - URL-to-artifact provenance tracking via `cached_urls` table
  - Optional user project cross-referencing for custom organization
  - Cache hit returns existing artifact without re-fetching
  - Air-gap mode enforcement (blocks public URLs when disabled)
  - Hash verification for downloaded artifacts
  - 21 unit tests for URL parsing and cache endpoint
- Added HTTP client for fetching artifacts from upstream sources (#69)
  - `UpstreamClient` class in `backend/app/upstream.py` with streaming downloads
  - SHA256 hash computation while streaming (doesn't load large files into memory)
  - Auth support: none, basic auth, bearer token, API key (custom headers)
  - URL-to-source matching by URL prefix with priority ordering
  - Configuration options: timeouts, retries with exponential backoff, redirect limits, max file size
  - Air-gap mode enforcement via `allow_public_internet` setting
  - Response header capture for provenance tracking
  - Proper error handling with custom exception types
  - Connection test method for upstream source validation
  - 33 unit tests for client functionality
- Added upstream artifact caching schema for hermetic builds (#68)
  - `upstream_sources` table for configuring upstream registries (npm, PyPI, Maven, etc.)
  - `cache_settings` table for global settings including air-gap mode
  - `cached_urls` table for URL-to-artifact provenance tracking
  - `is_system` column on projects for system cache projects (_npm, _pypi, etc.)
  - Support for multiple auth types: none, basic auth, bearer token, API key
  - Fernet encryption for credentials using `ORCHARD_CACHE_ENCRYPTION_KEY`
  - Default upstream sources seeded (npm-public, pypi-public, maven-central, docker-hub) - disabled by default
  - Migration `010_upstream_caching.sql`
- Added team-based multi-tenancy for organizing projects and collaboration (#88-#104)
  - Teams serve as organizational containers for projects
  - Users can belong to multiple teams with different roles (owner, admin, member)
### Changed
- Upstream sources table text is now centered under column headers (#108)
- ENV badge now appears inline with source name instead of separate column (#108)
- Test and Edit buttons now have more prominent button styling (#108)
- Reduced footer padding for cleaner layout (#108)
- Upstream source connectivity test no longer follows redirects, fixing "Exceeded maximum allowed redirects" error with Artifactory proxies (#107)
- Test runs automatically after saving a new or updated upstream source (#107)
- Test status now shows as colored dots (green=success, red=error) instead of text badges (#107)
- Clicking red dot shows error details in a modal (#107)
- Source name column no longer wraps text for better table layout (#107)
- Renamed "Cache Management" page to "Upstream Sources" (#107)
- Moved Delete button from table row to edit modal for cleaner table layout (#107)
### Fixed
- Fixed purge_seed_data crash when deleting access permissions - was comparing UUID to VARCHAR column (#107)
### Removed
- Removed `is_public` field from upstream sources - all sources are now treated as internal/private (#107)
- Removed `allow_public_internet` (air-gap mode) setting from cache settings - not needed for enterprise proxy use case (#107)
- Removed seeding of public registry URLs (npm-public, pypi-public, maven-central, docker-hub) (#107)
- Removed "Public" badge and checkbox from upstream sources UI (#107)
- Removed "Allow Public Internet" toggle from cache settings UI (#107)
- Removed "Global Settings" section from cache management UI - auto-create system projects is always enabled (#107)
- Removed unused CacheSettings frontend types and API functions (#107)

backend/app/cache.py (new file, 316 lines)

@@ -0,0 +1,316 @@
"""
Cache service for upstream artifact caching.
Provides URL parsing, system project management, and caching logic
for the upstream caching feature.
"""
import logging
import re
from dataclasses import dataclass
from typing import Optional
from urllib.parse import urlparse, unquote
logger = logging.getLogger(__name__)
# System project names for each source type
SYSTEM_PROJECT_NAMES = {
"npm": "_npm",
"pypi": "_pypi",
"maven": "_maven",
"docker": "_docker",
"helm": "_helm",
"nuget": "_nuget",
"deb": "_deb",
"rpm": "_rpm",
"generic": "_generic",
}
# System project descriptions
SYSTEM_PROJECT_DESCRIPTIONS = {
"npm": "System cache for npm packages",
"pypi": "System cache for PyPI packages",
"maven": "System cache for Maven packages",
"docker": "System cache for Docker images",
"helm": "System cache for Helm charts",
"nuget": "System cache for NuGet packages",
"deb": "System cache for Debian packages",
"rpm": "System cache for RPM packages",
"generic": "System cache for generic artifacts",
}
@dataclass
class ParsedUrl:
"""Parsed URL information for caching."""
package_name: str
version: Optional[str] = None
filename: Optional[str] = None
def parse_npm_url(url: str) -> Optional[ParsedUrl]:
"""
Parse npm registry URL to extract package name and version.
Formats:
- https://registry.npmjs.org/{package}/-/{package}-{version}.tgz
- https://registry.npmjs.org/@{scope}/{package}/-/{package}-{version}.tgz
Examples:
- https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz
- https://registry.npmjs.org/@types/node/-/node-18.0.0.tgz
"""
parsed = urlparse(url)
path = unquote(parsed.path)
# Pattern for scoped packages: /@scope/package/-/package-version.tgz
scoped_pattern = r"^/@([^/]+)/([^/]+)/-/\2-(.+)\.tgz$"
match = re.match(scoped_pattern, path)
if match:
scope, name, version = match.groups()
return ParsedUrl(
package_name=f"@{scope}/{name}",
version=version,
filename=f"{name}-{version}.tgz",
)
# Pattern for unscoped packages: /package/-/package-version.tgz
unscoped_pattern = r"^/([^/@]+)/-/\1-(.+)\.tgz$"
match = re.match(unscoped_pattern, path)
if match:
name, version = match.groups()
return ParsedUrl(
package_name=name,
version=version,
filename=f"{name}-{version}.tgz",
)
return None
def parse_pypi_url(url: str) -> Optional[ParsedUrl]:
"""
Parse PyPI URL to extract package name and version.
Formats:
- https://files.pythonhosted.org/packages/.../package-version.tar.gz
- https://files.pythonhosted.org/packages/.../package-version-py3-none-any.whl
- https://pypi.org/packages/.../package-version.tar.gz
Examples:
- https://files.pythonhosted.org/packages/ab/cd/requests-2.28.0.tar.gz
- https://files.pythonhosted.org/packages/ab/cd/requests-2.28.0-py3-none-any.whl
"""
parsed = urlparse(url)
path = unquote(parsed.path)
# Get the filename from the path
filename = path.split("/")[-1]
if not filename:
return None
# Handle wheel files: package-version-py3-none-any.whl
wheel_pattern = r"^([a-zA-Z0-9_-]+)-(\d+[^-]*)-.*\.whl$"
match = re.match(wheel_pattern, filename)
if match:
name, version = match.groups()
# Normalize per PEP 503: lowercase, underscores -> hyphens
# (wheel filenames may use underscores)
name = name.replace("_", "-").lower()
return ParsedUrl(
package_name=name,
version=version,
filename=filename,
)
# Handle source distributions: package-version.tar.gz or package-version.zip
sdist_pattern = r"^([a-zA-Z0-9_-]+)-(\d+(?:\.\d+)*(?:[a-zA-Z0-9_.+-]*)?)(?:\.tar\.gz|\.zip|\.tar\.bz2)$"
match = re.match(sdist_pattern, filename)
if match:
name, version = match.groups()
name = name.replace("_", "-").lower()
return ParsedUrl(
package_name=name,
version=version,
filename=filename,
)
return None
def parse_maven_url(url: str) -> Optional[ParsedUrl]:
"""
Parse Maven repository URL to extract artifact info.
Format:
- https://repo1.maven.org/maven2/{group}/{artifact}/{version}/{artifact}-{version}.jar
Examples:
- https://repo1.maven.org/maven2/org/apache/commons/commons-lang3/3.12.0/commons-lang3-3.12.0.jar
- https://repo1.maven.org/maven2/com/google/guava/guava/31.1-jre/guava-31.1-jre.jar
"""
parsed = urlparse(url)
path = unquote(parsed.path)
# Find /maven2/ or similar repository path
maven2_idx = path.find("/maven2/")
if maven2_idx >= 0:
path = path[maven2_idx + 8:] # Remove /maven2/
elif path.startswith("/"):
path = path[1:]
parts = path.split("/")
if len(parts) < 4:
return None
# Last part is filename, before that is version, before that is artifact
filename = parts[-1]
version = parts[-2]
artifact = parts[-3]
group = ".".join(parts[:-3])
# Verify filename matches expected pattern
if not filename.startswith(f"{artifact}-{version}"):
return None
return ParsedUrl(
package_name=f"{group}:{artifact}",
version=version,
filename=filename,
)
def parse_docker_url(url: str) -> Optional[ParsedUrl]:
"""
Parse Docker registry URL to extract image info.
Note: Docker registries are more complex (manifests, blobs, etc.)
This handles basic blob/manifest URLs.
Examples:
- https://registry-1.docker.io/v2/library/nginx/blobs/sha256:abc123
- https://registry-1.docker.io/v2/myuser/myimage/manifests/latest
"""
parsed = urlparse(url)
path = unquote(parsed.path)
# Pattern: /v2/{namespace}/{image}/blobs/{digest} or /manifests/{tag}
pattern = r"^/v2/([^/]+(?:/[^/]+)?)/([^/]+)/(blobs|manifests)/(.+)$"
match = re.match(pattern, path)
if match:
namespace, image, artifact_type, reference = match.groups()
if namespace == "library":
package_name = image
else:
package_name = f"{namespace}/{image}"
# For manifests, the reference is the tag
version = reference if artifact_type == "manifests" else None
return ParsedUrl(
package_name=package_name,
version=version,
filename=f"{image}-{reference}" if version else reference,
)
return None
def parse_generic_url(url: str) -> ParsedUrl:
"""
Parse a generic URL to extract filename.
Attempts to extract meaningful package name and version from filename.
Examples:
- https://example.com/downloads/myapp-1.2.3.tar.gz
- https://github.com/user/repo/releases/download/v1.0/release.zip
"""
parsed = urlparse(url)
path = unquote(parsed.path)
filename = path.split("/")[-1] or "artifact"
# List of known compound and simple extensions
known_extensions = [
".tar.gz", ".tar.bz2", ".tar.xz",
".zip", ".tgz", ".gz", ".jar", ".war", ".deb", ".rpm"
]
# Strip extension from filename first
base_name = filename
matched_ext = None
for ext in known_extensions:
if filename.endswith(ext):
base_name = filename[:-len(ext)]
matched_ext = ext
break
if matched_ext is None:
# Unknown extension, return filename as package name
return ParsedUrl(
package_name=filename,
version=None,
filename=filename,
)
# Try to extract version from base_name
# Pattern: name-version or name_version
# Version starts with digit(s) and can include dots, dashes, and alphanumeric suffixes
version_pattern = r"^(.+?)[-_](v?\d+(?:\.\d+)*(?:[-_][a-zA-Z0-9]+)?)$"
match = re.match(version_pattern, base_name)
if match:
name, version = match.groups()
return ParsedUrl(
package_name=name,
version=version,
filename=filename,
)
# No version found, use base_name as package name
return ParsedUrl(
package_name=base_name,
version=None,
filename=filename,
)
def parse_url(url: str, source_type: str) -> ParsedUrl:
"""
Parse URL to extract package name and version based on source type.
Args:
url: The URL to parse.
source_type: The source type (npm, pypi, maven, docker, etc.)
Returns:
ParsedUrl with extracted information.
"""
parsed = None
if source_type == "npm":
parsed = parse_npm_url(url)
elif source_type == "pypi":
parsed = parse_pypi_url(url)
elif source_type == "maven":
parsed = parse_maven_url(url)
elif source_type == "docker":
parsed = parse_docker_url(url)
# Fall back to generic parsing if type-specific parsing fails
if parsed is None:
parsed = parse_generic_url(url)
return parsed
def get_system_project_name(source_type: str) -> str:
"""Get the system project name for a source type."""
return SYSTEM_PROJECT_NAMES.get(source_type, "_generic")
def get_system_project_description(source_type: str) -> str:
"""Get the system project description for a source type."""
return SYSTEM_PROJECT_DESCRIPTIONS.get(
source_type, "System cache for artifacts"
)
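An illustrative round-trip through the parsers above, assuming the module path shown in this diff; the example URLs come from the docstrings.

```python
from backend.app.cache import parse_url

npm = parse_url("https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", "npm")
assert (npm.package_name, npm.version) == ("lodash", "4.17.21")

whl = parse_url(
    "https://files.pythonhosted.org/packages/ab/cd/requests-2.28.0-py3-none-any.whl",
    "pypi",
)
assert (whl.package_name, whl.version) == ("requests", "2.28.0")

# Unknown types (or parse failures) fall back to generic filename parsing.
gen = parse_url("https://example.com/downloads/myapp-1.2.3.tar.gz", "generic")
assert (gen.package_name, gen.version) == ("myapp", "1.2.3")
```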

backend/app/config.py

@@ -1,5 +1,8 @@
from pydantic_settings import BaseSettings
from functools import lru_cache
from typing import Optional
import os
import re
class Settings(BaseSettings):
@@ -56,6 +59,16 @@ class Settings(BaseSettings):
# Initial admin user settings
admin_password: str = "" # Initial admin password (if empty, uses 'changeme123')
# Cache settings
cache_encryption_key: str = "" # Fernet key for encrypting upstream credentials (auto-generated if empty)
# Global cache settings override (None = use DB value, True/False = override DB)
cache_auto_create_system_projects: Optional[bool] = None # Override auto_create_system_projects
# PyPI Cache Worker settings
pypi_cache_workers: int = 5 # Number of concurrent cache workers
pypi_cache_max_depth: int = 10 # Maximum recursion depth for dependency caching
pypi_cache_max_attempts: int = 3 # Maximum retry attempts for failed cache tasks
# JWT Authentication settings (optional, for external identity providers)
jwt_enabled: bool = False # Enable JWT token validation
jwt_secret: str = "" # Secret key for HS256, or leave empty for RS256 with JWKS
@@ -80,6 +93,24 @@ class Settings(BaseSettings):
def is_production(self) -> bool:
return self.env.lower() == "production"
@property
def PORT(self) -> int:
"""Alias for server_port for compatibility."""
return self.server_port
# Uppercase aliases for PyPI cache settings (for backward compatibility)
@property
def PYPI_CACHE_WORKERS(self) -> int:
return self.pypi_cache_workers
@property
def PYPI_CACHE_MAX_DEPTH(self) -> int:
return self.pypi_cache_max_depth
@property
def PYPI_CACHE_MAX_ATTEMPTS(self) -> int:
return self.pypi_cache_max_attempts
class Config:
env_prefix = "ORCHARD_"
case_sensitive = False
@@ -88,3 +119,110 @@ class Settings(BaseSettings):
@lru_cache()
def get_settings() -> Settings:
return Settings()
class EnvUpstreamSource:
"""Represents an upstream source defined via environment variables."""
def __init__(
self,
name: str,
url: str,
source_type: str = "generic",
enabled: bool = True,
auth_type: str = "none",
username: Optional[str] = None,
password: Optional[str] = None,
priority: int = 100,
):
self.name = name
self.url = url
self.source_type = source_type
self.enabled = enabled
self.auth_type = auth_type
self.username = username
self.password = password
self.priority = priority
self.source = "env" # Mark as env-defined
def parse_upstream_sources_from_env() -> list[EnvUpstreamSource]:
"""
Parse upstream sources from environment variables.
Uses double underscore (__) as separator to allow source names with single underscores.
Pattern: ORCHARD_UPSTREAM__{NAME}__FIELD
Example:
ORCHARD_UPSTREAM__NPM_PRIVATE__URL=https://npm.corp.com
ORCHARD_UPSTREAM__NPM_PRIVATE__TYPE=npm
ORCHARD_UPSTREAM__NPM_PRIVATE__ENABLED=true
ORCHARD_UPSTREAM__NPM_PRIVATE__AUTH_TYPE=basic
ORCHARD_UPSTREAM__NPM_PRIVATE__USERNAME=reader
ORCHARD_UPSTREAM__NPM_PRIVATE__PASSWORD=secret
Returns:
List of EnvUpstreamSource objects parsed from environment variables.
"""
# Pattern: ORCHARD_UPSTREAM__{NAME}__{FIELD}
pattern = re.compile(r"^ORCHARD_UPSTREAM__([A-Z0-9_]+)__([A-Z_]+)$", re.IGNORECASE)
# Collect all env vars matching the pattern, grouped by source name
sources_data: dict[str, dict[str, str]] = {}
for key, value in os.environ.items():
match = pattern.match(key)
if match:
source_name = match.group(1).lower() # Normalize to lowercase
field = match.group(2).upper()
if source_name not in sources_data:
sources_data[source_name] = {}
sources_data[source_name][field] = value
# Build source objects from collected data
sources: list[EnvUpstreamSource] = []
for name, data in sources_data.items():
# URL is required
url = data.get("URL")
if not url:
continue # Skip sources without URL
# Parse boolean fields
def parse_bool(val: Optional[str], default: bool) -> bool:
if val is None:
return default
return val.lower() in ("true", "1", "yes", "on")
# Parse integer fields
def parse_int(val: Optional[str], default: int) -> int:
if val is None:
return default
try:
return int(val)
except ValueError:
return default
source = EnvUpstreamSource(
name=name.replace("_", "-"), # Convert underscores to hyphens for readability
url=url,
source_type=data.get("TYPE", "generic").lower(),
enabled=parse_bool(data.get("ENABLED"), True),
auth_type=data.get("AUTH_TYPE", "none").lower(),
username=data.get("USERNAME"),
password=data.get("PASSWORD"),
priority=parse_int(data.get("PRIORITY"), 100),
)
sources.append(source)
return sources
@lru_cache()
def get_env_upstream_sources() -> tuple[EnvUpstreamSource, ...]:
"""
Get cached list of upstream sources from environment variables.
Returns a tuple for hashability (required by lru_cache).
"""
return tuple(parse_upstream_sources_from_env())
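An illustrative use of the env parsing above, assuming the module path from this diff; the example values come from the docstring.

```python
import os

from backend.app.config import parse_upstream_sources_from_env

os.environ["ORCHARD_UPSTREAM__NPM_PRIVATE__URL"] = "https://npm.corp.com"
os.environ["ORCHARD_UPSTREAM__NPM_PRIVATE__TYPE"] = "npm"
os.environ["ORCHARD_UPSTREAM__NPM_PRIVATE__PRIORITY"] = "10"

src = next(s for s in parse_upstream_sources_from_env()
           if s.name == "npm-private")  # name lowercased, _ -> -
assert src.source_type == "npm"
assert src.priority == 10
assert src.source == "env"  # marked as env-defined
```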

backend/app/database.py

@@ -1,17 +1,34 @@
from sqlalchemy import create_engine, text, event
from sqlalchemy.orm import sessionmaker, Session
from sqlalchemy.pool import QueuePool
from typing import Generator
from typing import Generator, NamedTuple
from contextlib import contextmanager
import logging
import time
import hashlib
from .config import get_settings
from .models import Base
from .purge_seed_data import should_purge_seed_data, purge_seed_data
settings = get_settings()
logger = logging.getLogger(__name__)
class Migration(NamedTuple):
"""A database migration with a unique name and SQL to execute."""
name: str
sql: str
# PostgreSQL error codes that indicate "already exists" - safe to skip
SAFE_PG_ERROR_CODES = {
"42P07", # duplicate_table
"42701", # duplicate_column
"42710", # duplicate_object (index, constraint, etc.)
"42P16", # invalid_table_definition (e.g., column already exists)
}
# Build connect_args with query timeout if configured
connect_args = {}
if settings.database_query_timeout > 0:
@@ -64,290 +81,533 @@ def init_db():
# Run migrations for schema updates
_run_migrations()
# Purge seed data if requested (for transitioning to production-like environment)
if should_purge_seed_data():
db = SessionLocal()
try:
purge_seed_data(db)
finally:
db.close()
def _ensure_migrations_table(conn) -> None:
"""Create the migrations tracking table if it doesn't exist."""
conn.execute(text("""
CREATE TABLE IF NOT EXISTS _schema_migrations (
name VARCHAR(255) PRIMARY KEY,
checksum VARCHAR(64) NOT NULL,
applied_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
"""))
conn.commit()
def _get_applied_migrations(conn) -> dict[str, str]:
"""Get all applied migrations and their checksums."""
result = conn.execute(text(
"SELECT name, checksum FROM _schema_migrations"
))
return {row[0]: row[1] for row in result}
def _compute_checksum(sql: str) -> str:
"""Compute a checksum for migration SQL to detect changes."""
return hashlib.sha256(sql.strip().encode()).hexdigest()[:16]
def _is_safe_error(exception: Exception) -> bool:
"""Check if the error indicates the migration was already applied."""
# Check for psycopg2 errors with pgcode attribute
original = getattr(exception, "orig", None)
if original is not None:
pgcode = getattr(original, "pgcode", None)
if pgcode in SAFE_PG_ERROR_CODES:
return True
# Fallback: check error message for common "already exists" patterns
error_str = str(exception).lower()
safe_patterns = [
"already exists", # also covers "relation/column ... already exists"
"duplicate key",
]
return any(pattern in error_str for pattern in safe_patterns)
def _record_migration(conn, name: str, checksum: str) -> None:
"""Record a migration as applied."""
conn.execute(text(
"INSERT INTO _schema_migrations (name, checksum) VALUES (:name, :checksum)"
), {"name": name, "checksum": checksum})
conn.commit()
def _run_migrations():
"""Run manual migrations for schema updates"""
"""Run manual migrations for schema updates with tracking and error detection."""
migrations = [
# Add format_metadata column to artifacts table
"""
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM information_schema.columns
WHERE table_name = 'artifacts' AND column_name = 'format_metadata'
) THEN
ALTER TABLE artifacts ADD COLUMN format_metadata JSONB DEFAULT '{}';
END IF;
END $$;
""",
# Add format column to packages table
"""
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM information_schema.columns
WHERE table_name = 'packages' AND column_name = 'format'
) THEN
ALTER TABLE packages ADD COLUMN format VARCHAR(50) DEFAULT 'generic' NOT NULL;
CREATE INDEX IF NOT EXISTS idx_packages_format ON packages(format);
END IF;
END $$;
""",
# Add platform column to packages table
"""
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM information_schema.columns
WHERE table_name = 'packages' AND column_name = 'platform'
) THEN
ALTER TABLE packages ADD COLUMN platform VARCHAR(50) DEFAULT 'any' NOT NULL;
CREATE INDEX IF NOT EXISTS idx_packages_platform ON packages(platform);
END IF;
END $$;
""",
# Add ref_count index and constraints for artifacts
"""
DO $$
BEGIN
-- Add ref_count index
IF NOT EXISTS (
SELECT 1 FROM pg_indexes WHERE indexname = 'idx_artifacts_ref_count'
) THEN
CREATE INDEX idx_artifacts_ref_count ON artifacts(ref_count);
END IF;
-- Add ref_count >= 0 constraint
IF NOT EXISTS (
SELECT 1 FROM pg_constraint WHERE conname = 'check_ref_count_non_negative'
) THEN
ALTER TABLE artifacts ADD CONSTRAINT check_ref_count_non_negative CHECK (ref_count >= 0);
END IF;
END $$;
""",
# Add composite indexes for packages and tags
"""
DO $$
BEGIN
-- Composite index for package lookup by project and name
IF NOT EXISTS (
SELECT 1 FROM pg_indexes WHERE indexname = 'idx_packages_project_name'
) THEN
CREATE UNIQUE INDEX idx_packages_project_name ON packages(project_id, name);
END IF;
-- Composite index for tag lookup by package and name
IF NOT EXISTS (
SELECT 1 FROM pg_indexes WHERE indexname = 'idx_tags_package_name'
) THEN
CREATE UNIQUE INDEX idx_tags_package_name ON tags(package_id, name);
END IF;
-- Composite index for recent tags queries
IF NOT EXISTS (
SELECT 1 FROM pg_indexes WHERE indexname = 'idx_tags_package_created_at'
) THEN
CREATE INDEX idx_tags_package_created_at ON tags(package_id, created_at);
END IF;
END $$;
""",
# Add package_versions indexes and triggers (007_package_versions.sql)
"""
DO $$
BEGIN
-- Create indexes for package_versions if table exists
IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'package_versions') THEN
-- Indexes for common queries
IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_package_versions_package_id') THEN
CREATE INDEX idx_package_versions_package_id ON package_versions(package_id);
Migration(
name="001_add_format_metadata",
sql="""
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM information_schema.columns
WHERE table_name = 'artifacts' AND column_name = 'format_metadata'
) THEN
ALTER TABLE artifacts ADD COLUMN format_metadata JSONB DEFAULT '{}';
END IF;
IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_package_versions_artifact_id') THEN
CREATE INDEX idx_package_versions_artifact_id ON package_versions(artifact_id);
END $$;
""",
),
Migration(
name="002_add_package_format",
sql="""
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM information_schema.columns
WHERE table_name = 'packages' AND column_name = 'format'
) THEN
ALTER TABLE packages ADD COLUMN format VARCHAR(50) DEFAULT 'generic' NOT NULL;
CREATE INDEX IF NOT EXISTS idx_packages_format ON packages(format);
END IF;
IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_package_versions_package_version') THEN
CREATE INDEX idx_package_versions_package_version ON package_versions(package_id, version);
END $$;
""",
),
Migration(
name="003_add_package_platform",
sql="""
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM information_schema.columns
WHERE table_name = 'packages' AND column_name = 'platform'
) THEN
ALTER TABLE packages ADD COLUMN platform VARCHAR(50) DEFAULT 'any' NOT NULL;
CREATE INDEX IF NOT EXISTS idx_packages_platform ON packages(platform);
END IF;
END IF;
END $$;
""",
# Create ref_count trigger functions for tags (ensures triggers exist even if initial migration wasn't run)
"""
CREATE OR REPLACE FUNCTION increment_artifact_ref_count()
RETURNS TRIGGER AS $$
BEGIN
UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
""",
"""
CREATE OR REPLACE FUNCTION decrement_artifact_ref_count()
RETURNS TRIGGER AS $$
BEGIN
UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id;
RETURN OLD;
END;
$$ LANGUAGE plpgsql;
""",
"""
CREATE OR REPLACE FUNCTION update_artifact_ref_count()
RETURNS TRIGGER AS $$
BEGIN
IF OLD.artifact_id != NEW.artifact_id THEN
UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id;
END $$;
""",
),
Migration(
name="004_add_ref_count_index_constraint",
sql="""
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_indexes WHERE indexname = 'idx_artifacts_ref_count'
) THEN
CREATE INDEX idx_artifacts_ref_count ON artifacts(ref_count);
END IF;
IF NOT EXISTS (
SELECT 1 FROM pg_constraint WHERE conname = 'check_ref_count_non_negative'
) THEN
ALTER TABLE artifacts ADD CONSTRAINT check_ref_count_non_negative CHECK (ref_count >= 0);
END IF;
END $$;
""",
),
Migration(
name="005_add_composite_indexes",
sql="""
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_indexes WHERE indexname = 'idx_packages_project_name'
) THEN
CREATE UNIQUE INDEX idx_packages_project_name ON packages(project_id, name);
END IF;
IF NOT EXISTS (
SELECT 1 FROM pg_indexes WHERE indexname = 'idx_tags_package_name'
) THEN
CREATE UNIQUE INDEX idx_tags_package_name ON tags(package_id, name);
END IF;
IF NOT EXISTS (
SELECT 1 FROM pg_indexes WHERE indexname = 'idx_tags_package_created_at'
) THEN
CREATE INDEX idx_tags_package_created_at ON tags(package_id, created_at);
END IF;
END $$;
""",
),
Migration(
name="006_add_package_versions_indexes",
sql="""
DO $$
BEGIN
IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'package_versions') THEN
IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_package_versions_package_id') THEN
CREATE INDEX idx_package_versions_package_id ON package_versions(package_id);
END IF;
IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_package_versions_artifact_id') THEN
CREATE INDEX idx_package_versions_artifact_id ON package_versions(artifact_id);
END IF;
IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_package_versions_package_version') THEN
CREATE INDEX idx_package_versions_package_version ON package_versions(package_id, version);
END IF;
END IF;
END $$;
""",
),
Migration(
name="007_create_ref_count_trigger_functions",
sql="""
CREATE OR REPLACE FUNCTION increment_artifact_ref_count()
RETURNS TRIGGER AS $$
BEGIN
UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id;
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
""",
# Create triggers for tags ref_count management
"""
DO $$
BEGIN
-- Drop and recreate triggers to ensure they're current
DROP TRIGGER IF EXISTS tags_ref_count_insert_trigger ON tags;
CREATE TRIGGER tags_ref_count_insert_trigger
AFTER INSERT ON tags
FOR EACH ROW
EXECUTE FUNCTION increment_artifact_ref_count();
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
DROP TRIGGER IF EXISTS tags_ref_count_delete_trigger ON tags;
CREATE TRIGGER tags_ref_count_delete_trigger
AFTER DELETE ON tags
FOR EACH ROW
EXECUTE FUNCTION decrement_artifact_ref_count();
CREATE OR REPLACE FUNCTION decrement_artifact_ref_count()
RETURNS TRIGGER AS $$
BEGIN
UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id;
RETURN OLD;
END;
$$ LANGUAGE plpgsql;
DROP TRIGGER IF EXISTS tags_ref_count_update_trigger ON tags;
CREATE TRIGGER tags_ref_count_update_trigger
AFTER UPDATE ON tags
FOR EACH ROW
WHEN (OLD.artifact_id IS DISTINCT FROM NEW.artifact_id)
EXECUTE FUNCTION update_artifact_ref_count();
END $$;
""",
# Create ref_count trigger functions for package_versions
"""
CREATE OR REPLACE FUNCTION increment_version_ref_count()
RETURNS TRIGGER AS $$
BEGIN
UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
""",
"""
CREATE OR REPLACE FUNCTION update_artifact_ref_count()
RETURNS TRIGGER AS $$
BEGIN
IF OLD.artifact_id != NEW.artifact_id THEN
UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id;
UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id;
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
""",
),
Migration(
name="008_create_tags_ref_count_triggers",
sql="""
DO $$
BEGIN
DROP TRIGGER IF EXISTS tags_ref_count_insert_trigger ON tags;
CREATE TRIGGER tags_ref_count_insert_trigger
AFTER INSERT ON tags
FOR EACH ROW
EXECUTE FUNCTION increment_artifact_ref_count();
DROP TRIGGER IF EXISTS tags_ref_count_delete_trigger ON tags;
CREATE TRIGGER tags_ref_count_delete_trigger
AFTER DELETE ON tags
FOR EACH ROW
EXECUTE FUNCTION decrement_artifact_ref_count();
DROP TRIGGER IF EXISTS tags_ref_count_update_trigger ON tags;
CREATE TRIGGER tags_ref_count_update_trigger
AFTER UPDATE ON tags
FOR EACH ROW
WHEN (OLD.artifact_id IS DISTINCT FROM NEW.artifact_id)
EXECUTE FUNCTION update_artifact_ref_count();
END $$;
""",
),
Migration(
name="009_create_version_ref_count_functions",
sql="""
CREATE OR REPLACE FUNCTION increment_version_ref_count()
RETURNS TRIGGER AS $$
BEGIN
UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION decrement_version_ref_count()
RETURNS TRIGGER AS $$
BEGIN
UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id;
RETURN OLD;
END;
$$ LANGUAGE plpgsql;
""",
),
Migration(
name="010_create_package_versions_triggers",
sql="""
DO $$
BEGIN
IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'package_versions') THEN
DROP TRIGGER IF EXISTS package_versions_ref_count_insert ON package_versions;
CREATE TRIGGER package_versions_ref_count_insert
AFTER INSERT ON package_versions
FOR EACH ROW
EXECUTE FUNCTION increment_version_ref_count();
DROP TRIGGER IF EXISTS package_versions_ref_count_delete ON package_versions;
CREATE TRIGGER package_versions_ref_count_delete
AFTER DELETE ON package_versions
FOR EACH ROW
EXECUTE FUNCTION decrement_version_ref_count();
END IF;
END $$;
""",
),
Migration(
name="011_migrate_semver_tags_to_versions",
sql=r"""
DO $$
BEGIN
IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'package_versions') THEN
INSERT INTO package_versions (id, package_id, artifact_id, version, version_source, created_by, created_at)
SELECT
gen_random_uuid(),
t.package_id,
t.artifact_id,
CASE WHEN t.name LIKE 'v%' THEN substring(t.name from 2) ELSE t.name END,
'migrated_from_tag',
t.created_by,
t.created_at
FROM tags t
WHERE t.name ~ '^v?[0-9]+\.[0-9]+(\.[0-9]+)?([-.][a-zA-Z0-9]+)?$'
ON CONFLICT (package_id, version) DO NOTHING;
END IF;
END $$;
""",
),
Migration(
name="012_create_teams_table",
sql="""
CREATE TABLE IF NOT EXISTS teams (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name VARCHAR(255) NOT NULL,
slug VARCHAR(255) NOT NULL UNIQUE,
description TEXT,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
created_by VARCHAR(255) NOT NULL,
settings JSONB DEFAULT '{}'
);
""",
),
Migration(
name="013_create_team_memberships_table",
sql="""
CREATE TABLE IF NOT EXISTS team_memberships (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
team_id UUID NOT NULL REFERENCES teams(id) ON DELETE CASCADE,
user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
role VARCHAR(50) NOT NULL DEFAULT 'member',
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
invited_by VARCHAR(255),
CONSTRAINT team_memberships_unique UNIQUE (team_id, user_id),
CONSTRAINT team_memberships_role_check CHECK (role IN ('owner', 'admin', 'member'))
);
""",
),
Migration(
name="014_add_team_id_to_projects",
sql="""
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM information_schema.columns
WHERE table_name = 'projects' AND column_name = 'team_id'
) THEN
ALTER TABLE projects ADD COLUMN team_id UUID REFERENCES teams(id) ON DELETE SET NULL;
CREATE INDEX IF NOT EXISTS idx_projects_team_id ON projects(team_id);
END IF;
END $$;
""",
),
Migration(
name="015_add_teams_indexes",
sql="""
DO $$
BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_teams_slug') THEN
CREATE INDEX idx_teams_slug ON teams(slug);
END IF;
IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_teams_created_by') THEN
CREATE INDEX idx_teams_created_by ON teams(created_by);
END IF;
IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_team_memberships_team_id') THEN
CREATE INDEX idx_team_memberships_team_id ON team_memberships(team_id);
END IF;
IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_team_memberships_user_id') THEN
CREATE INDEX idx_team_memberships_user_id ON team_memberships(user_id);
END IF;
END $$;
""",
),
Migration(
name="016_add_is_system_to_projects",
sql="""
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM information_schema.columns
WHERE table_name = 'projects' AND column_name = 'is_system'
) THEN
ALTER TABLE projects ADD COLUMN is_system BOOLEAN NOT NULL DEFAULT FALSE;
CREATE INDEX IF NOT EXISTS idx_projects_is_system ON projects(is_system);
END IF;
END $$;
""",
),
Migration(
name="017_create_upstream_sources",
sql="""
CREATE TABLE IF NOT EXISTS upstream_sources (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name VARCHAR(255) NOT NULL UNIQUE,
source_type VARCHAR(50) NOT NULL DEFAULT 'generic',
url VARCHAR(2048) NOT NULL,
enabled BOOLEAN NOT NULL DEFAULT FALSE,
auth_type VARCHAR(20) NOT NULL DEFAULT 'none',
username VARCHAR(255),
password_encrypted BYTEA,
headers_encrypted BYTEA,
priority INTEGER NOT NULL DEFAULT 100,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
CONSTRAINT check_source_type CHECK (
source_type IN ('npm', 'pypi', 'maven', 'docker', 'helm', 'nuget', 'deb', 'rpm', 'generic')
),
CONSTRAINT check_auth_type CHECK (
auth_type IN ('none', 'basic', 'bearer', 'api_key')
),
CONSTRAINT check_priority_positive CHECK (priority > 0)
);
CREATE INDEX IF NOT EXISTS idx_upstream_sources_enabled ON upstream_sources(enabled);
CREATE INDEX IF NOT EXISTS idx_upstream_sources_source_type ON upstream_sources(source_type);
CREATE INDEX IF NOT EXISTS idx_upstream_sources_priority ON upstream_sources(priority);
""",
),
Migration(
name="018_create_cache_settings",
sql="""
CREATE TABLE IF NOT EXISTS cache_settings (
id INTEGER PRIMARY KEY DEFAULT 1,
auto_create_system_projects BOOLEAN NOT NULL DEFAULT TRUE,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
CONSTRAINT check_cache_settings_singleton CHECK (id = 1)
);
INSERT INTO cache_settings (id, auto_create_system_projects)
VALUES (1, TRUE)
ON CONFLICT (id) DO NOTHING;
""",
),
Migration(
name="019_create_cached_urls",
sql="""
CREATE TABLE IF NOT EXISTS cached_urls (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
url VARCHAR(4096) NOT NULL,
url_hash VARCHAR(64) NOT NULL UNIQUE,
artifact_id VARCHAR(64) NOT NULL REFERENCES artifacts(id),
source_id UUID REFERENCES upstream_sources(id) ON DELETE SET NULL,
fetched_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
response_headers JSONB DEFAULT '{}',
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_cached_urls_url_hash ON cached_urls(url_hash);
CREATE INDEX IF NOT EXISTS idx_cached_urls_artifact_id ON cached_urls(artifact_id);
CREATE INDEX IF NOT EXISTS idx_cached_urls_source_id ON cached_urls(source_id);
CREATE INDEX IF NOT EXISTS idx_cached_urls_fetched_at ON cached_urls(fetched_at);
""",
),
Migration(
name="020_seed_default_upstream_sources",
sql="""
-- Originally seeded public sources, but these are no longer used.
-- Migration 023 deletes any previously seeded sources.
-- This migration is now a no-op for fresh installs.
SELECT 1;
""",
),
Migration(
name="021_remove_is_public_from_upstream_sources",
sql="""
DO $$
BEGIN
-- Drop the index if it exists
DROP INDEX IF EXISTS idx_upstream_sources_is_public;
-- Drop the column if it exists
IF EXISTS (
SELECT 1 FROM information_schema.columns
WHERE table_name = 'upstream_sources' AND column_name = 'is_public'
) THEN
ALTER TABLE upstream_sources DROP COLUMN is_public;
END IF;
END $$;
""",
),
Migration(
name="022_remove_allow_public_internet_from_cache_settings",
sql="""
DO $$
BEGIN
IF EXISTS (
SELECT 1 FROM information_schema.columns
WHERE table_name = 'cache_settings' AND column_name = 'allow_public_internet'
) THEN
ALTER TABLE cache_settings DROP COLUMN allow_public_internet;
END IF;
END $$;
""",
),
Migration(
name="023_delete_seeded_public_sources",
sql="""
-- Delete the seeded public sources that were added by migration 020
DELETE FROM upstream_sources
WHERE name IN ('npm-public', 'pypi-public', 'maven-central', 'docker-hub');
""",
),
]
with engine.connect() as conn:
# Ensure migrations tracking table exists
_ensure_migrations_table(conn)
# Get already-applied migrations
applied = _get_applied_migrations(conn)
for migration in migrations:
checksum = _compute_checksum(migration.sql)
# Check if migration was already applied
if migration.name in applied:
stored_checksum = applied[migration.name]
if stored_checksum != checksum:
logger.warning(
f"Migration '{migration.name}' has changed since it was applied! "
f"Stored checksum: {stored_checksum}, current: {checksum}"
)
continue
# Run the migration
try:
logger.info(f"Running migration: {migration.name}")
conn.execute(text(migration.sql))
conn.commit()
_record_migration(conn, migration.name, checksum)
logger.info(f"Migration '{migration.name}' applied successfully")
except Exception as e:
logger.warning(f"Migration failed (may already be applied): {e}")
conn.rollback()
if _is_safe_error(e):
# Migration was already applied (schema already exists)
logger.info(
f"Migration '{migration.name}' already applied (schema exists), recording as complete"
)
_record_migration(conn, migration.name, checksum)
else:
# Real error - fail hard
logger.error(f"Migration '{migration.name}' failed: {e}")
raise RuntimeError(
f"Migration '{migration.name}' failed with error: {e}"
) from e
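The helpers referenced in this loop (_compute_checksum, _is_safe_error, _record_migration) live outside this excerpt. A rough sketch of the first two, assuming SHA-256 checksums and string matching on duplicate-object errors -- the names and error text here are illustrative, not the repository's actual implementation:

import hashlib

def _compute_checksum(sql: str) -> str:
    # Hash the migration SQL so edits to an already-applied migration are detected
    return hashlib.sha256(sql.encode("utf-8")).hexdigest()

def _is_safe_error(exc: Exception) -> bool:
    # Treat "already exists"/duplicate errors as evidence the schema is in place
    message = str(exc).lower()
    return "already exists" in message or "duplicate" in message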
def get_db() -> Generator[Session, None, None]:

backend/app/dependencies.py
View File

@@ -10,11 +10,20 @@ Handles:
- Conflict detection
"""
import re
import yaml
from typing import List, Dict, Any, Optional, Set, Tuple
from sqlalchemy.orm import Session
from sqlalchemy import and_
# Import packaging for PEP 440 version matching
try:
from packaging.specifiers import SpecifierSet, InvalidSpecifier
from packaging.version import Version, InvalidVersion
HAS_PACKAGING = True
except ImportError:
HAS_PACKAGING = False
from .models import (
Project,
Package,
@@ -304,6 +313,87 @@ def get_reverse_dependencies(
)
def _is_version_constraint(version_str: str) -> bool:
"""Check if a version string contains constraint operators."""
if not version_str:
return False
# Check for common constraint operators
return any(op in version_str for op in ['>=', '<=', '!=', '~=', '>', '<', '==', '*'])
def _resolve_version_constraint(
db: Session,
package: Package,
constraint: str,
) -> Optional[Tuple[str, str, int]]:
"""
Resolve a version constraint (e.g., '>=1.9') to a specific version.
Uses PEP 440 version matching to find the best matching version.
Args:
db: Database session
package: Package to search versions in
constraint: Version constraint string (e.g., '>=1.9', '<2.0,>=1.5')
Returns:
Tuple of (artifact_id, resolved_version, size) or None if not found
"""
if not HAS_PACKAGING:
# Fallback: if packaging not available, can't do constraint matching
return None
# Handle wildcard - return latest version
if constraint == '*':
# Get the latest version by created_at
latest = db.query(PackageVersion).filter(
PackageVersion.package_id == package.id,
).order_by(PackageVersion.created_at.desc()).first()
if latest:
artifact = db.query(Artifact).filter(Artifact.id == latest.artifact_id).first()
if artifact:
return (artifact.id, latest.version, artifact.size)
return None
try:
specifier = SpecifierSet(constraint)
except InvalidSpecifier:
# Invalid constraint, try as exact version
return None
# Get all versions for this package
all_versions = db.query(PackageVersion).filter(
PackageVersion.package_id == package.id,
).all()
if not all_versions:
return None
# Find matching versions
matching = []
for pv in all_versions:
try:
v = Version(pv.version)
if v in specifier:
matching.append((pv, v))
except InvalidVersion:
# Skip invalid versions
continue
if not matching:
return None
# Sort by version (descending) and return the latest matching
matching.sort(key=lambda x: x[1], reverse=True)
best_match = matching[0][0]
artifact = db.query(Artifact).filter(Artifact.id == best_match.artifact_id).first()
if artifact:
return (artifact.id, best_match.version, artifact.size)
return None
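A standalone illustration of the PEP 440 matching used above (requires the packaging library): the specifier keeps only satisfying candidates, and the sort picks the highest.

from packaging.specifiers import SpecifierSet
from packaging.version import Version

candidates = ["1.5.0", "1.9.2", "1.10.0", "2.0.0"]
spec = SpecifierSet(">=1.9,<2.0")
matching = sorted((Version(v) for v in candidates if Version(v) in spec), reverse=True)
print(matching[0])  # 1.10.0 -- the latest version inside the constraint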
def _resolve_dependency_to_artifact(
db: Session,
project_name: str,
@@ -314,11 +404,17 @@ def _resolve_dependency_to_artifact(
"""
Resolve a dependency constraint to an artifact ID.
Supports:
- Exact version matching (e.g., '1.2.3')
- Version constraints (e.g., '>=1.9', '<2.0,>=1.5')
- Tag matching
- Wildcard ('*' for any version)
Args:
db: Database session
project_name: Project name
package_name: Package name
version: Version or version constraint
tag: Tag constraint
Returns:
@@ -337,17 +433,23 @@ def _resolve_dependency_to_artifact(
return None
if version:
# Check if this is a version constraint (>=, <, etc.) or an exact version
if _is_version_constraint(version):
result = _resolve_version_constraint(db, package, version)
if result:
return result
else:
# Look up by exact version
pkg_version = db.query(PackageVersion).filter(
PackageVersion.package_id == package.id,
PackageVersion.version == version,
).first()
if pkg_version:
artifact = db.query(Artifact).filter(
Artifact.id == pkg_version.artifact_id
).first()
if artifact:
return (artifact.id, version, artifact.size)
# Also check if there's a tag with this exact name
tag_record = db.query(Tag).filter(

160
backend/app/encryption.py Normal file
View File

@@ -0,0 +1,160 @@
"""
Encryption utilities for sensitive data storage.
Uses Fernet symmetric encryption for credentials like upstream passwords.
The encryption key is sourced from ORCHARD_CACHE_ENCRYPTION_KEY environment variable.
If not set, a random key is generated on startup (with a warning).
"""
import base64
import logging
import os
import secrets
from functools import lru_cache
from typing import Optional
from cryptography.fernet import Fernet, InvalidToken
logger = logging.getLogger(__name__)
# Module-level storage for auto-generated key (only used if env var not set)
_generated_key: Optional[bytes] = None
def _get_key_from_env() -> Optional[bytes]:
"""Get encryption key from environment variable."""
key_str = os.environ.get("ORCHARD_CACHE_ENCRYPTION_KEY", "")
if not key_str:
return None
# Support both raw base64 and url-safe base64 formats
try:
# Try to decode as-is (Fernet keys are url-safe base64)
key_bytes = key_str.encode("utf-8")
# Validate it's a valid Fernet key by trying to create a Fernet instance
Fernet(key_bytes)
return key_bytes
except Exception:
pass
# Try base64 decoding if it's a raw 32-byte key encoded as base64
try:
decoded = base64.urlsafe_b64decode(key_str)
if len(decoded) == 32:
# Re-encode as url-safe base64 for Fernet
key_bytes = base64.urlsafe_b64encode(decoded)
Fernet(key_bytes)
return key_bytes
except Exception:
pass
logger.error(
"ORCHARD_CACHE_ENCRYPTION_KEY is set but invalid. "
"Must be a valid Fernet key (32 bytes, url-safe base64 encoded). "
"Generate one with: python -c \"from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())\""
)
return None
def get_encryption_key() -> bytes:
"""
Get the Fernet encryption key.
Returns the key from ORCHARD_CACHE_ENCRYPTION_KEY if set and valid,
otherwise generates a random key (with a warning logged).
The generated key is cached for the lifetime of the process.
"""
global _generated_key
# Try to get from environment
env_key = _get_key_from_env()
if env_key:
return env_key
# Generate a new key if needed
if _generated_key is None:
_generated_key = Fernet.generate_key()
logger.warning(
"ORCHARD_CACHE_ENCRYPTION_KEY not set - using auto-generated key. "
"Encrypted credentials will be lost on restart! "
"Set ORCHARD_CACHE_ENCRYPTION_KEY for persistent encryption. "
"Generate a key with: python -c \"from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())\""
)
return _generated_key
@lru_cache(maxsize=1)
def _get_fernet() -> Fernet:
"""Get a cached Fernet instance."""
return Fernet(get_encryption_key())
def encrypt_value(plaintext: str) -> bytes:
"""
Encrypt a string value using Fernet.
Args:
plaintext: The string to encrypt
Returns:
Encrypted bytes (includes Fernet token with timestamp)
"""
if not plaintext:
raise ValueError("Cannot encrypt empty value")
fernet = _get_fernet()
return fernet.encrypt(plaintext.encode("utf-8"))
def decrypt_value(ciphertext: bytes) -> str:
"""
Decrypt a Fernet-encrypted value.
Args:
ciphertext: The encrypted bytes
Returns:
Decrypted string
Raises:
InvalidToken: If decryption fails (wrong key or corrupted data)
"""
if not ciphertext:
raise ValueError("Cannot decrypt empty value")
fernet = _get_fernet()
return fernet.decrypt(ciphertext).decode("utf-8")
def can_decrypt(ciphertext: bytes) -> bool:
"""
Check if a value can be decrypted with the current key.
Useful for checking if credentials are still valid after key rotation.
Args:
ciphertext: The encrypted bytes
Returns:
True if decryption succeeds, False otherwise
"""
if not ciphertext:
return False
try:
decrypt_value(ciphertext)
return True
except (InvalidToken, ValueError):
return False
def generate_key() -> str:
"""
Generate a new Fernet encryption key.
Returns:
A valid Fernet key as a string (url-safe base64 encoded)
"""
return Fernet.generate_key().decode("utf-8")
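A minimal round-trip sketch of this module, assuming it is importable as backend.app.encryption and that losing an auto-generated key between runs is acceptable for the demo:

from backend.app.encryption import encrypt_value, decrypt_value, can_decrypt

token = encrypt_value("upstream-registry-password")  # Fernet token bytes
assert can_decrypt(token)                            # current key can open it
assert decrypt_value(token) == "upstream-registry-password"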

backend/app/main.py
View File

@@ -11,9 +11,11 @@ from slowapi.errors import RateLimitExceeded
from .config import get_settings
from .database import init_db, SessionLocal
from .routes import router
from .pypi_proxy import router as pypi_router
from .seed import seed_database
from .auth import create_default_admin
from .rate_limit import limiter
from .pypi_cache_worker import init_cache_worker_pool, shutdown_cache_worker_pool
settings = get_settings()
logging.basicConfig(level=logging.INFO)
@@ -48,8 +50,13 @@ async def lifespan(app: FastAPI):
else:
logger.info(f"Running in {settings.env} mode - skipping seed data")
# Initialize PyPI cache worker pool
init_cache_worker_pool()
yield
# Shutdown: cleanup
shutdown_cache_worker_pool()
app = FastAPI(
@@ -65,6 +72,7 @@ app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
# Include API routes
app.include_router(router)
app.include_router(pypi_router)
# Serve static files (React build) if the directory exists
static_dir = os.path.join(os.path.dirname(__file__), "..", "..", "frontend", "dist")

backend/app/models.py
View File

@@ -12,6 +12,7 @@ from sqlalchemy import (
Index,
JSON,
ARRAY,
LargeBinary,
)
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import relationship, declarative_base
@@ -27,6 +28,7 @@ class Project(Base):
name = Column(String(255), unique=True, nullable=False)
description = Column(Text)
is_public = Column(Boolean, default=True)
is_system = Column(Boolean, default=False, nullable=False)
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
updated_at = Column(
DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow
@@ -46,6 +48,7 @@ class Project(Base):
Index("idx_projects_name", "name"),
Index("idx_projects_created_by", "created_by"),
Index("idx_projects_team_id", "team_id"),
Index("idx_projects_is_system", "is_system"),
)
@@ -637,3 +640,233 @@ class TeamMembership(Base):
name="check_team_role",
),
)
# =============================================================================
# Upstream Caching Models
# =============================================================================
# Valid source types for upstream registries
SOURCE_TYPES = ["npm", "pypi", "maven", "docker", "helm", "nuget", "deb", "rpm", "generic"]
# Valid authentication types
AUTH_TYPES = ["none", "basic", "bearer", "api_key"]
class UpstreamSource(Base):
"""Configuration for an upstream artifact registry.
Stores connection details and authentication for upstream registries
like npm, PyPI, Maven Central, or private Artifactory instances.
"""
__tablename__ = "upstream_sources"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
name = Column(String(255), unique=True, nullable=False)
source_type = Column(String(50), default="generic", nullable=False)
url = Column(String(2048), nullable=False)
enabled = Column(Boolean, default=False, nullable=False)
auth_type = Column(String(20), default="none", nullable=False)
username = Column(String(255))
password_encrypted = Column(LargeBinary)
headers_encrypted = Column(LargeBinary)
priority = Column(Integer, default=100, nullable=False)
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
updated_at = Column(
DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow
)
# Relationships
cached_urls = relationship("CachedUrl", back_populates="source")
__table_args__ = (
Index("idx_upstream_sources_enabled", "enabled"),
Index("idx_upstream_sources_source_type", "source_type"),
Index("idx_upstream_sources_priority", "priority"),
CheckConstraint(
"source_type IN ('npm', 'pypi', 'maven', 'docker', 'helm', 'nuget', 'deb', 'rpm', 'generic')",
name="check_source_type",
),
CheckConstraint(
"auth_type IN ('none', 'basic', 'bearer', 'api_key')",
name="check_auth_type",
),
CheckConstraint("priority > 0", name="check_priority_positive"),
)
def set_password(self, password: str) -> None:
"""Encrypt and store a password/token."""
from .encryption import encrypt_value
if password:
self.password_encrypted = encrypt_value(password)
else:
self.password_encrypted = None
def get_password(self) -> str | None:
"""Decrypt and return the stored password/token."""
from .encryption import decrypt_value
if self.password_encrypted:
try:
return decrypt_value(self.password_encrypted)
except Exception:
return None
return None
def has_password(self) -> bool:
"""Check if a password/token is stored."""
return self.password_encrypted is not None
def set_headers(self, headers: dict) -> None:
"""Encrypt and store custom headers as JSON."""
from .encryption import encrypt_value
import json
if headers:
self.headers_encrypted = encrypt_value(json.dumps(headers))
else:
self.headers_encrypted = None
def get_headers(self) -> dict | None:
"""Decrypt and return custom headers."""
from .encryption import decrypt_value
import json
if self.headers_encrypted:
try:
return json.loads(decrypt_value(self.headers_encrypted))
except Exception:
return None
return None
class CacheSettings(Base):
"""Global cache settings (singleton table).
Controls behavior of the upstream caching system.
"""
__tablename__ = "cache_settings"
id = Column(Integer, primary_key=True, default=1)
auto_create_system_projects = Column(Boolean, default=True, nullable=False)
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
updated_at = Column(
DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow
)
__table_args__ = (
CheckConstraint("id = 1", name="check_cache_settings_singleton"),
)
class CachedUrl(Base):
"""Tracks URL to artifact mappings for provenance.
Records which URLs have been cached and maps them to their stored artifacts.
Enables "is this URL already cached?" lookups and audit trails.
"""
__tablename__ = "cached_urls"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
url = Column(String(4096), nullable=False)
url_hash = Column(String(64), unique=True, nullable=False)
artifact_id = Column(
String(64), ForeignKey("artifacts.id"), nullable=False
)
source_id = Column(
UUID(as_uuid=True),
ForeignKey("upstream_sources.id", ondelete="SET NULL"),
)
fetched_at = Column(DateTime(timezone=True), default=datetime.utcnow, nullable=False)
response_headers = Column(JSON, default=dict)
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
# Relationships
artifact = relationship("Artifact")
source = relationship("UpstreamSource", back_populates="cached_urls")
__table_args__ = (
Index("idx_cached_urls_url_hash", "url_hash"),
Index("idx_cached_urls_artifact_id", "artifact_id"),
Index("idx_cached_urls_source_id", "source_id"),
Index("idx_cached_urls_fetched_at", "fetched_at"),
)
@staticmethod
def compute_url_hash(url: str) -> str:
"""Compute SHA256 hash of a URL for fast lookups."""
import hashlib
return hashlib.sha256(url.encode("utf-8")).hexdigest()
class PyPICacheTask(Base):
"""Task for caching a PyPI package and its dependencies.
Tracks the status of background caching operations with retry support.
Used by the PyPI proxy to ensure reliable dependency caching.
"""
__tablename__ = "pypi_cache_tasks"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
# What to cache
package_name = Column(String(255), nullable=False)
version_constraint = Column(String(255))
# Origin tracking
parent_task_id = Column(
UUID(as_uuid=True),
ForeignKey("pypi_cache_tasks.id", ondelete="SET NULL"),
)
depth = Column(Integer, nullable=False, default=0)
triggered_by_artifact = Column(
String(64),
ForeignKey("artifacts.id", ondelete="SET NULL"),
)
# Status
status = Column(String(20), nullable=False, default="pending")
attempts = Column(Integer, nullable=False, default=0)
max_attempts = Column(Integer, nullable=False, default=3)
# Results
cached_artifact_id = Column(
String(64),
ForeignKey("artifacts.id", ondelete="SET NULL"),
)
error_message = Column(Text)
# Timing
created_at = Column(DateTime(timezone=True), nullable=False, default=datetime.utcnow)
started_at = Column(DateTime(timezone=True))
completed_at = Column(DateTime(timezone=True))
next_retry_at = Column(DateTime(timezone=True))
# Relationships
parent_task = relationship(
"PyPICacheTask",
remote_side=[id],
backref="child_tasks",
)
__table_args__ = (
Index("idx_pypi_cache_tasks_status_retry", "status", "next_retry_at"),
Index("idx_pypi_cache_tasks_package_status", "package_name", "status"),
Index("idx_pypi_cache_tasks_parent", "parent_task_id"),
Index("idx_pypi_cache_tasks_triggered_by", "triggered_by_artifact"),
Index("idx_pypi_cache_tasks_cached_artifact", "cached_artifact_id"),
Index("idx_pypi_cache_tasks_depth_created", "depth", "created_at"),
CheckConstraint(
"status IN ('pending', 'in_progress', 'completed', 'failed')",
name="check_task_status",
),
CheckConstraint("depth >= 0", name="check_depth_non_negative"),
CheckConstraint("attempts >= 0", name="check_attempts_non_negative"),
)

View File

@@ -0,0 +1,212 @@
"""
Purge seed/demo data from the database.
This is used when transitioning an environment from dev/test to production-like.
Triggered by setting ORCHARD_PURGE_SEED_DATA=true environment variable.
"""
import logging
import os
from sqlalchemy.orm import Session
from .models import (
Project,
Package,
Artifact,
Tag,
Upload,
PackageVersion,
ArtifactDependency,
Team,
TeamMembership,
User,
AccessPermission,
)
from .storage import get_storage
logger = logging.getLogger(__name__)
# Seed data identifiers (from seed.py)
SEED_PROJECT_NAMES = [
"frontend-libs",
"backend-services",
"mobile-apps",
"internal-tools",
]
SEED_TEAM_SLUG = "demo-team"
SEED_USERNAMES = [
"alice",
"bob",
"charlie",
"diana",
"eve",
"frank",
]
def should_purge_seed_data() -> bool:
"""Check if seed data should be purged based on environment variable."""
return os.environ.get("ORCHARD_PURGE_SEED_DATA", "").lower() == "true"
def purge_seed_data(db: Session) -> dict:
"""
Purge all seed/demo data from the database.
Returns a dict with counts of deleted items.
"""
logger.warning("PURGING SEED DATA - This will delete demo projects, users, and teams")
results = {
"dependencies_deleted": 0,
"tags_deleted": 0,
"versions_deleted": 0,
"uploads_deleted": 0,
"artifacts_deleted": 0,
"packages_deleted": 0,
"projects_deleted": 0,
"permissions_deleted": 0,
"team_memberships_deleted": 0,
"users_deleted": 0,
"teams_deleted": 0,
"s3_objects_deleted": 0,
}
storage = get_storage()
# Find seed projects
seed_projects = db.query(Project).filter(Project.name.in_(SEED_PROJECT_NAMES)).all()
seed_project_ids = [p.id for p in seed_projects]
if not seed_projects:
logger.info("No seed projects found, nothing to purge")
return results
logger.info(f"Found {len(seed_projects)} seed projects to purge")
# Find packages in seed projects
seed_packages = db.query(Package).filter(Package.project_id.in_(seed_project_ids)).all()
seed_package_ids = [p.id for p in seed_packages]
# Find artifacts in seed packages (via uploads)
seed_uploads = db.query(Upload).filter(Upload.package_id.in_(seed_package_ids)).all()
seed_artifact_ids = list(set(u.artifact_id for u in seed_uploads))
# Delete in order (respecting foreign keys)
# 1. Delete artifact dependencies
if seed_artifact_ids:
count = db.query(ArtifactDependency).filter(
ArtifactDependency.artifact_id.in_(seed_artifact_ids)
).delete(synchronize_session=False)
results["dependencies_deleted"] = count
logger.info(f"Deleted {count} artifact dependencies")
# 2. Delete tags
if seed_package_ids:
count = db.query(Tag).filter(Tag.package_id.in_(seed_package_ids)).delete(
synchronize_session=False
)
results["tags_deleted"] = count
logger.info(f"Deleted {count} tags")
# 3. Delete package versions
if seed_package_ids:
count = db.query(PackageVersion).filter(
PackageVersion.package_id.in_(seed_package_ids)
).delete(synchronize_session=False)
results["versions_deleted"] = count
logger.info(f"Deleted {count} package versions")
# 4. Delete uploads
if seed_package_ids:
count = db.query(Upload).filter(Upload.package_id.in_(seed_package_ids)).delete(
synchronize_session=False
)
results["uploads_deleted"] = count
logger.info(f"Deleted {count} uploads")
# 5. Delete S3 objects for seed artifacts
if seed_artifact_ids:
seed_artifacts = db.query(Artifact).filter(Artifact.id.in_(seed_artifact_ids)).all()
for artifact in seed_artifacts:
if artifact.s3_key:
try:
storage.client.delete_object(Bucket=storage.bucket, Key=artifact.s3_key)
results["s3_objects_deleted"] += 1
except Exception as e:
logger.warning(f"Failed to delete S3 object {artifact.s3_key}: {e}")
logger.info(f"Deleted {results['s3_objects_deleted']} S3 objects")
# 6. Delete artifacts (only those with ref_count that would be 0 after our deletions)
# Since we deleted all tags/versions pointing to these artifacts, we can delete them
if seed_artifact_ids:
count = db.query(Artifact).filter(Artifact.id.in_(seed_artifact_ids)).delete(
synchronize_session=False
)
results["artifacts_deleted"] = count
logger.info(f"Deleted {count} artifacts")
# 7. Delete packages
if seed_package_ids:
count = db.query(Package).filter(Package.id.in_(seed_package_ids)).delete(
synchronize_session=False
)
results["packages_deleted"] = count
logger.info(f"Deleted {count} packages")
# 8. Delete access permissions for seed projects
if seed_project_ids:
count = db.query(AccessPermission).filter(
AccessPermission.project_id.in_(seed_project_ids)
).delete(synchronize_session=False)
results["permissions_deleted"] = count
logger.info(f"Deleted {count} access permissions")
# 9. Delete seed projects
count = db.query(Project).filter(Project.name.in_(SEED_PROJECT_NAMES)).delete(
synchronize_session=False
)
results["projects_deleted"] = count
logger.info(f"Deleted {count} projects")
# 10. Find and delete seed team
seed_team = db.query(Team).filter(Team.slug == SEED_TEAM_SLUG).first()
if seed_team:
# Delete team memberships first
count = db.query(TeamMembership).filter(
TeamMembership.team_id == seed_team.id
).delete(synchronize_session=False)
results["team_memberships_deleted"] = count
logger.info(f"Deleted {count} team memberships")
# Delete the team
db.delete(seed_team)
results["teams_deleted"] = 1
logger.info(f"Deleted team: {SEED_TEAM_SLUG}")
# 11. Delete seed users (but NOT admin)
seed_users = db.query(User).filter(User.username.in_(SEED_USERNAMES)).all()
for user in seed_users:
# Delete any remaining team memberships for this user
db.query(TeamMembership).filter(TeamMembership.user_id == user.id).delete(
synchronize_session=False
)
# Delete any access permissions for this user
# Note: AccessPermission.user_id is VARCHAR (username), not UUID
db.query(AccessPermission).filter(AccessPermission.user_id == user.username).delete(
synchronize_session=False
)
db.delete(user)
results["users_deleted"] += 1
if results["users_deleted"] > 0:
logger.info(f"Deleted {results['users_deleted']} seed users")
db.commit()
logger.warning("SEED DATA PURGE COMPLETE")
logger.info(f"Purge results: {results}")
return results
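Presumably wired into startup roughly like the following hedged sketch; the actual call site is not shown in this excerpt, and the SessionLocal import path is assumed:

from backend.app.database import SessionLocal

def maybe_purge_on_startup() -> None:
    # Only destructive when ORCHARD_PURGE_SEED_DATA=true is set explicitly
    if should_purge_seed_data():
        db = SessionLocal()
        try:
            purge_seed_data(db)
        finally:
            db.close()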

backend/app/pypi_cache_worker.py Normal file
View File

@@ -0,0 +1,576 @@
"""
PyPI cache worker module.
Manages a thread pool for background caching of PyPI packages and their dependencies.
Replaces unbounded thread spawning with a managed queue-based approach.
"""
import logging
import re
import threading
import time
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timedelta
from typing import List, Optional
from uuid import UUID
import httpx
from sqlalchemy import or_
from sqlalchemy.orm import Session
from .config import get_settings
settings = get_settings()
from .database import SessionLocal
from .models import PyPICacheTask, Package, Project, Tag
logger = logging.getLogger(__name__)
# Module-level worker pool state
_cache_worker_pool: Optional[ThreadPoolExecutor] = None
_cache_worker_running: bool = False
_dispatcher_thread: Optional[threading.Thread] = None
def init_cache_worker_pool(max_workers: Optional[int] = None):
"""
Initialize the cache worker pool. Called on app startup.
Args:
max_workers: Number of concurrent workers. Defaults to PYPI_CACHE_WORKERS setting.
"""
global _cache_worker_pool, _cache_worker_running, _dispatcher_thread
if _cache_worker_pool is not None:
logger.warning("Cache worker pool already initialized")
return
workers = max_workers or settings.PYPI_CACHE_WORKERS
_cache_worker_pool = ThreadPoolExecutor(
max_workers=workers,
thread_name_prefix="pypi-cache-",
)
_cache_worker_running = True
# Start the dispatcher thread
_dispatcher_thread = threading.Thread(
target=_cache_dispatcher_loop,
daemon=True,
name="pypi-cache-dispatcher",
)
_dispatcher_thread.start()
logger.info(f"PyPI cache worker pool initialized with {workers} workers")
def shutdown_cache_worker_pool(wait: bool = True, timeout: float = 30.0):
"""
Shutdown the cache worker pool gracefully.
Args:
wait: Whether to wait for pending tasks to complete.
timeout: Maximum time to wait for shutdown.
"""
global _cache_worker_pool, _cache_worker_running, _dispatcher_thread
if _cache_worker_pool is None:
return
logger.info("Shutting down PyPI cache worker pool...")
_cache_worker_running = False
# Wait for dispatcher to stop
if _dispatcher_thread and _dispatcher_thread.is_alive():
_dispatcher_thread.join(timeout=5.0)
# Shutdown thread pool
_cache_worker_pool.shutdown(wait=wait, cancel_futures=not wait)
_cache_worker_pool = None
_dispatcher_thread = None
logger.info("PyPI cache worker pool shut down")
def _cache_dispatcher_loop():
"""
Main dispatcher loop: poll DB for pending tasks and submit to worker pool.
"""
logger.info("PyPI cache dispatcher started")
while _cache_worker_running:
try:
db = SessionLocal()
try:
tasks = _get_ready_tasks(db, limit=10)
for task in tasks:
# Mark in_progress before submitting
task.status = "in_progress"
task.started_at = datetime.utcnow()
db.commit()
# Submit to worker pool
_cache_worker_pool.submit(_process_cache_task, task.id)
# Sleep if no work (avoid busy loop)
if not tasks:
time.sleep(2.0)
else:
# Small delay between batches to avoid overwhelming
time.sleep(0.1)
finally:
db.close()
except Exception as e:
logger.error(f"PyPI cache dispatcher error: {e}")
time.sleep(5.0)
logger.info("PyPI cache dispatcher stopped")
def _get_ready_tasks(db: Session, limit: int = 10) -> List[PyPICacheTask]:
"""
Get tasks ready to process.
Returns pending tasks that are either new or ready for retry.
Orders by depth (shallow first) then creation time (FIFO).
"""
now = datetime.utcnow()
return (
db.query(PyPICacheTask)
.filter(
PyPICacheTask.status == "pending",
or_(
PyPICacheTask.next_retry_at == None, # New tasks
PyPICacheTask.next_retry_at <= now, # Retry tasks ready
),
)
.order_by(
PyPICacheTask.depth.asc(), # Prefer shallow deps first
PyPICacheTask.created_at.asc(), # FIFO within same depth
)
.limit(limit)
.all()
)
def _process_cache_task(task_id: UUID):
"""
Process a single cache task. Called by worker pool.
Args:
task_id: The ID of the task to process.
"""
db = SessionLocal()
try:
task = db.query(PyPICacheTask).filter(PyPICacheTask.id == task_id).first()
if not task:
logger.warning(f"PyPI cache task {task_id} not found")
return
logger.info(
f"Processing cache task: {task.package_name} "
f"(depth={task.depth}, attempt={task.attempts + 1})"
)
# Check if already cached by another task (dedup)
existing_artifact = _find_cached_package(db, task.package_name)
if existing_artifact:
logger.info(f"Package {task.package_name} already cached, skipping")
_mark_task_completed(db, task, cached_artifact_id=existing_artifact)
return
# Check depth limit
max_depth = settings.PYPI_CACHE_MAX_DEPTH
if task.depth >= max_depth:
_mark_task_failed(db, task, f"Max depth {max_depth} exceeded")
return
# Do the actual caching
result = _fetch_and_cache_package(task.package_name, task.version_constraint)
if result["success"]:
_mark_task_completed(db, task, cached_artifact_id=result.get("artifact_id"))
logger.info(f"Successfully cached {task.package_name}")
else:
_handle_task_failure(db, task, result["error"])
except Exception as e:
logger.exception(f"Error processing cache task {task_id}")
db.close()  # Discard the session that hit the error
db = SessionLocal()  # Get a fresh session to record the failure
try:
task = db.query(PyPICacheTask).filter(PyPICacheTask.id == task_id).first()
if task:
_handle_task_failure(db, task, str(e))
finally:
db.close()
finally:
db.close()
def _find_cached_package(db: Session, package_name: str) -> Optional[str]:
"""
Check if a package is already cached.
Args:
db: Database session.
package_name: Normalized package name.
Returns:
Artifact ID if cached, None otherwise.
"""
# Normalize package name (PEP 503)
normalized = re.sub(r"[-_.]+", "-", package_name).lower()
# Check if _pypi project has this package with at least one tag
system_project = db.query(Project).filter(Project.name == "_pypi").first()
if not system_project:
return None
package = (
db.query(Package)
.filter(
Package.project_id == system_project.id,
Package.name == normalized,
)
.first()
)
if not package:
return None
# Check if package has any tags (cached files)
tag = db.query(Tag).filter(Tag.package_id == package.id).first()
if tag:
return tag.artifact_id
return None
def _fetch_and_cache_package(
package_name: str,
version_constraint: Optional[str] = None,
) -> dict:
"""
Fetch and cache a PyPI package by making requests through our own proxy.
Args:
package_name: The package name to cache.
version_constraint: Optional version constraint (currently not used for selection).
Returns:
Dict with "success" bool, "artifact_id" on success, "error" on failure.
"""
# Normalize package name (PEP 503)
normalized_name = re.sub(r"[-_.]+", "-", package_name).lower()
# Build the URL to our own proxy
# Use localhost since we're making internal requests
base_url = f"http://localhost:{settings.PORT}"
try:
with httpx.Client(timeout=60.0, follow_redirects=True) as client:
# Step 1: Get the simple index page
simple_url = f"{base_url}/pypi/simple/{normalized_name}/"
logger.debug(f"Fetching index: {simple_url}")
response = client.get(simple_url)
if response.status_code == 404:
return {"success": False, "error": f"Package {package_name} not found on upstream"}
if response.status_code != 200:
return {"success": False, "error": f"Failed to get index: HTTP {response.status_code}"}
# Step 2: Parse HTML to find downloadable files
html = response.text
# Create pattern that matches both normalized (hyphens) and original (underscores)
name_pattern = re.sub(r"[-_]+", "[-_]+", normalized_name)
# Look for wheel files first (preferred)
wheel_pattern = rf'href="([^"]*{name_pattern}[^"]*\.whl[^"]*)"'
matches = re.findall(wheel_pattern, html, re.IGNORECASE)
if not matches:
# Fall back to sdist
sdist_pattern = rf'href="([^"]*{name_pattern}[^"]*\.tar\.gz[^"]*)"'
matches = re.findall(sdist_pattern, html, re.IGNORECASE)
if not matches:
logger.warning(
f"No downloadable files found for {package_name}. "
f"Pattern: {wheel_pattern}, HTML preview: {html[:500]}"
)
return {"success": False, "error": "No downloadable files found"}
# Get the last match (usually latest version)
download_url = matches[-1]
# Make URL absolute if needed
if download_url.startswith("/"):
download_url = f"{base_url}{download_url}"
elif not download_url.startswith("http"):
download_url = f"{base_url}/pypi/simple/{normalized_name}/{download_url}"
# Step 3: Download the file through our proxy (this caches it)
logger.debug(f"Downloading: {download_url}")
response = client.get(download_url)
if response.status_code != 200:
return {"success": False, "error": f"Download failed: HTTP {response.status_code}"}
# Get artifact ID from response header
artifact_id = response.headers.get("X-Checksum-SHA256")
return {"success": True, "artifact_id": artifact_id}
except httpx.TimeoutException as e:
return {"success": False, "error": f"Timeout: {e}"}
except httpx.ConnectError as e:
return {"success": False, "error": f"Connection failed: {e}"}
except Exception as e:
return {"success": False, "error": str(e)}
def _mark_task_completed(
db: Session,
task: PyPICacheTask,
cached_artifact_id: Optional[str] = None,
):
"""Mark a task as completed."""
task.status = "completed"
task.completed_at = datetime.utcnow()
task.cached_artifact_id = cached_artifact_id
task.error_message = None
db.commit()
def _mark_task_failed(db: Session, task: PyPICacheTask, error: str):
"""Mark a task as permanently failed."""
task.status = "failed"
task.completed_at = datetime.utcnow()
task.error_message = error[:1000] if error else None
db.commit()
logger.warning(f"PyPI cache task failed permanently: {task.package_name} - {error}")
def _handle_task_failure(db: Session, task: PyPICacheTask, error: str):
"""
Handle a failed cache attempt with exponential backoff.
Args:
db: Database session.
task: The failed task.
error: Error message.
"""
task.attempts += 1
task.error_message = error[:1000] if error else None
max_attempts = task.max_attempts or settings.PYPI_CACHE_MAX_ATTEMPTS
if task.attempts >= max_attempts:
# Give up after max attempts
task.status = "failed"
task.completed_at = datetime.utcnow()
logger.warning(
f"PyPI cache task failed permanently: {task.package_name} - {error} "
f"(after {task.attempts} attempts)"
)
else:
# Schedule retry with exponential backoff
# Attempt 1 failed → retry in 30s
# Attempt 2 failed → retry in 60s
# Attempt 3 failed → permanent failure (if max_attempts=3)
backoff_seconds = 30 * (2 ** (task.attempts - 1))
task.status = "pending"
task.next_retry_at = datetime.utcnow() + timedelta(seconds=backoff_seconds)
logger.info(
f"PyPI cache task will retry: {task.package_name} in {backoff_seconds}s "
f"(attempt {task.attempts}/{max_attempts})"
)
db.commit()
def enqueue_cache_task(
db: Session,
package_name: str,
version_constraint: Optional[str] = None,
parent_task_id: Optional[UUID] = None,
depth: int = 0,
triggered_by_artifact: Optional[str] = None,
) -> Optional[PyPICacheTask]:
"""
Enqueue a package for caching.
Performs deduplication: won't create a task if one already exists
for the same package in pending/in_progress state, or if the package
is already cached.
Args:
db: Database session.
package_name: The package name to cache.
version_constraint: Optional version constraint.
parent_task_id: Parent task that spawned this one.
depth: Recursion depth.
triggered_by_artifact: Artifact that declared this dependency.
Returns:
The created or existing task, or None if already cached.
"""
# Normalize package name (PEP 503)
normalized = re.sub(r"[-_.]+", "-", package_name).lower()
# Check for existing pending/in_progress task
existing_task = (
db.query(PyPICacheTask)
.filter(
PyPICacheTask.package_name == normalized,
PyPICacheTask.status.in_(["pending", "in_progress"]),
)
.first()
)
if existing_task:
logger.debug(f"Task already exists for {normalized}: {existing_task.id}")
return existing_task
# Check if already cached
if _find_cached_package(db, normalized):
logger.debug(f"Package {normalized} already cached, skipping task creation")
return None
# Create new task
task = PyPICacheTask(
package_name=normalized,
version_constraint=version_constraint,
parent_task_id=parent_task_id,
depth=depth,
triggered_by_artifact=triggered_by_artifact,
max_attempts=settings.PYPI_CACHE_MAX_ATTEMPTS,
)
db.add(task)
db.flush()
logger.info(f"Enqueued cache task for {normalized} (depth={depth})")
return task
def get_cache_status(db: Session) -> dict:
"""
Get summary of cache task queue status.
Returns:
Dict with counts by status.
"""
from sqlalchemy import func
stats = (
db.query(PyPICacheTask.status, func.count(PyPICacheTask.id))
.group_by(PyPICacheTask.status)
.all()
)
return {
"pending": next((s[1] for s in stats if s[0] == "pending"), 0),
"in_progress": next((s[1] for s in stats if s[0] == "in_progress"), 0),
"completed": next((s[1] for s in stats if s[0] == "completed"), 0),
"failed": next((s[1] for s in stats if s[0] == "failed"), 0),
}
def get_failed_tasks(db: Session, limit: int = 50) -> List[dict]:
"""
Get list of failed tasks for debugging.
Args:
db: Database session.
limit: Maximum number of tasks to return.
Returns:
List of failed task info dicts.
"""
tasks = (
db.query(PyPICacheTask)
.filter(PyPICacheTask.status == "failed")
.order_by(PyPICacheTask.completed_at.desc())
.limit(limit)
.all()
)
return [
{
"id": str(task.id),
"package": task.package_name,
"error": task.error_message,
"attempts": task.attempts,
"depth": task.depth,
"failed_at": task.completed_at.isoformat() if task.completed_at else None,
}
for task in tasks
]
def retry_failed_task(db: Session, package_name: str) -> Optional[PyPICacheTask]:
"""
Reset a failed task to retry.
Args:
db: Database session.
package_name: The package name to retry.
Returns:
The reset task, or None if not found.
"""
normalized = re.sub(r"[-_.]+", "-", package_name).lower()
task = (
db.query(PyPICacheTask)
.filter(
PyPICacheTask.package_name == normalized,
PyPICacheTask.status == "failed",
)
.first()
)
if not task:
return None
task.status = "pending"
task.attempts = 0
task.next_retry_at = None
task.error_message = None
task.started_at = None
task.completed_at = None
db.commit()
logger.info(f"Reset failed task for retry: {normalized}")
return task
def retry_all_failed_tasks(db: Session) -> int:
"""
Reset all failed tasks to retry.
Args:
db: Database session.
Returns:
Number of tasks reset.
"""
count = (
db.query(PyPICacheTask)
.filter(PyPICacheTask.status == "failed")
.update(
{
"status": "pending",
"attempts": 0,
"next_retry_at": None,
"error_message": None,
"started_at": None,
"completed_at": None,
}
)
)
db.commit()
logger.info(f"Reset {count} failed tasks for retry")
return count
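A hedged usage sketch for the queue API above, assuming a SessionLocal session as used elsewhere in the module; note that enqueue_cache_task only flushes, so the caller owns the commit:

from backend.app.database import SessionLocal
from backend.app.pypi_cache_worker import enqueue_cache_task, get_cache_status

db = SessionLocal()
try:
    # Name is normalized per PEP 503, so this dedupes with "typing-extensions";
    # returns an existing task, a new one, or None if already cached.
    task = enqueue_cache_task(db, "Typing_Extensions", version_constraint=">=4.0")
    db.commit()
    print(get_cache_status(db))  # e.g. {"pending": 1, "in_progress": 0, ...}
finally:
    db.close()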

868
backend/app/pypi_proxy.py Normal file
View File

@@ -0,0 +1,868 @@
"""
Transparent PyPI proxy implementing PEP 503 (Simple API).
Provides endpoints that allow pip to use Orchard as a PyPI index URL.
Artifacts are cached on first access through configured upstream sources.
"""
import hashlib
import logging
import re
import tarfile
import zipfile
from io import BytesIO
from typing import Optional, List, Tuple
from urllib.parse import urljoin, urlparse, quote, unquote
import httpx
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Request, Response
from fastapi.responses import StreamingResponse, HTMLResponse
from sqlalchemy.orm import Session
from .database import get_db
from .models import UpstreamSource, CachedUrl, Artifact, Project, Package, Tag, PackageVersion, ArtifactDependency
from .storage import S3Storage, get_storage
from .config import get_env_upstream_sources
from .pypi_cache_worker import (
enqueue_cache_task,
get_cache_status,
get_failed_tasks,
retry_failed_task,
retry_all_failed_tasks,
)
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/pypi", tags=["pypi-proxy"])
def _parse_requires_dist(requires_dist: str) -> Tuple[Optional[str], Optional[str]]:
"""Parse a Requires-Dist line into (package_name, version_constraint).
Examples:
"requests (>=2.25.0)" -> ("requests", ">=2.25.0")
"typing-extensions; python_version < '3.8'" -> ("typing-extensions", None)
"numpy>=1.21.0" -> ("numpy", ">=1.21.0")
"certifi" -> ("certifi", None)
Returns:
Tuple of (normalized_package_name, version_constraint or None)
"""
# Remove any environment markers (after semicolon)
if ';' in requires_dist:
requires_dist = requires_dist.split(';')[0].strip()
# Match patterns like "package (>=1.0)" or "package>=1.0" or "package"
# Pattern breakdown: package name, optional whitespace, optional version in parens or directly
match = re.match(
r'^([a-zA-Z0-9][-a-zA-Z0-9._]*)\s*(?:\(([^)]+)\)|([<>=!~][^\s;]+))?',
requires_dist.strip()
)
if not match:
return None, None
package_name = match.group(1)
# Version can be in parentheses (group 2) or directly after name (group 3)
version_constraint = match.group(2) or match.group(3)
# Normalize package name (PEP 503)
normalized_name = re.sub(r'[-_.]+', '-', package_name).lower()
# Clean up version constraint
if version_constraint:
version_constraint = version_constraint.strip()
return normalized_name, version_constraint
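The docstring examples above, exercised directly (illustrative assertions only):

assert _parse_requires_dist("requests (>=2.25.0)") == ("requests", ">=2.25.0")
assert _parse_requires_dist("typing-extensions; python_version < '3.8'") == ("typing-extensions", None)
assert _parse_requires_dist("numpy>=1.21.0") == ("numpy", ">=1.21.0")
assert _parse_requires_dist("certifi") == ("certifi", None)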
def _extract_requires_from_metadata(metadata_content: str) -> List[Tuple[str, Optional[str]]]:
"""Extract all Requires-Dist entries from METADATA/PKG-INFO content.
Args:
metadata_content: The content of a METADATA or PKG-INFO file
Returns:
List of (package_name, version_constraint) tuples
"""
dependencies = []
for line in metadata_content.split('\n'):
if line.startswith('Requires-Dist:'):
# Extract the value after "Requires-Dist:"
value = line[len('Requires-Dist:'):].strip()
pkg_name, version = _parse_requires_dist(value)
if pkg_name:
dependencies.append((pkg_name, version))
return dependencies
def _extract_metadata_from_wheel(content: bytes) -> Optional[str]:
"""Extract METADATA file content from a wheel (zip) file.
Wheel files have structure: {package}-{version}.dist-info/METADATA
Args:
content: The wheel file content as bytes
Returns:
METADATA file content as string, or None if not found
"""
try:
with zipfile.ZipFile(BytesIO(content)) as zf:
# Find the .dist-info directory
for name in zf.namelist():
if name.endswith('.dist-info/METADATA'):
return zf.read(name).decode('utf-8', errors='replace')
except Exception as e:
logger.warning(f"Failed to extract metadata from wheel: {e}")
return None
def _extract_metadata_from_sdist(content: bytes, filename: str) -> Optional[str]:
"""Extract PKG-INFO file content from a source distribution (.tar.gz).
Source distributions have structure: {package}-{version}/PKG-INFO
Args:
content: The tarball content as bytes
filename: The original filename (used to determine package name)
Returns:
PKG-INFO file content as string, or None if not found
"""
try:
with tarfile.open(fileobj=BytesIO(content), mode='r:gz') as tf:
# Find PKG-INFO in the root directory of the archive
for member in tf.getmembers():
if member.name.endswith('/PKG-INFO') and member.name.count('/') == 1:
f = tf.extractfile(member)
if f:
return f.read().decode('utf-8', errors='replace')
except Exception as e:
logger.warning(f"Failed to extract metadata from sdist {filename}: {e}")
return None
def _extract_dependencies(content: bytes, filename: str) -> List[Tuple[str, Optional[str]]]:
"""Extract dependencies from a PyPI package file.
Supports wheel (.whl) and source distribution (.tar.gz) formats.
Args:
content: The package file content as bytes
filename: The original filename
Returns:
List of (package_name, version_constraint) tuples
"""
metadata = None
if filename.endswith('.whl'):
metadata = _extract_metadata_from_wheel(content)
elif filename.endswith('.tar.gz'):
metadata = _extract_metadata_from_sdist(content, filename)
if metadata:
return _extract_requires_from_metadata(metadata)
return []
# Timeout configuration for proxy requests
PROXY_CONNECT_TIMEOUT = 30.0
PROXY_READ_TIMEOUT = 60.0
def _extract_pypi_version(filename: str) -> Optional[str]:
"""Extract version from PyPI filename.
Handles formats like:
- cowsay-6.1-py3-none-any.whl
- cowsay-1.0.tar.gz
- some_package-1.2.3.post1-cp39-cp39-linux_x86_64.whl
"""
# Remove extension
if filename.endswith('.whl'):
# Wheel: name-version-pytag-abitag-platform.whl
parts = filename[:-4].split('-')
if len(parts) >= 2:
return parts[1]
elif filename.endswith('.tar.gz'):
# Source: name-version.tar.gz
base = filename[:-7]
# Find the last hyphen that precedes a version-like string
match = re.match(r'^(.+)-(\d+.*)$', base)
if match:
return match.group(2)
elif filename.endswith('.zip'):
# Egg/zip: name-version.zip
base = filename[:-4]
match = re.match(r'^(.+)-(\d+.*)$', base)
if match:
return match.group(2)
return None
def _get_pypi_upstream_sources(db: Session) -> list[UpstreamSource]:
"""Get all enabled upstream sources configured for PyPI."""
# Get database sources
db_sources = (
db.query(UpstreamSource)
.filter(
UpstreamSource.source_type == "pypi",
UpstreamSource.enabled == True,
)
.order_by(UpstreamSource.priority)
.all()
)
# Get env sources
env_sources = [
s for s in get_env_upstream_sources()
if s.source_type == "pypi" and s.enabled
]
# Combine and sort by priority
all_sources = list(db_sources) + list(env_sources)
return sorted(all_sources, key=lambda s: s.priority)
def _build_auth_headers(source) -> dict:
"""Build authentication headers for an upstream source."""
headers = {}
if hasattr(source, 'auth_type'):
if source.auth_type == "bearer":
password = source.get_password() if hasattr(source, 'get_password') else getattr(source, 'password', None)
if password:
headers["Authorization"] = f"Bearer {password}"
elif source.auth_type == "api_key":
custom_headers = source.get_headers() if hasattr(source, 'get_headers') else {}
if custom_headers:
headers.update(custom_headers)
return headers
def _get_basic_auth(source) -> Optional[tuple[str, str]]:
"""Get basic auth credentials if applicable."""
if hasattr(source, 'auth_type') and source.auth_type == "basic":
username = getattr(source, 'username', None)
if username:
password = source.get_password() if hasattr(source, 'get_password') else getattr(source, 'password', '')
return (username, password or '')
return None
def _get_base_url(request: Request) -> str:
"""
Get the external base URL, respecting X-Forwarded-Proto header.
When behind a reverse proxy that terminates SSL, the request.base_url
will show http:// even though the external URL is https://. This function
checks the X-Forwarded-Proto header to determine the correct scheme.
"""
base_url = str(request.base_url).rstrip('/')
# Check for X-Forwarded-Proto header (set by reverse proxies)
forwarded_proto = request.headers.get('x-forwarded-proto')
if forwarded_proto:
# Replace the scheme with the forwarded protocol
parsed = urlparse(base_url)
base_url = f"{forwarded_proto}://{parsed.netloc}{parsed.path}"
return base_url
def _rewrite_package_links(html: str, base_url: str, package_name: str, upstream_base_url: str) -> str:
"""
Rewrite download links in a PyPI simple page to go through our proxy.
Args:
html: The HTML content from upstream
base_url: Our server's base URL
package_name: The package name for the URL path
upstream_base_url: The upstream URL used to fetch this page (for resolving relative URLs)
Returns:
HTML with rewritten download links
"""
# Pattern to match href attributes in anchor tags
# PyPI simple pages have links like:
# <a href="https://files.pythonhosted.org/packages/.../file.tar.gz#sha256=...">file.tar.gz</a>
# Or relative URLs from Artifactory like:
# <a href="../../packages/packages/62/35/.../requests-0.10.0.tar.gz#sha256=...">
def replace_href(match):
original_url = match.group(1)
# Resolve relative URLs to absolute using the upstream base URL
if not original_url.startswith(('http://', 'https://')):
# Split off fragment before resolving
url_without_fragment = original_url.split('#')[0]
fragment_part = original_url[len(url_without_fragment):]
absolute_url = urljoin(upstream_base_url, url_without_fragment) + fragment_part
else:
absolute_url = original_url
# Extract the filename from the URL
parsed = urlparse(absolute_url)
path_parts = parsed.path.split('/')
filename = path_parts[-1] if path_parts else ''
# Keep the hash fragment if present
fragment = f"#{parsed.fragment}" if parsed.fragment else ""
# Encode the absolute URL (without fragment) for safe transmission
encoded_url = quote(absolute_url.split('#')[0], safe='')
# Build new URL pointing to our proxy
new_url = f"{base_url}/pypi/simple/{package_name}/{filename}?upstream={encoded_url}{fragment}"
return f'href="{new_url}"'
# Match href="..." patterns
rewritten = re.sub(r'href="([^"]+)"', replace_href, html)
return rewritten
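# Example (illustrative): with base_url="https://orchard.example.com" and
# package_name="requests", an upstream link such as
#   href="https://files.pythonhosted.org/packages/ab/cd/requests-2.31.0.tar.gz#sha256=abc"
# is rewritten to
#   href="https://orchard.example.com/pypi/simple/requests/requests-2.31.0.tar.gz?upstream=https%3A%2F%2Ffiles.pythonhosted.org%2Fpackages%2Fab%2Fcd%2Frequests-2.31.0.tar.gz#sha256=abc"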
@router.get("/simple/")
async def pypi_simple_index(
request: Request,
db: Session = Depends(get_db),
):
"""
PyPI Simple API index - lists all packages.
Proxies to the first available upstream PyPI source.
"""
sources = _get_pypi_upstream_sources(db)
if not sources:
raise HTTPException(
status_code=503,
detail="No PyPI upstream sources configured"
)
# Try each source in priority order
last_error = None
for source in sources:
try:
headers = {"User-Agent": "Orchard-PyPI-Proxy/1.0"}
headers.update(_build_auth_headers(source))
auth = _get_basic_auth(source)
# Use URL as-is - users should provide full path including /simple
simple_url = source.url.rstrip('/') + '/'
timeout = httpx.Timeout(PROXY_READ_TIMEOUT, connect=PROXY_CONNECT_TIMEOUT)
with httpx.Client(timeout=timeout, follow_redirects=False) as client:
response = client.get(
simple_url,
headers=headers,
auth=auth,
)
# Handle redirects manually to avoid loops
if response.status_code in (301, 302, 303, 307, 308):
redirect_url = response.headers.get('location')
if redirect_url:
# Follow the redirect once
response = client.get(
redirect_url,
headers=headers,
auth=auth,
follow_redirects=False,
)
if response.status_code == 200:
content = response.text
# Rewrite package links so the index entries resolve through this proxy
base_url = _get_base_url(request)
content = re.sub(
r'href="([^"]+)/"',
lambda m: f'href="{base_url}/pypi/simple/{m.group(1)}/"',
content
)
return HTMLResponse(content=content)
last_error = f"HTTP {response.status_code}"
except httpx.ConnectError as e:
last_error = f"Connection failed: {e}"
logger.warning(f"PyPI proxy: failed to connect to {source.url}: {e}")
except httpx.TimeoutException as e:
last_error = f"Timeout: {e}"
logger.warning(f"PyPI proxy: timeout connecting to {source.url}: {e}")
except Exception as e:
last_error = str(e)
logger.warning(f"PyPI proxy: error fetching from {source.url}: {e}")
raise HTTPException(
status_code=502,
detail=f"Failed to fetch package index from upstream: {last_error}"
)
@router.get("/simple/{package_name}/")
async def pypi_package_versions(
request: Request,
package_name: str,
db: Session = Depends(get_db),
):
"""
PyPI Simple API package page - lists all versions/files for a package.
Proxies to upstream and rewrites download links to go through our cache.
"""
sources = _get_pypi_upstream_sources(db)
if not sources:
raise HTTPException(
status_code=503,
detail="No PyPI upstream sources configured"
)
base_url = _get_base_url(request)
# Normalize package name (PEP 503)
normalized_name = re.sub(r'[-_.]+', '-', package_name).lower()
# Try each source in priority order
last_error = None
for source in sources:
try:
headers = {"User-Agent": "Orchard-PyPI-Proxy/1.0"}
headers.update(_build_auth_headers(source))
auth = _get_basic_auth(source)
# Use URL as-is - users should provide full path including /simple
package_url = source.url.rstrip('/') + f'/{normalized_name}/'
final_url = package_url # Track final URL after redirects
timeout = httpx.Timeout(PROXY_READ_TIMEOUT, connect=PROXY_CONNECT_TIMEOUT)
with httpx.Client(timeout=timeout, follow_redirects=False) as client:
response = client.get(
package_url,
headers=headers,
auth=auth,
)
# Handle redirects manually
redirect_count = 0
while response.status_code in (301, 302, 303, 307, 308) and redirect_count < 5:
redirect_url = response.headers.get('location')
if not redirect_url:
break
# Make redirect URL absolute if needed
if not redirect_url.startswith('http'):
redirect_url = urljoin(final_url, redirect_url)
final_url = redirect_url # Update final URL
response = client.get(
redirect_url,
headers=headers,
auth=auth,
follow_redirects=False,
)
redirect_count += 1
if response.status_code == 200:
content = response.text
# Rewrite download links to go through our proxy
# Pass final_url so relative URLs can be resolved correctly
content = _rewrite_package_links(content, base_url, normalized_name, final_url)
return HTMLResponse(content=content)
if response.status_code == 404:
# Package not found in this source, try next
last_error = f"Package not found in {source.name}"
continue
last_error = f"HTTP {response.status_code}"
except httpx.ConnectError as e:
last_error = f"Connection failed: {e}"
logger.warning(f"PyPI proxy: failed to connect to {source.url}: {e}")
except httpx.TimeoutException as e:
last_error = f"Timeout: {e}"
logger.warning(f"PyPI proxy: timeout connecting to {source.url}: {e}")
except Exception as e:
last_error = str(e)
logger.warning(f"PyPI proxy: error fetching {package_name} from {source.url}: {e}")
raise HTTPException(
status_code=404,
detail=f"Package '{package_name}' not found: {last_error}"
)
@router.get("/simple/{package_name}/{filename}")
async def pypi_download_file(
request: Request,
package_name: str,
filename: str,
upstream: Optional[str] = None,
db: Session = Depends(get_db),
storage: S3Storage = Depends(get_storage),
):
"""
Download a package file, caching it in Orchard.
Args:
package_name: The package name
filename: The filename to download
upstream: URL-encoded upstream URL to fetch from
"""
if not upstream:
raise HTTPException(
status_code=400,
detail="Missing 'upstream' query parameter with source URL"
)
# Decode the upstream URL
upstream_url = unquote(upstream)
# Check if we already have this URL cached
url_hash = hashlib.sha256(upstream_url.encode()).hexdigest()
cached_url = db.query(CachedUrl).filter(CachedUrl.url_hash == url_hash).first()
if cached_url:
# Serve from cache
artifact = db.query(Artifact).filter(Artifact.id == cached_url.artifact_id).first()
if artifact:
logger.info(f"PyPI proxy: serving cached {filename} (artifact {artifact.id[:12]})")
# Stream from S3
try:
stream, content_length, _ = storage.get_stream(artifact.s3_key)
def stream_content():
"""Generator that yields chunks from the S3 stream."""
try:
for chunk in stream.iter_chunks():
yield chunk
finally:
stream.close()
return StreamingResponse(
stream_content(),
media_type=artifact.content_type or "application/octet-stream",
headers={
"Content-Disposition": f'attachment; filename="{filename}"',
"Content-Length": str(content_length),
"X-Checksum-SHA256": artifact.id,
"X-Cache": "HIT",
}
)
except Exception as e:
logger.error(f"PyPI proxy: error streaming cached artifact: {e}")
# Fall through to fetch from upstream
# Not cached - fetch from upstream
sources = _get_pypi_upstream_sources(db)
# Use the first available source for authentication headers
# Note: The upstream URL may point to files.pythonhosted.org or other CDNs,
# not the configured source URL directly, so we can't strictly validate the host
matched_source = sources[0] if sources else None
try:
headers = {"User-Agent": "Orchard-PyPI-Proxy/1.0"}
if matched_source:
headers.update(_build_auth_headers(matched_source))
auth = _get_basic_auth(matched_source) if matched_source else None
timeout = httpx.Timeout(300.0, connect=PROXY_CONNECT_TIMEOUT) # 5 minutes for large files
# Fetch the file
logger.info(f"PyPI proxy: fetching {filename} from {upstream_url}")
with httpx.Client(timeout=timeout, follow_redirects=False) as client:
response = client.get(
upstream_url,
headers=headers,
auth=auth,
)
# Handle redirects manually
redirect_count = 0
while response.status_code in (301, 302, 303, 307, 308) and redirect_count < 5:
redirect_url = response.headers.get('location')
if not redirect_url:
break
if not redirect_url.startswith('http'):
redirect_url = urljoin(upstream_url, redirect_url)
logger.info(f"PyPI proxy: following redirect to {redirect_url}")
# Don't send auth to different hosts
redirect_headers = {"User-Agent": "Orchard-PyPI-Proxy/1.0"}
redirect_auth = None
if urlparse(redirect_url).netloc == urlparse(upstream_url).netloc:
redirect_headers.update(headers)
redirect_auth = auth
response = client.get(
redirect_url,
headers=redirect_headers,
auth=redirect_auth,
follow_redirects=False,
)
redirect_count += 1
if response.status_code != 200:
raise HTTPException(
status_code=response.status_code,
detail=f"Upstream returned {response.status_code}"
)
content = response.content
content_type = response.headers.get('content-type', 'application/octet-stream')
# Store in S3 (computes hash and deduplicates automatically)
from io import BytesIO
result = storage.store(BytesIO(content))
sha256 = result.sha256
size = result.size
logger.info(f"PyPI proxy: downloaded {filename}, {size} bytes, sha256={sha256[:12]}")
# Check if artifact already exists
existing = db.query(Artifact).filter(Artifact.id == sha256).first()
if existing:
# Increment ref count
existing.ref_count += 1
db.flush()
else:
# Create artifact record
new_artifact = Artifact(
id=sha256,
original_name=filename,
content_type=content_type,
size=size,
ref_count=1,
created_by="pypi-proxy",
s3_key=result.s3_key,
checksum_md5=result.md5,
checksum_sha1=result.sha1,
s3_etag=result.s3_etag,
)
db.add(new_artifact)
db.flush()
# Create/get system project and package
system_project = db.query(Project).filter(Project.name == "_pypi").first()
if not system_project:
system_project = Project(
name="_pypi",
description="System project for cached PyPI packages",
is_public=True,
is_system=True,
created_by="pypi-proxy",
)
db.add(system_project)
db.flush()
elif not system_project.is_system:
# Ensure existing project is marked as system
system_project.is_system = True
db.flush()
# Normalize package name
normalized_name = re.sub(r'[-_.]+', '-', package_name).lower()
package = db.query(Package).filter(
Package.project_id == system_project.id,
Package.name == normalized_name,
).first()
if not package:
package = Package(
project_id=system_project.id,
name=normalized_name,
description=f"PyPI package: {normalized_name}",
format="pypi",
)
db.add(package)
db.flush()
# Create tag with filename
existing_tag = db.query(Tag).filter(
Tag.package_id == package.id,
Tag.name == filename,
).first()
if not existing_tag:
tag = Tag(
package_id=package.id,
name=filename,
artifact_id=sha256,
created_by="pypi-proxy",
)
db.add(tag)
# Extract and create version
# Only create version for actual package files, not .metadata files
version = _extract_pypi_version(filename)
if version and not filename.endswith('.metadata'):
# Check by version string (the unique constraint is on package_id + version)
existing_version = db.query(PackageVersion).filter(
PackageVersion.package_id == package.id,
PackageVersion.version == version,
).first()
if not existing_version:
pkg_version = PackageVersion(
package_id=package.id,
artifact_id=sha256,
version=version,
version_source="filename",
created_by="pypi-proxy",
)
db.add(pkg_version)
# Cache the URL mapping
existing_cached = db.query(CachedUrl).filter(CachedUrl.url_hash == url_hash).first()
if not existing_cached:
cached_url_record = CachedUrl(
url_hash=url_hash,
url=upstream_url,
artifact_id=sha256,
)
db.add(cached_url_record)
# Extract and store dependencies
dependencies = _extract_dependencies(content, filename)
unique_deps = []
if dependencies:
# Deduplicate dependencies by package name (keep first occurrence)
seen_packages = set()
for dep_name, dep_version in dependencies:
if dep_name not in seen_packages:
seen_packages.add(dep_name)
unique_deps.append((dep_name, dep_version))
logger.info(f"PyPI proxy: extracted {len(unique_deps)} dependencies from {filename} (deduped from {len(dependencies)})")
for dep_name, dep_version in unique_deps:
# Check if this dependency already exists for this artifact
existing_dep = db.query(ArtifactDependency).filter(
ArtifactDependency.artifact_id == sha256,
ArtifactDependency.dependency_project == "_pypi",
ArtifactDependency.dependency_package == dep_name,
).first()
if not existing_dep:
dep = ArtifactDependency(
artifact_id=sha256,
dependency_project="_pypi",
dependency_package=dep_name,
version_constraint=dep_version if dep_version else "*",
)
db.add(dep)
# Proactively cache dependencies via task queue
if unique_deps:
for dep_name, dep_version in unique_deps:
enqueue_cache_task(
db,
package_name=dep_name,
version_constraint=dep_version,
parent_task_id=None, # Top-level, triggered by user download
depth=0,
triggered_by_artifact=sha256,
)
logger.info(f"PyPI proxy: queued {len(unique_deps)} dependencies for caching")
db.commit()
# Return the file
return Response(
content=content,
media_type=content_type,
headers={
"Content-Disposition": f'attachment; filename="{filename}"',
"Content-Length": str(size),
"X-Checksum-SHA256": sha256,
"X-Cache": "MISS",
}
)
except httpx.ConnectError as e:
raise HTTPException(status_code=502, detail=f"Connection failed: {e}")
except httpx.TimeoutException as e:
raise HTTPException(status_code=504, detail=f"Timeout: {e}")
except HTTPException:
raise
except Exception as e:
logger.exception(f"PyPI proxy: error downloading {filename}")
raise HTTPException(status_code=500, detail=str(e))
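# Usage sketch (illustrative): point pip at this proxy so every file download
# flows through pypi_download_file above and is cached on first fetch, e.g.
#   pip install requests --index-url https://orchard.example.com/pypi/simple/
# (the hostname is an assumption; use your deployment's external URL)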
# =============================================================================
# Cache Status and Management Endpoints
# =============================================================================
@router.get("/cache/status")
async def pypi_cache_status(db: Session = Depends(get_db)):
"""
Get summary of the PyPI cache task queue.
Returns counts of tasks by status (pending, in_progress, completed, failed).
"""
return get_cache_status(db)
@router.get("/cache/failed")
async def pypi_cache_failed(
limit: int = 50,
db: Session = Depends(get_db),
):
"""
Get list of failed cache tasks for debugging.
Args:
limit: Maximum number of tasks to return (default 50).
"""
return get_failed_tasks(db, limit=limit)
@router.post("/cache/retry/{package_name}")
async def pypi_cache_retry(
package_name: str,
db: Session = Depends(get_db),
):
"""
Reset a failed cache task to retry.
Args:
package_name: The package name to retry.
"""
task = retry_failed_task(db, package_name)
if not task:
raise HTTPException(
status_code=404,
detail=f"No failed cache task found for package '{package_name}'"
)
return {"message": f"Retry queued for {task.package_name}", "task_id": str(task.id)}
@router.post("/cache/retry-all")
async def pypi_cache_retry_all(db: Session = Depends(get_db)):
"""
Reset all failed cache tasks to retry.
Returns the count of tasks that were reset.
"""
count = retry_all_failed_tasks(db)
return {"message": f"Queued {count} tasks for retry", "count": count}

File diff suppressed because it is too large.


@@ -33,6 +33,7 @@ class ProjectResponse(BaseModel):
name: str
description: Optional[str]
is_public: bool
is_system: bool = False
created_at: datetime
updated_at: datetime
created_by: str
@@ -1196,3 +1197,277 @@ class TeamMemberResponse(BaseModel):
class Config:
from_attributes = True
# =============================================================================
# Upstream Caching Schemas
# =============================================================================
# Valid source types
SOURCE_TYPES = ["npm", "pypi", "maven", "docker", "helm", "nuget", "deb", "rpm", "generic"]
# Valid auth types
AUTH_TYPES = ["none", "basic", "bearer", "api_key"]
class UpstreamSourceCreate(BaseModel):
"""Create a new upstream source"""
name: str
source_type: str = "generic"
url: str
enabled: bool = False
auth_type: str = "none"
username: Optional[str] = None
password: Optional[str] = None # Write-only
headers: Optional[dict] = None # Write-only, custom headers
priority: int = 100
@field_validator('name')
@classmethod
def validate_name(cls, v: str) -> str:
v = v.strip()
if not v:
raise ValueError("name cannot be empty")
if len(v) > 255:
raise ValueError("name must be 255 characters or less")
return v
@field_validator('source_type')
@classmethod
def validate_source_type(cls, v: str) -> str:
if v not in SOURCE_TYPES:
raise ValueError(f"source_type must be one of: {', '.join(SOURCE_TYPES)}")
return v
@field_validator('url')
@classmethod
def validate_url(cls, v: str) -> str:
v = v.strip()
if not v:
raise ValueError("url cannot be empty")
if not (v.startswith('http://') or v.startswith('https://')):
raise ValueError("url must start with http:// or https://")
if len(v) > 2048:
raise ValueError("url must be 2048 characters or less")
return v
@field_validator('auth_type')
@classmethod
def validate_auth_type(cls, v: str) -> str:
if v not in AUTH_TYPES:
raise ValueError(f"auth_type must be one of: {', '.join(AUTH_TYPES)}")
return v
@field_validator('priority')
@classmethod
def validate_priority(cls, v: int) -> int:
if v <= 0:
raise ValueError("priority must be greater than 0")
return v
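# Illustrative example (assumption: this schema is posted to the upstream
# sources admin API): a public PyPI entry that satisfies every validator above.
#   UpstreamSourceCreate(
#       name="pypi-public",
#       source_type="pypi",
#       url="https://pypi.org/simple",
#       enabled=True,
#       priority=100,
#   )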
class UpstreamSourceUpdate(BaseModel):
"""Update an upstream source (partial)"""
name: Optional[str] = None
source_type: Optional[str] = None
url: Optional[str] = None
enabled: Optional[bool] = None
auth_type: Optional[str] = None
username: Optional[str] = None
password: Optional[str] = None # Write-only, None = keep existing, empty string = clear
headers: Optional[dict] = None # Write-only
priority: Optional[int] = None
@field_validator('name')
@classmethod
def validate_name(cls, v: Optional[str]) -> Optional[str]:
if v is not None:
v = v.strip()
if not v:
raise ValueError("name cannot be empty")
if len(v) > 255:
raise ValueError("name must be 255 characters or less")
return v
@field_validator('source_type')
@classmethod
def validate_source_type(cls, v: Optional[str]) -> Optional[str]:
if v is not None and v not in SOURCE_TYPES:
raise ValueError(f"source_type must be one of: {', '.join(SOURCE_TYPES)}")
return v
@field_validator('url')
@classmethod
def validate_url(cls, v: Optional[str]) -> Optional[str]:
if v is not None:
v = v.strip()
if not v:
raise ValueError("url cannot be empty")
if not (v.startswith('http://') or v.startswith('https://')):
raise ValueError("url must start with http:// or https://")
if len(v) > 2048:
raise ValueError("url must be 2048 characters or less")
return v
@field_validator('auth_type')
@classmethod
def validate_auth_type(cls, v: Optional[str]) -> Optional[str]:
if v is not None and v not in AUTH_TYPES:
raise ValueError(f"auth_type must be one of: {', '.join(AUTH_TYPES)}")
return v
@field_validator('priority')
@classmethod
def validate_priority(cls, v: Optional[int]) -> Optional[int]:
if v is not None and v <= 0:
raise ValueError("priority must be greater than 0")
return v
class UpstreamSourceResponse(BaseModel):
"""Upstream source response (credentials never included)"""
id: UUID
name: str
source_type: str
url: str
enabled: bool
auth_type: str
username: Optional[str]
has_password: bool # True if password is set
has_headers: bool # True if custom headers are set
priority: int
source: str = "database" # "database" or "env" (env = defined via environment variables)
created_at: Optional[datetime] = None # May be None for legacy/env data
updated_at: Optional[datetime] = None # May be None for legacy/env data
class Config:
from_attributes = True
class CacheSettingsResponse(BaseModel):
"""Global cache settings response"""
auto_create_system_projects: bool
auto_create_system_projects_env_override: Optional[bool] = None # Set if overridden by env var
created_at: Optional[datetime] = None # May be None for legacy data
updated_at: Optional[datetime] = None # May be None for legacy data
class Config:
from_attributes = True
class CacheSettingsUpdate(BaseModel):
"""Update cache settings (partial)"""
auto_create_system_projects: Optional[bool] = None
class CachedUrlResponse(BaseModel):
"""Cached URL response"""
id: UUID
url: str
url_hash: str
artifact_id: str
source_id: Optional[UUID]
source_name: Optional[str] = None # Populated from join
fetched_at: datetime
created_at: datetime
class Config:
from_attributes = True
class CacheRequest(BaseModel):
"""Request to cache an artifact from an upstream URL"""
url: str
source_type: str
package_name: Optional[str] = None # Auto-derived from URL if not provided
tag: Optional[str] = None # Auto-derived from URL if not provided
user_project: Optional[str] = None # Cross-reference to user project
user_package: Optional[str] = None
user_tag: Optional[str] = None
expected_hash: Optional[str] = None # Verify downloaded content
@field_validator('url')
@classmethod
def validate_url(cls, v: str) -> str:
v = v.strip()
if not v:
raise ValueError("url cannot be empty")
if not (v.startswith('http://') or v.startswith('https://')):
raise ValueError("url must start with http:// or https://")
if len(v) > 4096:
raise ValueError("url must be 4096 characters or less")
return v
@field_validator('source_type')
@classmethod
def validate_source_type(cls, v: str) -> str:
if v not in SOURCE_TYPES:
raise ValueError(f"source_type must be one of: {', '.join(SOURCE_TYPES)}")
return v
@field_validator('expected_hash')
@classmethod
def validate_expected_hash(cls, v: Optional[str]) -> Optional[str]:
if v is not None:
v = v.strip().lower()
# Remove sha256: prefix if present
if v.startswith('sha256:'):
v = v[7:]
# Validate hex format
if len(v) != 64 or not all(c in '0123456789abcdef' for c in v):
raise ValueError("expected_hash must be a 64-character hex string (SHA256)")
return v
class CacheResponse(BaseModel):
"""Response from caching an artifact"""
artifact_id: str
sha256: str
size: int
content_type: Optional[str]
already_cached: bool
source_url: str
source_name: Optional[str]
system_project: str
system_package: str
system_tag: Optional[str]
user_reference: Optional[str] = None # e.g., "my-app/npm-deps:lodash-4.17.21"
class CacheResolveRequest(BaseModel):
"""Request to cache an artifact by package coordinates (no URL required).
The server will construct the appropriate URL based on source_type and
configured upstream sources.
"""
source_type: str
package: str
version: str
user_project: Optional[str] = None
user_package: Optional[str] = None
user_tag: Optional[str] = None
@field_validator('source_type')
@classmethod
def validate_source_type(cls, v: str) -> str:
if v not in SOURCE_TYPES:
raise ValueError(f"source_type must be one of: {', '.join(SOURCE_TYPES)}")
return v
@field_validator('package')
@classmethod
def validate_package(cls, v: str) -> str:
v = v.strip()
if not v:
raise ValueError("package cannot be empty")
return v
@field_validator('version')
@classmethod
def validate_version(cls, v: str) -> str:
v = v.strip()
if not v:
raise ValueError("version cannot be empty")
return v
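# Illustrative example (assumption: the server maps these coordinates onto a
# configured pypi upstream to build the fetch URL):
#   CacheResolveRequest(
#       source_type="pypi",
#       package="requests",
#       version="2.31.0",
#       user_project="my-app",
#   )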

backend/app/upstream.py

@@ -0,0 +1,565 @@
"""
HTTP client for fetching artifacts from upstream sources.
Provides streaming downloads with SHA256 computation, authentication support,
and automatic source matching based on URL prefixes.
"""
from __future__ import annotations
import hashlib
import logging
import tempfile
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import BinaryIO, Optional, TYPE_CHECKING
from urllib.parse import urlparse
import httpx
if TYPE_CHECKING:
from .models import CacheSettings, UpstreamSource
logger = logging.getLogger(__name__)
class UpstreamError(Exception):
"""Base exception for upstream client errors."""
pass
class UpstreamConnectionError(UpstreamError):
"""Connection to upstream failed (network error, DNS, etc.)."""
pass
class UpstreamTimeoutError(UpstreamError):
"""Request to upstream timed out."""
pass
class UpstreamHTTPError(UpstreamError):
"""Upstream returned an HTTP error response."""
def __init__(self, message: str, status_code: int, response_headers: dict = None):
super().__init__(message)
self.status_code = status_code
self.response_headers = response_headers or {}
class UpstreamSSLError(UpstreamError):
"""SSL/TLS error when connecting to upstream."""
pass
class FileSizeExceededError(UpstreamError):
"""File size exceeds the maximum allowed."""
def __init__(self, message: str, content_length: int, max_size: int):
super().__init__(message)
self.content_length = content_length
self.max_size = max_size
class SourceNotFoundError(UpstreamError):
"""No matching upstream source found for URL."""
pass
class SourceDisabledError(UpstreamError):
"""The matching upstream source is disabled."""
pass
@dataclass
class FetchResult:
"""Result of fetching an artifact from upstream."""
content: BinaryIO # File-like object with content
sha256: str # SHA256 hash of content
size: int # Size in bytes
content_type: Optional[str] # Content-Type header
response_headers: dict # All response headers for provenance
source_name: Optional[str] = None # Name of matched upstream source
temp_path: Optional[Path] = None # Path to temp file (for cleanup)
def close(self):
"""Close and clean up resources."""
if self.content:
try:
self.content.close()
except Exception:
pass
if self.temp_path and self.temp_path.exists():
try:
self.temp_path.unlink()
except Exception:
pass
@dataclass
class UpstreamClientConfig:
"""Configuration for the upstream client."""
connect_timeout: float = 30.0 # Connection timeout in seconds
read_timeout: float = 300.0 # Read timeout in seconds (5 minutes for large files)
max_retries: int = 3 # Maximum number of retry attempts
retry_backoff_base: float = 1.0 # Base delay for exponential backoff
retry_backoff_max: float = 30.0 # Maximum delay between retries
follow_redirects: bool = True # Whether to follow redirects
max_redirects: int = 5 # Maximum number of redirects to follow
max_file_size: Optional[int] = None # Maximum file size (None = unlimited)
verify_ssl: bool = True # Verify SSL certificates
user_agent: str = "Orchard-UpstreamClient/1.0"
class UpstreamClient:
"""
HTTP client for fetching artifacts from upstream sources.
Supports streaming downloads, multiple authentication methods,
automatic source matching, and air-gap mode enforcement.
"""
def __init__(
self,
sources: list[UpstreamSource] = None,
cache_settings: CacheSettings = None,
config: UpstreamClientConfig = None,
):
"""
Initialize the upstream client.
Args:
sources: List of upstream sources for URL matching and auth.
Should be sorted by priority (lowest first).
cache_settings: Global cache settings including air-gap mode.
config: Client configuration options.
"""
self.sources = sources or []
self.cache_settings = cache_settings
self.config = config or UpstreamClientConfig()
# Sort sources by priority (lower = higher priority)
self.sources = sorted(self.sources, key=lambda s: s.priority)
def _match_source(self, url: str) -> Optional[UpstreamSource]:
"""
Find the upstream source that matches the given URL.
Matches by URL prefix, returns the highest priority match.
Args:
url: The URL to match.
Returns:
The matching UpstreamSource or None if no match.
"""
for source in self.sources:
# Check if URL starts with source URL (prefix match)
if url.startswith(source.url.rstrip("/")):
return source
return None
def _build_auth_headers(self, source: UpstreamSource) -> dict:
"""
Build authentication headers for the given source.
Args:
source: The upstream source with auth configuration.
Returns:
Dictionary of headers to add to the request.
"""
headers = {}
if source.auth_type == "none":
pass
elif source.auth_type == "basic":
# Basic auth is supplied via httpx's auth parameter
# (see _get_basic_auth), so no header is added here.
pass
elif source.auth_type == "bearer":
password = source.get_password()
if password:
headers["Authorization"] = f"Bearer {password}"
elif source.auth_type == "api_key":
# API key auth uses custom headers
custom_headers = source.get_headers()
if custom_headers:
headers.update(custom_headers)
return headers
def _get_basic_auth(self, source: UpstreamSource) -> Optional[tuple[str, str]]:
"""
Get basic auth credentials if applicable.
Args:
source: The upstream source.
Returns:
Tuple of (username, password) or None.
"""
if source.auth_type == "basic" and source.username:
password = source.get_password() or ""
return (source.username, password)
return None
def _should_retry(self, error: Exception, attempt: int) -> bool:
"""
Determine if a request should be retried.
Args:
error: The exception that occurred.
attempt: Current attempt number (0-indexed).
Returns:
True if the request should be retried.
"""
if attempt >= self.config.max_retries - 1:
return False
# Retry on connection errors and timeouts
if isinstance(error, (httpx.ConnectError, httpx.ConnectTimeout)):
return True
# Retry on read timeouts
if isinstance(error, httpx.ReadTimeout):
return True
# Retry on certain HTTP errors (502, 503, 504)
if isinstance(error, httpx.HTTPStatusError):
return error.response.status_code in (502, 503, 504)
return False
def _calculate_backoff(self, attempt: int) -> float:
"""
Calculate backoff delay for retry.
Uses exponential backoff with jitter.
Args:
attempt: Current attempt number (0-indexed).
Returns:
Delay in seconds.
"""
import random
delay = self.config.retry_backoff_base * (2**attempt)
# Add jitter (±25%)
delay *= 0.75 + random.random() * 0.5
return min(delay, self.config.retry_backoff_max)
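# Worked example (with the default retry_backoff_base=1.0): attempt 0 waits
# ~0.75-1.25s, attempt 1 ~1.5-2.5s, attempt 2 ~3-5s, always capped at
# retry_backoff_max (30s by default).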
def fetch(self, url: str, expected_hash: Optional[str] = None) -> FetchResult:
"""
Fetch an artifact from the given URL.
Streams the response to a temp file while computing the SHA256 hash.
Handles authentication, retries, and error cases.
Args:
url: The URL to fetch.
expected_hash: Optional expected SHA256 hash for verification.
Returns:
FetchResult with content, hash, size, and headers.
Raises:
SourceDisabledError: If the matching source is disabled.
UpstreamConnectionError: On connection failures.
UpstreamTimeoutError: On timeout.
UpstreamHTTPError: On HTTP error responses.
UpstreamSSLError: On SSL/TLS errors.
FileSizeExceededError: If Content-Length exceeds max_file_size.
"""
start_time = time.time()
# Match URL to source
source = self._match_source(url)
# Check if source is enabled (if we have a match)
if source is not None and not source.enabled:
raise SourceDisabledError(
f"Upstream source '{source.name}' is disabled"
)
source_name = source.name if source else None
logger.info(
f"Fetching URL: {url} (source: {source_name or 'none'})"
)
# Build request parameters
headers = {"User-Agent": self.config.user_agent}
auth = None
if source:
headers.update(self._build_auth_headers(source))
auth = self._get_basic_auth(source)
timeout = httpx.Timeout(
connect=self.config.connect_timeout,
read=self.config.read_timeout,
write=30.0,
pool=10.0,
)
# Attempt fetch with retries
last_error = None
for attempt in range(self.config.max_retries):
try:
return self._do_fetch(
url=url,
headers=headers,
auth=auth,
timeout=timeout,
source_name=source_name,
start_time=start_time,
expected_hash=expected_hash,
)
except (
httpx.ConnectError,
httpx.ConnectTimeout,
httpx.ReadTimeout,
httpx.HTTPStatusError,
) as e:
last_error = e
if self._should_retry(e, attempt):
delay = self._calculate_backoff(attempt)
logger.warning(
f"Fetch failed (attempt {attempt + 1}/{self.config.max_retries}), "
f"retrying in {delay:.1f}s: {e}"
)
time.sleep(delay)
else:
break
# Convert final error to our exception types
self._raise_upstream_error(last_error, url)
def _do_fetch(
self,
url: str,
headers: dict,
auth: Optional[tuple[str, str]],
timeout: httpx.Timeout,
source_name: Optional[str],
start_time: float,
expected_hash: Optional[str] = None,
) -> FetchResult:
"""
Perform the actual fetch operation.
Args:
url: URL to fetch.
headers: Request headers.
auth: Basic auth credentials or None.
timeout: Request timeout configuration.
source_name: Name of matched source for logging.
start_time: Request start time for timing.
expected_hash: Optional expected hash for verification.
Returns:
FetchResult with content and metadata.
"""
with httpx.Client(
timeout=timeout,
follow_redirects=self.config.follow_redirects,
max_redirects=self.config.max_redirects,
verify=self.config.verify_ssl,
) as client:
with client.stream("GET", url, headers=headers, auth=auth) as response:
# Check for HTTP errors
response.raise_for_status()
# Check Content-Length against max size
content_length = response.headers.get("content-length")
if content_length:
content_length = int(content_length)
if (
self.config.max_file_size
and content_length > self.config.max_file_size
):
raise FileSizeExceededError(
f"File size {content_length} exceeds maximum {self.config.max_file_size}",
content_length,
self.config.max_file_size,
)
# Stream to temp file while computing hash
hasher = hashlib.sha256()
size = 0
# Create temp file
temp_file = tempfile.NamedTemporaryFile(
delete=False, prefix="orchard_upstream_"
)
temp_path = Path(temp_file.name)
try:
for chunk in response.iter_bytes(chunk_size=65536):
temp_file.write(chunk)
hasher.update(chunk)
size += len(chunk)
# Check size while streaming if max_file_size is set
if self.config.max_file_size and size > self.config.max_file_size:
temp_file.close()
temp_path.unlink()
raise FileSizeExceededError(
f"Downloaded size {size} exceeds maximum {self.config.max_file_size}",
size,
self.config.max_file_size,
)
temp_file.close()
sha256 = hasher.hexdigest()
# Verify hash if expected
if expected_hash and sha256 != expected_hash.lower():
temp_path.unlink()
raise UpstreamError(
f"Hash mismatch: expected {expected_hash}, got {sha256}"
)
# Capture response headers
response_headers = dict(response.headers)
# Get content type
content_type = response.headers.get("content-type")
elapsed = time.time() - start_time
logger.info(
f"Fetched {url}: {size} bytes, sha256={sha256[:12]}..., "
f"source={source_name}, time={elapsed:.2f}s"
)
# Return file handle positioned at start
content = open(temp_path, "rb")
return FetchResult(
content=content,
sha256=sha256,
size=size,
content_type=content_type,
response_headers=response_headers,
source_name=source_name,
temp_path=temp_path,
)
except Exception:
# Clean up on error
try:
temp_file.close()
except Exception:
pass
if temp_path.exists():
temp_path.unlink()
raise
def _raise_upstream_error(self, error: Exception, url: str):
"""
Convert httpx exception to appropriate UpstreamError.
Args:
error: The httpx exception.
url: The URL that was being fetched.
Raises:
Appropriate UpstreamError subclass.
"""
if error is None:
raise UpstreamError(f"Unknown error fetching {url}")
if isinstance(error, httpx.ConnectError):
raise UpstreamConnectionError(
f"Failed to connect to upstream: {error}"
) from error
if isinstance(error, (httpx.ConnectTimeout, httpx.ReadTimeout)):
raise UpstreamTimeoutError(
f"Request timed out: {error}"
) from error
if isinstance(error, httpx.HTTPStatusError):
raise UpstreamHTTPError(
f"HTTP {error.response.status_code}: {error}",
error.response.status_code,
dict(error.response.headers),
) from error
# Check for SSL errors in the error chain
if "ssl" in str(error).lower() or "certificate" in str(error).lower():
raise UpstreamSSLError(f"SSL/TLS error: {error}") from error
raise UpstreamError(f"Error fetching {url}: {error}") from error
def test_connection(self, source: UpstreamSource) -> tuple[bool, Optional[str], Optional[int]]:
"""
Test connectivity to an upstream source.
Performs a HEAD request to the source URL to verify connectivity
and authentication. Does not follow redirects - a 3xx response
is considered successful since it proves the server is reachable.
Args:
source: The upstream source to test.
Returns:
Tuple of (success, error_message, status_code).
"""
headers = {"User-Agent": self.config.user_agent}
headers.update(self._build_auth_headers(source))
auth = self._get_basic_auth(source)
timeout = httpx.Timeout(
connect=self.config.connect_timeout,
read=30.0,
write=30.0,
pool=10.0,
)
try:
with httpx.Client(
timeout=timeout,
verify=self.config.verify_ssl,
) as client:
response = client.head(
source.url,
headers=headers,
auth=auth,
follow_redirects=False,
)
# Consider 2xx and 3xx as success, also 405 (Method Not Allowed)
# since some servers don't support HEAD
if response.status_code < 400 or response.status_code == 405:
return (True, None, response.status_code)
else:
return (
False,
f"HTTP {response.status_code}",
response.status_code,
)
except httpx.ConnectError as e:
return (False, f"Connection failed: {e}", None)
except httpx.ConnectTimeout as e:
return (False, f"Connection timed out: {e}", None)
except httpx.ReadTimeout as e:
return (False, f"Read timed out: {e}", None)
except httpx.TooManyRedirects as e:
return (False, f"Too many redirects: {e}", None)
except Exception as e:
return (False, f"Error: {e}", None)


@@ -11,10 +11,10 @@ python-jose[cryptography]==3.3.0
passlib[bcrypt]==1.7.4
bcrypt==4.0.1
slowapi==0.1.9
httpx>=0.25.0
# Test dependencies
pytest>=7.4.0
pytest-asyncio>=0.21.0
pytest-cov>=4.1.0
httpx>=0.25.0
moto[s3]>=4.2.0


@@ -0,0 +1 @@
# Scripts package


@@ -0,0 +1,262 @@
#!/usr/bin/env python3
"""
Backfill script to extract dependencies from cached PyPI packages.
This script scans all artifacts in the _pypi project and extracts
Requires-Dist metadata from wheel and sdist files that don't already
have dependencies recorded.
Usage:
# From within the container:
python -m backend.scripts.backfill_pypi_dependencies
# Or with docker exec:
docker exec orchard_orchard-server_1 python -m backend.scripts.backfill_pypi_dependencies
# Dry run (preview only):
docker exec orchard_orchard-server_1 python -m backend.scripts.backfill_pypi_dependencies --dry-run
"""
import argparse
import logging
import re
import sys
import tarfile
import zipfile
from io import BytesIO
from typing import List, Optional, Tuple
# Ensure the app root is on sys.path so `backend.*` imports resolve
sys.path.insert(0, "/app")
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from backend.app.config import get_settings
from backend.app.models import (
Artifact,
ArtifactDependency,
Package,
Project,
Tag,
)
from backend.app.storage import get_storage
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)
def parse_requires_dist(requires_dist: str) -> Tuple[Optional[str], Optional[str]]:
"""Parse a Requires-Dist line into (package_name, version_constraint)."""
# Remove any environment markers (after semicolon)
if ";" in requires_dist:
requires_dist = requires_dist.split(";")[0].strip()
# Match patterns like "package (>=1.0)" or "package>=1.0" or "package"
match = re.match(
r"^([a-zA-Z0-9][-a-zA-Z0-9._]*)\s*(?:\(([^)]+)\)|([<>=!~][^\s;]+))?",
requires_dist.strip(),
)
if not match:
return None, None
package_name = match.group(1)
version_constraint = match.group(2) or match.group(3)
# Normalize package name (PEP 503)
normalized_name = re.sub(r"[-_.]+", "-", package_name).lower()
if version_constraint:
version_constraint = version_constraint.strip()
return normalized_name, version_constraint
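# Examples (per the regex above):
#   parse_requires_dist("charset-normalizer (>=2,<4)") -> ("charset-normalizer", ">=2,<4")
#   parse_requires_dist('trio>=0.16; extra == "trio"') -> ("trio", ">=0.16")
#   parse_requires_dist("idna") -> ("idna", None)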
def extract_requires_from_metadata(metadata_content: str) -> List[Tuple[str, Optional[str]]]:
"""Extract all Requires-Dist entries from METADATA/PKG-INFO content."""
dependencies = []
for line in metadata_content.split("\n"):
if line.startswith("Requires-Dist:"):
value = line[len("Requires-Dist:"):].strip()
pkg_name, version = parse_requires_dist(value)
if pkg_name:
dependencies.append((pkg_name, version))
return dependencies
def extract_metadata_from_wheel(content: bytes) -> Optional[str]:
"""Extract METADATA file content from a wheel (zip) file."""
try:
with zipfile.ZipFile(BytesIO(content)) as zf:
for name in zf.namelist():
if name.endswith(".dist-info/METADATA"):
return zf.read(name).decode("utf-8", errors="replace")
except Exception as e:
logger.warning(f"Failed to extract metadata from wheel: {e}")
return None
def extract_metadata_from_sdist(content: bytes) -> Optional[str]:
"""Extract PKG-INFO file content from a source distribution (.tar.gz)."""
try:
with tarfile.open(fileobj=BytesIO(content), mode="r:gz") as tf:
for member in tf.getmembers():
if member.name.endswith("/PKG-INFO") and member.name.count("/") == 1:
f = tf.extractfile(member)
if f:
return f.read().decode("utf-8", errors="replace")
except Exception as e:
logger.warning(f"Failed to extract metadata from sdist: {e}")
return None
def extract_dependencies(content: bytes, filename: str) -> List[Tuple[str, Optional[str]]]:
"""Extract dependencies from a PyPI package file."""
metadata = None
if filename.endswith(".whl"):
metadata = extract_metadata_from_wheel(content)
elif filename.endswith(".tar.gz"):
metadata = extract_metadata_from_sdist(content)
if metadata:
return extract_requires_from_metadata(metadata)
return []
def backfill_dependencies(dry_run: bool = False):
"""Main backfill function."""
settings = get_settings()
# Create database connection
engine = create_engine(settings.database_url)
Session = sessionmaker(bind=engine)
db = Session()
# Create storage client
storage = get_storage()
try:
# Find the _pypi project
pypi_project = db.query(Project).filter(Project.name == "_pypi").first()
if not pypi_project:
logger.info("No _pypi project found. Nothing to backfill.")
return
# Get all packages in _pypi
packages = db.query(Package).filter(Package.project_id == pypi_project.id).all()
logger.info(f"Found {len(packages)} packages in _pypi project")
total_artifacts = 0
artifacts_with_deps = 0
artifacts_processed = 0
dependencies_added = 0
for package in packages:
# Get all tags (each tag points to an artifact)
tags = db.query(Tag).filter(Tag.package_id == package.id).all()
for tag in tags:
total_artifacts += 1
filename = tag.name
# Skip non-package files (like .metadata files)
if not (filename.endswith(".whl") or filename.endswith(".tar.gz")):
continue
# Check if this artifact already has dependencies
existing_deps = db.query(ArtifactDependency).filter(
ArtifactDependency.artifact_id == tag.artifact_id
).count()
if existing_deps > 0:
artifacts_with_deps += 1
continue
# Get the artifact
artifact = db.query(Artifact).filter(Artifact.id == tag.artifact_id).first()
if not artifact:
logger.warning(f"Artifact {tag.artifact_id} not found for tag {filename}")
continue
logger.info(f"Processing {package.name}/{filename}...")
if dry_run:
logger.info(f" [DRY RUN] Would extract dependencies from {filename}")
artifacts_processed += 1
continue
# Download the artifact from S3
try:
content = storage.get(artifact.s3_key)
except Exception as e:
logger.error(f" Failed to download {filename}: {e}")
continue
# Extract dependencies
deps = extract_dependencies(content, filename)
if deps:
logger.info(f" Found {len(deps)} dependencies")
for dep_name, dep_version in deps:
# Check if already exists (race condition protection)
existing = db.query(ArtifactDependency).filter(
ArtifactDependency.artifact_id == tag.artifact_id,
ArtifactDependency.dependency_project == "_pypi",
ArtifactDependency.dependency_package == dep_name,
).first()
if not existing:
dep = ArtifactDependency(
artifact_id=tag.artifact_id,
dependency_project="_pypi",
dependency_package=dep_name,
version_constraint=dep_version if dep_version else "*",
)
db.add(dep)
dependencies_added += 1
logger.info(f" + {dep_name} {dep_version or '*'}")
db.commit()
else:
logger.info(f" No dependencies found")
artifacts_processed += 1
logger.info("")
logger.info("=" * 50)
logger.info("Backfill complete!")
logger.info(f" Total artifacts: {total_artifacts}")
logger.info(f" Already had deps: {artifacts_with_deps}")
logger.info(f" Processed: {artifacts_processed}")
logger.info(f" Dependencies added: {dependencies_added}")
if dry_run:
logger.info(" (DRY RUN - no changes made)")
finally:
db.close()
def main():
parser = argparse.ArgumentParser(
description="Backfill dependencies for cached PyPI packages"
)
parser.add_argument(
"--dry-run",
action="store_true",
help="Preview what would be done without making changes",
)
args = parser.parse_args()
backfill_dependencies(dry_run=args.dry_run)
if __name__ == "__main__":
main()


@@ -128,7 +128,9 @@ class TestProjectListingFilters:
assert response.status_code == 200
data = response.json()
names = [p["name"] for p in data["items"]]
# Filter out system projects (names starting with "_") as they may have
# collation-specific sort behavior and aren't part of the test data
names = [p["name"] for p in data["items"] if not p["name"].startswith("_")]
assert names == sorted(names)


@@ -0,0 +1,137 @@
"""Integration tests for PyPI transparent proxy."""
import os
import pytest
import httpx
def get_base_url():
"""Get the base URL for the Orchard server from environment."""
return os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
class TestPyPIProxyEndpoints:
"""Tests for PyPI proxy endpoints.
These endpoints are public (no auth required) since pip needs to use them.
"""
@pytest.mark.integration
def test_pypi_simple_index(self):
"""Test that /pypi/simple/ returns HTML response."""
with httpx.Client(base_url=get_base_url(), timeout=30.0) as client:
response = client.get("/pypi/simple/")
# Returns 200 if sources configured, 503 if not
assert response.status_code in (200, 503)
if response.status_code == 200:
assert "text/html" in response.headers.get("content-type", "")
else:
assert "No PyPI upstream sources configured" in response.json()["detail"]
@pytest.mark.integration
def test_pypi_package_endpoint(self):
"""Test that /pypi/simple/{package}/ returns appropriate response."""
with httpx.Client(base_url=get_base_url(), timeout=30.0) as client:
response = client.get("/pypi/simple/requests/")
# Returns 200 if sources configured and package found,
# 404 if package not found, 503 if no sources
assert response.status_code in (200, 404, 503)
if response.status_code == 200:
assert "text/html" in response.headers.get("content-type", "")
elif response.status_code == 404:
assert "not found" in response.json()["detail"].lower()
else: # 503
assert "No PyPI upstream sources configured" in response.json()["detail"]
@pytest.mark.integration
def test_pypi_download_missing_upstream_param(self):
"""Test that /pypi/simple/{package}/{filename} requires upstream param."""
with httpx.Client(base_url=get_base_url(), timeout=30.0) as client:
response = client.get("/pypi/simple/requests/requests-2.31.0.tar.gz")
assert response.status_code == 400
assert "upstream" in response.json()["detail"].lower()
class TestPyPILinkRewriting:
"""Tests for URL rewriting in PyPI proxy responses."""
def test_rewrite_package_links(self):
"""Test that download links are rewritten to go through proxy."""
from app.pypi_proxy import _rewrite_package_links
html = '''
<html>
<body>
<a href="https://files.pythonhosted.org/packages/ab/cd/requests-2.31.0.tar.gz#sha256=abc123">requests-2.31.0.tar.gz</a>
<a href="https://files.pythonhosted.org/packages/ef/gh/requests-2.31.0-py3-none-any.whl#sha256=def456">requests-2.31.0-py3-none-any.whl</a>
</body>
</html>
'''
# upstream_base_url is used to resolve relative URLs (not needed here since URLs are absolute)
result = _rewrite_package_links(
html,
"http://localhost:8080",
"requests",
"https://pypi.org/simple/requests/"
)
# Links should be rewritten to go through our proxy
assert "/pypi/simple/requests/requests-2.31.0.tar.gz?upstream=" in result
assert "/pypi/simple/requests/requests-2.31.0-py3-none-any.whl?upstream=" in result
# Original URLs should be encoded in upstream param
assert "files.pythonhosted.org" in result
# Hash fragments should be preserved
assert "#sha256=abc123" in result
assert "#sha256=def456" in result
def test_rewrite_relative_links(self):
"""Test that relative URLs are resolved to absolute URLs."""
from app.pypi_proxy import _rewrite_package_links
# Artifactory-style relative URLs
html = '''
<html>
<body>
<a href="../../packages/ab/cd/requests-2.31.0.tar.gz#sha256=abc123">requests-2.31.0.tar.gz</a>
</body>
</html>
'''
result = _rewrite_package_links(
html,
"https://orchard.example.com",
"requests",
"https://artifactory.example.com/api/pypi/pypi-remote/simple/requests/"
)
# The relative URL should be resolved to absolute
# ../../packages/ab/cd/... from /api/pypi/pypi-remote/simple/requests/ resolves to /api/pypi/pypi-remote/packages/ab/cd/...
assert "upstream=https%3A%2F%2Fartifactory.example.com%2Fapi%2Fpypi%2Fpypi-remote%2Fpackages" in result
# Hash fragment should be preserved
assert "#sha256=abc123" in result
class TestPyPIPackageNormalization:
"""Tests for PyPI package name normalization."""
@pytest.mark.integration
def test_package_name_normalized(self):
"""Test that package names are normalized per PEP 503.
Different capitalizations/separators should all be valid paths.
The endpoint normalizes to lowercase with hyphens before lookup.
"""
with httpx.Client(base_url=get_base_url(), timeout=30.0) as client:
# Test various name formats - all should be valid endpoint paths
for package_name in ["Requests", "some_package", "some-package"]:
response = client.get(f"/pypi/simple/{package_name}/")
# 200 = found, 404 = not found, 503 = no sources configured
assert response.status_code in (200, 404, 503), \
f"Unexpected status {response.status_code} for {package_name}"
# Verify response is appropriate for the status code
if response.status_code == 200:
assert "text/html" in response.headers.get("content-type", "")
elif response.status_code == 503:
assert "No PyPI upstream sources configured" in response.json()["detail"]


@@ -0,0 +1,263 @@
"""Tests for PyPI cache worker module."""
import os
import pytest
import re
from datetime import datetime, timedelta
from unittest.mock import MagicMock, patch
from uuid import uuid4
import httpx
def get_base_url():
"""Get the base URL for the Orchard server from environment."""
return os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
class TestPyPICacheTaskModel:
"""Tests for PyPICacheTask model."""
def test_model_creation(self):
"""Test that PyPICacheTask model can be instantiated with explicit values."""
from app.models import PyPICacheTask
task = PyPICacheTask(
package_name="requests",
version_constraint=">=2.25.0",
depth=0,
status="pending",
attempts=0,
max_attempts=3,
)
assert task.package_name == "requests"
assert task.version_constraint == ">=2.25.0"
assert task.depth == 0
assert task.status == "pending"
assert task.attempts == 0
assert task.max_attempts == 3
def test_model_fields_exist(self):
"""Test that PyPICacheTask has all expected fields."""
from app.models import PyPICacheTask
# Create with minimal required field
task = PyPICacheTask(package_name="urllib3")
# Verify all expected attributes exist (SQLAlchemy defaults apply on flush)
assert hasattr(task, "status")
assert hasattr(task, "depth")
assert hasattr(task, "attempts")
assert hasattr(task, "max_attempts")
assert hasattr(task, "version_constraint")
assert hasattr(task, "parent_task_id")
assert hasattr(task, "triggered_by_artifact")
class TestEnqueueCacheTask:
"""Tests for enqueue_cache_task function."""
def test_normalize_package_name(self):
"""Test that package names are normalized per PEP 503."""
# Test the normalization pattern used in the worker
test_cases = [
("Requests", "requests"),
("typing_extensions", "typing-extensions"),
("some.package", "some-package"),
("UPPER_CASE", "upper-case"),
("mixed-Case_name", "mixed-case-name"),
]
for input_name, expected in test_cases:
normalized = re.sub(r"[-_.]+", "-", input_name).lower()
assert normalized == expected, f"Failed for {input_name}"
class TestCacheWorkerFunctions:
"""Tests for cache worker helper functions."""
def test_exponential_backoff_calculation(self):
"""Test that exponential backoff is calculated correctly."""
# The formula is: 30 * (2 ** (attempts - 1))
# Attempt 1 failed → 30s
# Attempt 2 failed → 60s
# Attempt 3 failed → 120s
def calc_backoff(attempts):
return 30 * (2 ** (attempts - 1))
assert calc_backoff(1) == 30
assert calc_backoff(2) == 60
assert calc_backoff(3) == 120
class TestPyPICacheAPIEndpoints:
"""Integration tests for PyPI cache API endpoints."""
@pytest.mark.integration
def test_cache_status_endpoint(self):
"""Test GET /pypi/cache/status returns queue statistics."""
with httpx.Client(base_url=get_base_url(), timeout=30.0) as client:
response = client.get("/pypi/cache/status")
assert response.status_code == 200
data = response.json()
assert "pending" in data
assert "in_progress" in data
assert "completed" in data
assert "failed" in data
# All values should be non-negative integers
assert isinstance(data["pending"], int)
assert isinstance(data["in_progress"], int)
assert isinstance(data["completed"], int)
assert isinstance(data["failed"], int)
assert data["pending"] >= 0
assert data["in_progress"] >= 0
assert data["completed"] >= 0
assert data["failed"] >= 0
@pytest.mark.integration
def test_cache_failed_endpoint(self):
"""Test GET /pypi/cache/failed returns list of failed tasks."""
with httpx.Client(base_url=get_base_url(), timeout=30.0) as client:
response = client.get("/pypi/cache/failed")
assert response.status_code == 200
data = response.json()
assert isinstance(data, list)
# If there are failed tasks, verify structure
if data:
task = data[0]
assert "id" in task
assert "package" in task
assert "error" in task
assert "attempts" in task
assert "depth" in task
@pytest.mark.integration
def test_cache_failed_with_limit(self):
"""Test GET /pypi/cache/failed respects limit parameter."""
with httpx.Client(base_url=get_base_url(), timeout=30.0) as client:
response = client.get("/pypi/cache/failed?limit=5")
assert response.status_code == 200
data = response.json()
assert isinstance(data, list)
assert len(data) <= 5
@pytest.mark.integration
def test_cache_retry_nonexistent_package(self):
"""Test POST /pypi/cache/retry/{package} returns 404 for unknown package."""
with httpx.Client(base_url=get_base_url(), timeout=30.0) as client:
# Use a random package name that definitely doesn't exist
response = client.post(f"/pypi/cache/retry/nonexistent-package-{uuid4().hex[:8]}")
assert response.status_code == 404
# Check for "no failed" or "not found" in error message
detail = response.json()["detail"].lower()
assert "no failed" in detail or "not found" in detail
@pytest.mark.integration
def test_cache_retry_all_endpoint(self):
"""Test POST /pypi/cache/retry-all returns success."""
with httpx.Client(base_url=get_base_url(), timeout=30.0) as client:
response = client.post("/pypi/cache/retry-all")
assert response.status_code == 200
data = response.json()
assert "count" in data
assert "message" in data
assert isinstance(data["count"], int)
assert data["count"] >= 0
class TestCacheTaskDeduplication:
"""Tests for cache task deduplication logic."""
def test_find_cached_package_returns_none_for_uncached(self):
"""Test that _find_cached_package returns None for uncached packages."""
# This is a unit test pattern - mock the database
from unittest.mock import MagicMock
mock_db = MagicMock()
mock_db.query.return_value.filter.return_value.first.return_value = None
from app.pypi_cache_worker import _find_cached_package
result = _find_cached_package(mock_db, "nonexistent-package")
assert result is None
class TestCacheWorkerConfiguration:
"""Tests for cache worker configuration."""
def test_config_settings_exist(self):
"""Test that PyPI cache config settings are available."""
from app.config import get_settings
settings = get_settings()
# Check that settings exist and have reasonable defaults
assert hasattr(settings, "pypi_cache_workers")
assert hasattr(settings, "pypi_cache_max_depth")
assert hasattr(settings, "pypi_cache_max_attempts")
# Check aliases work
assert settings.PYPI_CACHE_WORKERS == settings.pypi_cache_workers
assert settings.PYPI_CACHE_MAX_DEPTH == settings.pypi_cache_max_depth
assert settings.PYPI_CACHE_MAX_ATTEMPTS == settings.pypi_cache_max_attempts
def test_config_default_values(self):
"""Test that PyPI cache config has sensible defaults."""
from app.config import get_settings
settings = get_settings()
# These are the defaults from our implementation
assert settings.pypi_cache_workers == 5
assert settings.pypi_cache_max_depth == 10
assert settings.pypi_cache_max_attempts == 3
class TestFetchAndCachePackage:
"""Tests for _fetch_and_cache_package function."""
def test_result_structure_success(self):
"""Test that success result has correct structure."""
# Mock a successful result
result = {"success": True, "artifact_id": "abc123"}
assert result["success"] is True
assert "artifact_id" in result
def test_result_structure_failure(self):
"""Test that failure result has correct structure."""
# Mock a failure result
result = {"success": False, "error": "Package not found"}
assert result["success"] is False
assert "error" in result
class TestWorkerPoolLifecycle:
"""Tests for worker pool initialization and shutdown."""
def test_init_shutdown_cycle(self):
"""Test that worker pool can be initialized and shut down cleanly."""
from app.pypi_cache_worker import (
init_cache_worker_pool,
shutdown_cache_worker_pool,
_cache_worker_pool,
_cache_worker_running,
)
# Note: the worker pool holds module-level global state and is normally
# initialized by main.py on startup, so a full init/shutdown cycle can't
# be exercised in isolation here; this test only verifies that the
# lifecycle functions are importable and callable.
assert callable(init_cache_worker_pool)
assert callable(shutdown_cache_worker_pool)

File diff suppressed because it is too large.


@@ -0,0 +1,672 @@
# Epic: Upstream Artifact Caching for Hermetic Builds
## Overview
Orchard will act as a permanent, content-addressable cache for upstream artifacts (npm, PyPI, Maven, Docker, etc.). Once an artifact is cached, it is stored forever by SHA256 hash - enabling reproducible builds years later regardless of whether the upstream source still exists.
## Problem Statement
Build reproducibility is critical for enterprise environments:
- Packages get deleted, yanked, or modified upstream
- Registries go down or change URLs
- Version constraints resolve differently over time
- Air-gapped environments cannot access public internet
Teams need to guarantee that a build from 5 years ago produces the exact same output today.
## Solution
Orchard becomes "the cache that never forgets":
1. **Fetch once, store forever** - When a build needs `lodash@4.17.21`, Orchard fetches it from npm, stores it by SHA256 hash, and never deletes it
2. **Content-addressable** - Same hash = same bytes, guaranteed
3. **Format-agnostic** - Orchard doesn't need to understand npm/PyPI/Maven protocols; the client provides the URL, Orchard fetches and stores
4. **Air-gap support** - Disable public internet entirely, only allow configured private upstreams
## User Workflow
```
1. Build tool resolves dependencies    npm install / pip install / mvn resolve
2. Generate lockfile with URLs         package-lock.json / requirements.txt
3. Cache all URLs in Orchard           orchard cache --file urls.txt
4. Pin by SHA256 hash                  lodash = "sha256:abc123..."
5. Future builds fetch by hash         Always get exact same bytes
```
## Key Features
- **Multiple upstream sources** - Configure npm, PyPI, Maven Central, private Artifactory, etc.
- **Per-source authentication** - Basic auth, bearer tokens, API keys
- **System cache projects** - `_npm`, `_pypi`, `_maven` organize cached packages by format
- **Cross-referencing** - Link cached artifacts to user projects for visibility
- **URL tracking** - Know which URLs map to which hashes, audit provenance
- **Air-gap mode** - Global kill switch for all public internet access
- **Environment variable config** - 12-factor friendly for containerized deployments
## Architecture
```
┌─────────────────────────────────────────────────────────────────┐
│ Orchard Server │
├─────────────────────────────────────────────────────────────────┤
│ POST /api/v1/cache │
│ ├── Check if URL already cached (url_hash lookup) │
│ ├── Match URL to upstream source (get auth) │
│ ├── Fetch via UpstreamClient (stream + compute SHA256) │
│ ├── Store artifact in S3 (content-addressable) │
│ ├── Create tag in system project (_npm/lodash:4.17.21) │
│ ├── Optionally create tag in user project │
│ └── Record in cached_urls table (provenance) │
├─────────────────────────────────────────────────────────────────┤
│ Tables │
│ ├── upstream_sources (npm-public, pypi-public, artifactory) │
│ ├── cache_settings (allow_public_internet, etc.) │
│ ├── cached_urls (url → artifact_id mapping) │
│ └── projects.is_system (for _npm, _pypi, etc.) │
└─────────────────────────────────────────────────────────────────┘
```
## Issues Summary
| Issue | Title | Status | Dependencies |
|-------|-------|--------|--------------|
| #68 | Schema: Upstream Sources & Cache Tracking | ✅ Complete | None |
| #69 | HTTP Client: Generic URL Fetcher | Pending | None |
| #70 | Cache API Endpoint | Pending | #68, #69 |
| #71 | System Projects (Cache Namespaces) | Pending | #68, #70 |
| #72 | Upstream Sources Admin API | Pending | #68 |
| #73 | Global Cache Settings API | Pending | #68 |
| #74 | Environment Variable Overrides | Pending | #68, #72, #73 |
| #75 | Frontend: Upstream Sources Management | Pending | #72, #73 |
| #105 | Frontend: System Projects Integration | Pending | #71 |
| #77 | CLI: Cache Command | Pending | #70 |
## Implementation Phases
**Phase 1 - Core (MVP):**
- #68 Schema ✅
- #69 HTTP Client
- #70 Cache API
- #71 System Projects
**Phase 2 - Admin:**
- #72 Upstream Sources API
- #73 Cache Settings API
- #74 Environment Variables
**Phase 3 - Frontend:**
- #75 Upstream Sources UI
- #105 System Projects UI
**Phase 4 - CLI:**
- #77 Cache Command
---
# Issue #68: Schema - Upstream Sources & Cache Tracking
**Status: ✅ Complete**
## Description
Create database schema for flexible multi-source upstream configuration and URL-to-artifact tracking. This replaces the previous singleton proxy_config design with a more flexible model supporting multiple upstream sources, air-gap mode, and provenance tracking.
## Acceptance Criteria
- [x] `upstream_sources` table:
- id (UUID, primary key)
- name (VARCHAR(255), unique, e.g., "npm-public", "artifactory-private")
- source_type (VARCHAR(50), enum: npm, pypi, maven, docker, helm, nuget, deb, rpm, generic)
- url (VARCHAR(2048), base URL of upstream)
- enabled (BOOLEAN, default false)
- is_public (BOOLEAN, true if this is a public internet source)
- auth_type (VARCHAR(20), enum: none, basic, bearer, api_key)
- username (VARCHAR(255), nullable)
- password_encrypted (BYTEA, nullable, Fernet encrypted)
- headers_encrypted (BYTEA, nullable, for custom headers like API keys)
- priority (INTEGER, default 100, lower = checked first)
- created_at, updated_at timestamps
- [x] `cache_settings` table (singleton, id always 1):
- id (INTEGER, primary key, check id = 1)
- allow_public_internet (BOOLEAN, default true, air-gap kill switch)
- auto_create_system_projects (BOOLEAN, default true)
- created_at, updated_at timestamps
- [x] `cached_urls` table:
- id (UUID, primary key)
- url (VARCHAR(4096), original URL fetched)
- url_hash (VARCHAR(64), SHA256 of URL for fast lookup, indexed)
- artifact_id (VARCHAR(64), FK to artifacts)
- source_id (UUID, FK to upstream_sources, nullable for manual imports)
- fetched_at (TIMESTAMP WITH TIME ZONE)
- response_headers (JSONB, original upstream headers for provenance)
- created_at timestamp
- [x] Add `is_system` BOOLEAN column to projects table (default false)
- [x] Migration SQL file in migrations/
- [x] Runtime migration in database.py
- [x] SQLAlchemy models for all new tables
- [x] Pydantic schemas for API input/output (passwords write-only)
- [x] Encryption helpers for password/headers fields
- [x] Seed default upstream sources (disabled by default):
- npm-public: https://registry.npmjs.org
- pypi-public: https://pypi.org/simple
- maven-central: https://repo1.maven.org/maven2
- docker-hub: https://registry-1.docker.io
- [x] Unit tests for models and schemas
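
The model follows directly from the criteria above. A minimal SQLAlchemy sketch (the real model lives in `backend/app/models.py`; the `Base` class and Fernet key handling are simplified here):

```python
# Sketch only - assumes the project's existing SQLAlchemy Base and a Fernet
# instance built from the configured encryption key.
import uuid

from cryptography.fernet import Fernet
from sqlalchemy import Boolean, Column, DateTime, Integer, LargeBinary, String, func
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import declarative_base

Base = declarative_base()


class UpstreamSource(Base):
    __tablename__ = "upstream_sources"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    name = Column(String(255), unique=True, nullable=False)
    source_type = Column(String(50), nullable=False)  # npm, pypi, maven, ...
    url = Column(String(2048), nullable=False)
    enabled = Column(Boolean, nullable=False, default=False)
    is_public = Column(Boolean, nullable=False, default=False)
    auth_type = Column(String(20), nullable=False, default="none")
    username = Column(String(255), nullable=True)
    password_encrypted = Column(LargeBinary, nullable=True)  # Fernet ciphertext
    headers_encrypted = Column(LargeBinary, nullable=True)
    priority = Column(Integer, nullable=False, default=100)  # lower = checked first
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(DateTime(timezone=True), onupdate=func.now())


def encrypt_secret(fernet: Fernet, plaintext: str) -> bytes:
    """Encrypt a credential before it is written to the database."""
    return fernet.encrypt(plaintext.encode())


def decrypt_secret(fernet: Fernet, ciphertext: bytes) -> str:
    """Decrypt a stored credential for use in an outbound request."""
    return fernet.decrypt(ciphertext).decode()
```

Storing only ciphertext in `password_encrypted`/`headers_encrypted` keeps credentials out of query results and API responses by construction.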
## Files Modified
- `migrations/010_upstream_caching.sql`
- `backend/app/database.py` (migrations 016-020)
- `backend/app/models.py` (UpstreamSource, CacheSettings, CachedUrl, Project.is_system)
- `backend/app/schemas.py` (all caching schemas)
- `backend/app/encryption.py` (renamed env var)
- `backend/app/config.py` (renamed setting)
- `backend/tests/test_upstream_caching.py` (37 tests)
- `frontend/src/components/Layout.tsx` (footer tagline)
- `CHANGELOG.md`
---
# Issue #69: HTTP Client - Generic URL Fetcher
**Status: Pending**
## Description
Create a reusable HTTP client for fetching artifacts from upstream sources. Supports multiple auth methods, streaming for large files, and computes SHA256 while downloading.
## Acceptance Criteria
- [ ] `UpstreamClient` class in `backend/app/upstream.py`
- [ ] `fetch(url)` method that:
- Streams response body (doesn't load large files into memory)
- Computes SHA256 hash while streaming
- Returns file content, hash, size, and response headers
- [ ] Auth support based on upstream source configuration:
- None (anonymous)
- Basic auth (username/password)
- Bearer token (Authorization: Bearer {token})
- API key (custom header name/value)
- [ ] URL-to-source matching:
- Match URL to configured upstream source by URL prefix
- Apply auth from matched source
- Respect source priority for multiple matches
- [ ] Configuration options:
- Timeout (connect and read, default 30s/300s)
- Max retries (default 3)
- Follow redirects (default true, max 5)
- Max file size (reject if Content-Length exceeds limit)
- [ ] Respect `allow_public_internet` setting:
- If false, reject URLs matching `is_public=true` sources
- If false, reject URLs not matching any configured source
- [ ] Capture response headers for provenance tracking
- [ ] Proper error handling:
- Connection errors (retry with backoff)
- HTTP errors (4xx, 5xx)
- Timeout errors
- SSL/TLS errors
- [ ] Logging for debugging (URL, source matched, status, timing)
- [ ] Unit tests with mocked HTTP responses
- [ ] Integration tests against httpbin.org or similar (optional, marked)
## Technical Notes
- Use `httpx` for async HTTP support (already in requirements)
- Stream to temp file to avoid memory issues with large artifacts
- Consider checksum verification if upstream provides it (e.g., npm provides shasum)
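
A minimal sketch of the streaming fetch using `httpx` as noted above; retries, max-size enforcement, and per-source auth are omitted, and `fetch_to_tempfile` is an illustrative name, not the final API:

```python
# Sketch only - stream to a temp file while hashing, per the criteria above.
import hashlib
import tempfile

import httpx


def fetch_to_tempfile(url: str, headers: dict | None = None,
                      connect_timeout: float = 30.0, read_timeout: float = 300.0):
    """Stream a URL to a temp file, computing SHA256 on the fly."""
    timeout = httpx.Timeout(read_timeout, connect=connect_timeout)
    sha256 = hashlib.sha256()
    tmp = tempfile.NamedTemporaryFile(delete=False)
    size = 0
    with httpx.Client(timeout=timeout, follow_redirects=True) as client:
        with client.stream("GET", url, headers=headers or {}) as response:
            response.raise_for_status()
            for chunk in response.iter_bytes():
                sha256.update(chunk)
                tmp.write(chunk)
                size += len(chunk)
            response_headers = dict(response.headers)
    tmp.close()
    return tmp.name, sha256.hexdigest(), size, response_headers
```

Hashing during the stream means large artifacts never need a second pass over the file.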
---
# Issue #70: Cache API Endpoint
**Status: Pending**
## Description
API endpoint to cache an artifact from an upstream URL. This is the core endpoint that fetches from upstream, stores in Orchard, and creates appropriate tags.
## Acceptance Criteria
- [ ] `POST /api/v1/cache` endpoint
- [ ] Request body:
```json
{
"url": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
"source_type": "npm",
"package_name": "lodash",
"tag": "4.17.21",
"user_project": "my-app",
"user_package": "npm-deps",
"user_tag": "lodash-4.17.21",
"expected_hash": "sha256:abc123..."
}
```
- `url` (required): URL to fetch
- `source_type` (required): Determines system project (_npm, _pypi, etc.)
- `package_name` (optional): Package name in system project, derived from URL if not provided
- `tag` (optional): Tag name in system project, derived from URL if not provided
- `user_project`, `user_package`, `user_tag` (optional): Cross-reference in user's project
- `expected_hash` (optional): Verify downloaded content matches
- [ ] Response:
```json
{
"artifact_id": "abc123...",
"sha256": "abc123...",
"size": 12345,
"content_type": "application/gzip",
"already_cached": false,
"source_url": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
"source_name": "npm-public",
"system_project": "_npm",
"system_package": "lodash",
"system_tag": "4.17.21",
"user_reference": "my-app/npm-deps:lodash-4.17.21"
}
```
- [ ] Behavior:
- Check if URL already cached (by url_hash in cached_urls)
- If cached: return existing artifact, optionally create user tag
- If not cached: fetch via UpstreamClient, store artifact, create tags
- Create/get system project if needed (e.g., `_npm`)
- Create package in system project (e.g., `_npm/lodash`)
- Create tag in system project (e.g., `_npm/lodash:4.17.21`)
- If user reference provided, create tag in user's project
- Record in cached_urls table with provenance
- [ ] Error handling:
- 400: Invalid request (bad URL format, missing required fields)
- 403: Air-gap mode enabled and URL is from public source
- 404: Upstream returned 404
- 409: Hash mismatch (if expected_hash provided)
- 502: Upstream fetch failed (connection error, timeout)
- 503: Upstream source disabled
- [ ] Authentication required (any authenticated user can cache)
- [ ] Audit logging for cache operations
- [ ] Integration tests covering success and error cases
## Technical Notes
- URL parsing for package_name/tag derivation is format-specific:
- npm: `/{package}/-/{package}-{version}.tgz` → package=lodash, tag=4.17.21
- pypi: `/packages/.../requests-2.28.0.tar.gz` → package=requests, tag=2.28.0
- maven: `/{group}/{artifact}/{version}/{artifact}-{version}.jar`
- Deduplication: if same SHA256 already exists, just create new tag pointing to it
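
A minimal sketch of the npm-style derivation; `derive_npm_name_and_tag` is an illustrative name, and each format would get its own parser:

```python
# Sketch only - derive (package, version) from an npm tarball URL.
import posixpath
import re
from urllib.parse import urlparse

# Version must start with a digit; pre-release versions containing hyphens
# (e.g. 1.0.0-beta.1) would need a smarter split than this.
_NPM_RE = re.compile(r"^(?P<package>.+)-(?P<version>\d[^-]*)\.tgz$")


def derive_npm_name_and_tag(url: str) -> tuple[str, str] | None:
    """e.g. .../lodash/-/lodash-4.17.21.tgz -> ("lodash", "4.17.21")."""
    # Scoped packages (@types/node) carry the scope in the path, not the
    # filename, so a full implementation would inspect the path as well.
    filename = posixpath.basename(urlparse(url).path)
    match = _NPM_RE.match(filename)
    if match is None:
        return None
    return match.group("package"), match.group("version")
```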
---
# Issue #71: System Projects (Cache Namespaces)
**Status: Pending**
## Description
Implement auto-created system projects for organizing cached artifacts by format type. These are special projects that provide a browsable namespace for all cached upstream packages.
## Acceptance Criteria
- [ ] System project names: `_npm`, `_pypi`, `_maven`, `_docker`, `_helm`, `_nuget`, `_deb`, `_rpm`, `_generic`
- [ ] Auto-creation:
- Created automatically on first cache request for that format
- Created by cache endpoint, not at startup
- Uses system user as creator (`created_by = "system"`)
- [ ] System project properties:
- `is_system = true`
- `is_public = true` (readable by all authenticated users)
- `description` = "System cache for {format} packages"
- [ ] Restrictions:
- Cannot be deleted (return 403 with message)
- Cannot be renamed
- Cannot change `is_public` to false
- Only admins can modify description
- [ ] Helper function: `get_or_create_system_project(source_type)` in routes.py or new cache.py module
- [ ] Update project deletion endpoint to check `is_system` flag
- [ ] Update project update endpoint to enforce restrictions
- [ ] Query helper: list all system projects for UI dropdown
- [ ] Unit tests for restrictions
- [ ] Integration tests for auto-creation and restrictions
## Technical Notes
- System projects are identified by `is_system=true`, not just naming convention
- The `_` prefix is a convention for display purposes
- Packages within system projects follow upstream naming (e.g., `_npm/lodash`, `_npm/@types/node`)
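
A minimal sketch of the helper, assuming a SQLAlchemy session and the existing `Project` model:

```python
# Sketch only - idempotent lookup/create for a format's system project.
def get_or_create_system_project(db, source_type: str):
    """Return the system project for a format (e.g. "_npm"), creating it if needed."""
    name = f"_{source_type}"
    project = db.query(Project).filter(Project.name == name).first()
    if project is not None:
        return project
    project = Project(
        name=name,
        is_system=True,
        is_public=True,  # readable by all authenticated users
        description=f"System cache for {source_type} packages",
        created_by="system",
    )
    db.add(project)
    db.commit()
    db.refresh(project)
    return project
```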
---
# Issue #72: Upstream Sources Admin API
**Status: Pending**
## Description
CRUD API endpoints for managing upstream sources configuration. Admin-only access.
## Acceptance Criteria
- [ ] `GET /api/v1/admin/upstream-sources` - List all upstream sources
- Returns array of sources with id, name, source_type, url, enabled, is_public, auth_type, priority, has_credentials, created_at, updated_at
- Supports `?enabled=true/false` filter
- Supports `?source_type=npm,pypi` filter
- Passwords/tokens never returned
- [ ] `POST /api/v1/admin/upstream-sources` - Create upstream source
- Request: name, source_type, url, enabled, is_public, auth_type, username, password, headers, priority
- Validates unique name
- Validates URL format
- Encrypts password/headers before storage
- Returns created source (without secrets)
- [ ] `GET /api/v1/admin/upstream-sources/{id}` - Get source details
- Returns source with `has_credentials` boolean, not actual credentials
- [ ] `PUT /api/v1/admin/upstream-sources/{id}` - Update source
- Partial update supported
- If password provided, re-encrypt; if omitted, keep existing
- Special value `password: null` clears credentials
- [ ] `DELETE /api/v1/admin/upstream-sources/{id}` - Delete source
- Returns 400 if source has cached_urls referencing it (optional: cascade or reassign)
- [ ] `POST /api/v1/admin/upstream-sources/{id}/test` - Test connectivity
- Attempts HEAD request to source URL
- Returns success/failure with status code and timing
- Does not cache anything
- [ ] All endpoints require admin role
- [ ] Audit logging for all mutations
- [ ] Pydantic schemas: UpstreamSourceCreate, UpstreamSourceUpdate, UpstreamSourceResponse
- [ ] Integration tests for all endpoints
## Technical Notes
- Test endpoint should respect auth configuration to verify credentials work
- Consider adding `last_used_at` and `last_error` fields for observability (future enhancement)
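
A minimal sketch of the connectivity check behind the test endpoint; the real handler would first decrypt and apply the source's configured auth headers:

```python
# Sketch only - HEAD the source URL and report status + timing, caching nothing.
import time

import httpx


def test_source_connectivity(url: str, headers: dict | None = None) -> dict:
    started = time.monotonic()
    try:
        response = httpx.head(url, headers=headers or {}, timeout=10.0,
                              follow_redirects=True)
        elapsed_ms = int((time.monotonic() - started) * 1000)
        return {
            "success": response.status_code < 400,
            "status_code": response.status_code,
            "elapsed_ms": elapsed_ms,
        }
    except httpx.HTTPError as exc:
        elapsed_ms = int((time.monotonic() - started) * 1000)
        return {"success": False, "error": str(exc), "elapsed_ms": elapsed_ms}
```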
---
# Issue #73: Global Cache Settings API
**Status: Pending**
## Description
API endpoints for managing global cache settings including air-gap mode.
## Acceptance Criteria
- [ ] `GET /api/v1/admin/cache-settings` - Get current settings
- Returns: allow_public_internet, auto_create_system_projects, created_at, updated_at
- [ ] `PUT /api/v1/admin/cache-settings` - Update settings
- Partial update supported
- Returns updated settings
- [ ] Settings fields:
- `allow_public_internet` (boolean): When false, blocks all requests to sources marked `is_public=true`
- `auto_create_system_projects` (boolean): When false, system projects must be created manually
- [ ] Admin-only access
- [ ] Audit logging for changes (especially air-gap mode changes)
- [ ] Pydantic schemas: CacheSettingsResponse, CacheSettingsUpdate
- [ ] Initialize singleton row on first access if not exists
- [ ] Integration tests
## Technical Notes
- Air-gap mode change should be logged prominently (security-relevant)
- Consider requiring confirmation header for disabling air-gap mode (similar to factory reset)
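
A minimal sketch of the singleton initialization, assuming the `CacheSettings` model; the check constraint keeps `id` pinned to 1:

```python
# Sketch only - lazily create the settings row with defaults on first access.
def get_or_create_cache_settings(db):
    settings = db.query(CacheSettings).filter(CacheSettings.id == 1).first()
    if settings is None:
        settings = CacheSettings(
            id=1,
            allow_public_internet=True,
            auto_create_system_projects=True,
        )
        db.add(settings)
        db.commit()
        db.refresh(settings)
    return settings
```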
---
# Issue #74: Environment Variable Overrides
**Status: Pending**
## Description
Allow cache and upstream configuration via environment variables for containerized deployments. Environment variables override database settings following 12-factor app principles.
## Acceptance Criteria
- [ ] Global settings overrides:
- `ORCHARD_CACHE_ALLOW_PUBLIC_INTERNET=true/false`
- `ORCHARD_CACHE_AUTO_CREATE_SYSTEM_PROJECTS=true/false`
- `ORCHARD_CACHE_ENCRYPTION_KEY` (Fernet key for credential encryption)
- [ ] Upstream source definition via env vars:
- `ORCHARD_UPSTREAM__{NAME}__URL` (double underscore as separator)
- `ORCHARD_UPSTREAM__{NAME}__TYPE` (npm, pypi, maven, etc.)
- `ORCHARD_UPSTREAM__{NAME}__ENABLED` (true/false)
- `ORCHARD_UPSTREAM__{NAME}__IS_PUBLIC` (true/false)
- `ORCHARD_UPSTREAM__{NAME}__AUTH_TYPE` (none, basic, bearer, api_key)
- `ORCHARD_UPSTREAM__{NAME}__USERNAME`
- `ORCHARD_UPSTREAM__{NAME}__PASSWORD`
- `ORCHARD_UPSTREAM__{NAME}__PRIORITY`
- Example: `ORCHARD_UPSTREAM__NPM_PRIVATE__URL=https://npm.corp.com`
- [ ] Env var sources:
- Loaded at startup
- Merged with database sources
- Env var sources have `source = "env"` marker
- Cannot be modified via API (return 400)
- Cannot be deleted via API (return 400)
- [ ] Update Settings class in config.py
- [ ] Update get/list endpoints to include env-defined sources
- [ ] Document all env vars in CLAUDE.md
- [ ] Unit tests for env var parsing
- [ ] Integration tests with env vars set
## Technical Notes
- Double underscore (`__`) separator allows source names with single underscores
- Env-defined sources should appear in API responses but marked as read-only
- Consider startup validation that warns about invalid env var combinations
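
A minimal sketch of the env var grouping; `parse_env_upstream_sources` is an illustrative name:

```python
# Sketch only - group ORCHARD_UPSTREAM__{NAME}__{FIELD} variables by source name.
import os
from collections import defaultdict

_PREFIX = "ORCHARD_UPSTREAM__"


def parse_env_upstream_sources(environ=os.environ) -> dict[str, dict[str, str]]:
    sources: dict[str, dict[str, str]] = defaultdict(dict)
    for key, value in environ.items():
        if not key.startswith(_PREFIX):
            continue
        remainder = key[len(_PREFIX):]
        # Split on the first double underscore so source names may contain
        # single underscores (e.g. NPM_PRIVATE).
        name, sep, field = remainder.partition("__")
        if not sep or not field:
            continue  # malformed - startup validation could warn here
        sources[name.lower()][field.lower()] = value
    return dict(sources)
```

For example, `ORCHARD_UPSTREAM__NPM_PRIVATE__URL=https://npm.corp.com` would parse to `{"npm_private": {"url": "https://npm.corp.com"}}`.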
---
# Issue #75: Frontend - Upstream Sources Management
**Status: Pending**
## Description
Admin UI for managing upstream sources and cache settings.
## Acceptance Criteria
- [ ] New admin page: `/admin/cache` or `/admin/upstream-sources`
- [ ] Upstream sources section:
- Table listing all sources with: name, type, URL, enabled toggle, public badge, priority, actions
- Visual distinction for env-defined sources (locked icon, no edit/delete)
- Create button opens modal/form
- Edit button for DB-defined sources
- Delete with confirmation modal
- Test connection button with status indicator
- [ ] Create/edit form fields:
- Name (text, required)
- Source type (dropdown)
- URL (text, required)
- Priority (number)
- Is public (checkbox)
- Enabled (checkbox)
- Auth type (dropdown: none, basic, bearer, api_key)
- Conditional auth fields based on type:
- Basic: username, password
- Bearer: token
- API key: header name, header value
- Password fields masked, "unchanged" placeholder on edit
- [ ] Cache settings section:
- Air-gap mode toggle with warning
- Auto-create system projects toggle
- "Air-gap mode" shows prominent warning banner when enabled
- [ ] Link from main admin navigation
- [ ] Loading and error states
- [ ] Success/error toast notifications
## Technical Notes
- Use existing admin page patterns from user management
- Air-gap toggle should require confirmation (modal with warning text)
---
# Issue #105: Frontend - System Projects Integration
**Status: Pending**
## Description
Integrate system projects into the frontend UI with appropriate visual treatment and navigation.
## Acceptance Criteria
- [ ] Home page project dropdown:
- System projects shown in separate "Cached Packages" section
- Visual distinction (icon, different background, or badge)
- Format icon for each type (npm, pypi, maven, etc.)
- [ ] Project list/grid:
- System projects can be filtered: "Show system projects" toggle
- Or separate tab: "Projects" | "Package Cache"
- [ ] System project page:
- "System Cache" badge in header
- Description explains this is auto-managed cache
- Settings/delete buttons hidden or disabled
- Shows format type prominently
- [ ] Package page within system project:
- Shows "Cached from" with source URL (linked)
- Shows "First cached" timestamp
- Shows which upstream source provided it
- [ ] Artifact page:
- If artifact came from cache, show provenance:
- Original URL
- Upstream source name
- Fetch timestamp
- [ ] Search includes system projects (with filter option)
## Technical Notes
- Use React context or query params for system project filtering
- Consider dedicated route: `/cache/npm/lodash` as alias for `/_npm/lodash`
---
# Issue #77: CLI - Cache Command
**Status: Pending**
## Description
Add a new `orchard cache` command to the existing CLI for caching artifacts from upstream URLs. This integrates with the new cache API endpoint and can optionally update `orchard.ensure` with cached artifacts.
## Acceptance Criteria
- [ ] New command: `orchard cache <url>` in `orchard/commands/cache.py`
- [ ] Basic usage:
```bash
# Cache a URL, print artifact info
orchard cache https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz
# Output:
# Caching https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz...
# Source type: npm
# Package: lodash
# Version: 4.17.21
#
# Successfully cached artifact
# Artifact ID: abc123...
# Size: 1.2 MB
# System project: _npm
# System package: lodash
# System tag: 4.17.21
```
- [ ] Options:
| Option | Description |
|--------|-------------|
| `--type, -t TYPE` | Source type: npm, pypi, maven, docker, helm, generic (auto-detected from URL if not provided) |
| `--package, -p NAME` | Package name in system project (auto-derived from URL if not provided) |
| `--tag TAG` | Tag name in system project (auto-derived from URL if not provided) |
| `--project PROJECT` | Also create tag in this user project |
| `--user-package PKG` | Package name in user project (required if --project specified) |
| `--user-tag TAG` | Tag name in user project (default: same as system tag) |
| `--expected-hash HASH` | Verify downloaded content matches this SHA256 |
| `--add` | Add to orchard.ensure after caching |
| `--add-path PATH` | Extraction path for --add (default: `<package>/`) |
| `--file, -f FILE` | Path to orchard.ensure file |
| `--verbose, -v` | Show detailed output |
- [ ] URL type auto-detection (a sketch follows under Technical Notes):
- `registry.npmjs.org` → npm
- `pypi.org` or `files.pythonhosted.org` → pypi
- `repo1.maven.org` or contains `/maven2/` → maven
- `registry-1.docker.io` or `docker.io` → docker
- Otherwise → generic
- [ ] Package/version extraction from URL patterns:
- npm: `/{package}/-/{package}-{version}.tgz`
- pypi: `/packages/.../requests-{version}.tar.gz`
- maven: `/{group}/{artifact}/{version}/{artifact}-{version}.jar`
- [ ] Add `cache_artifact()` function to `orchard/api.py`
- [ ] Integration with `--add` flag:
- Parse existing orchard.ensure
- Add new dependency entry pointing to cached artifact
- Use artifact_id (SHA256) for hermetic pinning
- [ ] Batch mode: `orchard cache --file urls.txt`
- One URL per line
- Lines starting with `#` are comments
- Report success/failure for each
- [ ] Exit codes:
- 0: Success (or already cached)
- 1: Fetch failed
- 2: Hash mismatch
- 3: Air-gap mode blocked request
- [ ] Error handling consistent with existing CLI patterns
- [ ] Unit tests in `test/test_cache.py`
- [ ] Update README.md with cache command documentation
## Technical Notes
- Follow existing Click patterns from other commands
- Use `get_auth_headers()` from `orchard/auth.py`
- URL parsing can use `urllib.parse`
- Consider adding URL pattern registry for extensibility
- The `--add` flag should integrate with existing ensure file parsing in `orchard/ensure.py`
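
A minimal sketch of the host-based detection, using `urllib.parse` as suggested above; the mapping mirrors the auto-detection rules in the acceptance criteria:

```python
# Sketch only - map a URL to a source type, falling back to "generic".
from urllib.parse import urlparse


def detect_source_type(url: str) -> str:
    parsed = urlparse(url)
    host = parsed.hostname or ""
    if host == "registry.npmjs.org":
        return "npm"
    if host in ("pypi.org", "files.pythonhosted.org"):
        return "pypi"
    if host == "repo1.maven.org" or "/maven2/" in parsed.path:
        return "maven"
    if host in ("registry-1.docker.io", "docker.io"):
        return "docker"
    return "generic"
```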
## Example Workflows
```bash
# Simple: cache a single URL
orchard cache https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz
# Cache and add to orchard.ensure for current project
orchard cache https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz \
--add --add-path libs/lodash/
# Cache with explicit metadata
orchard cache https://internal.corp/files/custom-lib.tar.gz \
--type generic \
--package custom-lib \
--tag v1.0.0
# Cache and cross-reference to user project
orchard cache https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz \
--project my-app \
--user-package npm-deps \
--user-tag lodash-4.17.21
# Batch cache from file
orchard cache --file deps-urls.txt
# Verify hash while caching
orchard cache https://example.com/file.tar.gz \
--expected-hash sha256:abc123...
```
---
## Out of Scope (Future Enhancements)
- Automatic transitive dependency resolution (client's responsibility)
- Lockfile parsing (`package-lock.json`, `requirements.txt`) - stretch goal for CLI
- Cache eviction policies (we cache forever by design)
- Mirroring/sync between Orchard instances
- Format-specific metadata extraction (npm package.json parsing, etc.)
## Success Criteria
- [ ] Can cache any URL and retrieve by SHA256 hash
- [ ] Cached artifacts persist indefinitely
- [ ] Air-gap mode blocks all public internet access
- [ ] Multiple upstream sources with different auth
- [ ] System projects organize cached packages by format
- [ ] CLI can cache URLs and update orchard.ensure
- [ ] Admin UI for upstream source management


@@ -0,0 +1,251 @@
# PyPI Cache Robustness Design
**Date:** 2026-02-02
**Status:** Approved
**Branch:** fix/pypi-proxy-timeout
## Problem
The current PyPI proxy proactive caching has reliability issues:
- Unbounded thread spawning for each dependency
- Silent failures (logged but not tracked or retried)
- No visibility into cache completeness
- Deps-of-deps often missing due to untracked failures
## Solution
Database-backed task queue with managed worker pool, automatic retries, and visibility API.
---
## Data Model
New table `pypi_cache_tasks`:
```sql
CREATE TABLE pypi_cache_tasks (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),

    -- What to cache
    package_name VARCHAR(255) NOT NULL,
    version_constraint VARCHAR(255),

    -- Origin tracking
    parent_task_id UUID REFERENCES pypi_cache_tasks(id) ON DELETE SET NULL,
    depth INTEGER NOT NULL DEFAULT 0,
    triggered_by_artifact VARCHAR(64),

    -- Status
    status VARCHAR(20) NOT NULL DEFAULT 'pending',
    attempts INTEGER NOT NULL DEFAULT 0,
    max_attempts INTEGER NOT NULL DEFAULT 3,

    -- Results
    cached_artifact_id VARCHAR(64),
    error_message TEXT,

    -- Timing
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    started_at TIMESTAMP WITH TIME ZONE,
    completed_at TIMESTAMP WITH TIME ZONE,
    next_retry_at TIMESTAMP WITH TIME ZONE
);

-- Indexes
CREATE INDEX idx_pypi_cache_tasks_status_retry ON pypi_cache_tasks(status, next_retry_at);
CREATE INDEX idx_pypi_cache_tasks_package_status ON pypi_cache_tasks(package_name, status);
CREATE INDEX idx_pypi_cache_tasks_parent ON pypi_cache_tasks(parent_task_id);

-- Constraints
ALTER TABLE pypi_cache_tasks ADD CONSTRAINT check_task_status
    CHECK (status IN ('pending', 'in_progress', 'completed', 'failed'));
```
---
## Worker Architecture
### Thread Pool (5 workers default)
```python
import threading
from concurrent.futures import ThreadPoolExecutor
from typing import Optional

_cache_worker_pool: Optional[ThreadPoolExecutor] = None
_cache_worker_running: bool = False


def init_cache_worker_pool(max_workers: int = 5):
    global _cache_worker_pool, _cache_worker_running
    _cache_worker_pool = ThreadPoolExecutor(
        max_workers=max_workers, thread_name_prefix="pypi-cache-"
    )
    _cache_worker_running = True
    threading.Thread(target=_cache_dispatcher_loop, daemon=True).start()
```
### Dispatcher Loop
- Polls DB every 2 seconds when idle
- Fetches batch of 10 ready tasks
- Marks tasks in_progress before submitting to pool
- Orders by depth (shallow first) then FIFO
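A minimal sketch of the loop; `get_db_session`, `_fetch_ready_tasks`, and `_mark_task_in_progress` are assumed helper names, not the final API:
```python
# Sketch only - poll for ready tasks and hand them to the worker pool.
import time


def _cache_dispatcher_loop():
    while _cache_worker_running:
        with get_db_session() as db:  # assumed session helper
            # Ready = pending with next_retry_at due, ordered by depth then FIFO
            tasks = _fetch_ready_tasks(db, batch_size=10)
            for task in tasks:
                _mark_task_in_progress(db, task)  # claim before submitting
                _cache_worker_pool.submit(_process_cache_task, task.id)
        if not tasks:
            time.sleep(2)  # idle poll interval
```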
### Task Processing
1. Dedup check - skip if package already cached
2. Dedup check - skip if pending/in_progress task exists for same package
3. Depth check - fail if >= 10 levels deep
4. Fetch package index page
5. Download best matching file (prefer wheels)
6. Store artifact, extract dependencies
7. Queue child tasks for each dependency
8. Mark completed or handle failure
---
## Retry Logic
Exponential backoff with 3 attempts:
| Attempt | Backoff |
|---------|---------|
| 1 fails | 30 seconds |
| 2 fails | 60 seconds |
| 3 fails | Permanent failure |
```python
from datetime import datetime, timedelta, timezone

# attempts=1 -> 30s, attempts=2 -> 60s; the third failure is permanent
backoff_seconds = 30 * (2 ** (attempts - 1))
task.next_retry_at = datetime.now(timezone.utc) + timedelta(seconds=backoff_seconds)
```
---
## API Endpoints
| Endpoint | Method | Purpose |
|----------|--------|---------|
| `/pypi/cache/status` | GET | Queue health summary |
| `/pypi/cache/failed` | GET | List failed tasks with errors |
| `/pypi/cache/retry/{package}` | POST | Retry single failed package |
| `/pypi/cache/retry-all` | POST | Retry all failed packages |
### Response Examples
**GET /pypi/cache/status**
```json
{
"pending": 12,
"in_progress": 3,
"completed": 847,
"failed": 5
}
```
**GET /pypi/cache/failed**
```json
[
{
"package": "some-obscure-pkg",
"error": "Timeout connecting to upstream",
"attempts": 3,
"failed_at": "2026-02-02T10:30:00Z"
}
]
```
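A minimal FastAPI sketch of the status endpoint; `get_db` and the `PyPICacheTask` model are assumed from the app, and the real route lives in `pypi_proxy.py`:
```python
# Sketch only - count tasks by status for the queue health summary.
from fastapi import APIRouter, Depends
from sqlalchemy import func

router = APIRouter()


@router.get("/pypi/cache/status")
def cache_status(db=Depends(get_db)):  # get_db: the app's session dependency
    rows = (
        db.query(PyPICacheTask.status, func.count(PyPICacheTask.id))
        .group_by(PyPICacheTask.status)
        .all()
    )
    counts = {status: count for status, count in rows}
    return {
        "pending": counts.get("pending", 0),
        "in_progress": counts.get("in_progress", 0),
        "completed": counts.get("completed", 0),
        "failed": counts.get("failed", 0),
    }
```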
---
## Integration Points
### Replace Thread Spawning (pypi_proxy.py)
```python
# OLD: _start_background_dependency_caching(base_url, unique_deps)
# NEW:
for dep_name, dep_version in unique_deps:
    _enqueue_cache_task(
        db,
        package_name=dep_name,
        version_constraint=dep_version,
        parent_task_id=None,
        depth=0,
        triggered_by_artifact=sha256,
    )
```
### App Startup (main.py)
```python
@app.on_event("startup")
async def startup():
init_cache_worker_pool(max_workers=settings.PYPI_CACHE_WORKERS)
@app.on_event("shutdown")
async def shutdown():
shutdown_cache_worker_pool()
```
### Configuration (config.py)
```python
import os

PYPI_CACHE_WORKERS = int(os.getenv("ORCHARD_PYPI_CACHE_WORKERS", "5"))
PYPI_CACHE_MAX_DEPTH = int(os.getenv("ORCHARD_PYPI_CACHE_MAX_DEPTH", "10"))
PYPI_CACHE_MAX_ATTEMPTS = int(os.getenv("ORCHARD_PYPI_CACHE_MAX_ATTEMPTS", "3"))
```
---
## Files to Create/Modify
| File | Action |
|------|--------|
| `migrations/0XX_pypi_cache_tasks.sql` | Create - new table |
| `backend/app/models.py` | Modify - add PyPICacheTask model |
| `backend/app/pypi_cache_worker.py` | Create - worker pool + processing |
| `backend/app/pypi_proxy.py` | Modify - replace threads, add API |
| `backend/app/main.py` | Modify - init worker on startup |
| `backend/app/config.py` | Modify - add config variables |
| `backend/tests/test_pypi_cache_worker.py` | Create - unit tests |
| `backend/tests/integration/test_pypi_cache_api.py` | Create - API tests |
---
## Deduplication Strategy
### At Task Creation Time
```python
def _enqueue_cache_task(db, package_name, ...):
    # Check for an existing pending/in_progress task
    existing_task = db.query(PyPICacheTask).filter(
        PyPICacheTask.package_name == package_name,
        PyPICacheTask.status.in_(["pending", "in_progress"]),
    ).first()
    if existing_task:
        return existing_task
    # Check if already cached
    if _find_cached_package(db, package_name):
        return None
    # Create new task
    ...
```
### At Processing Time (safety check)
```python
def _process_cache_task(task_id):
    # Double-check in case of a race with another worker
    existing = _find_cached_package(db, task.package_name)
    if existing:
        _mark_task_completed(db, task, cached_artifact_id=existing.artifact_id)
        return
```
---
## Success Criteria
- [ ] No unbounded thread creation
- [ ] All dependency caching attempts tracked in database
- [ ] Failed tasks automatically retry with backoff
- [ ] API provides visibility into queue status
- [ ] Manual retry capability for failed packages
- [ ] Existing pip install workflow unchanged (transparent)
- [ ] Tests cover worker, retry, and API functionality


@@ -11,6 +11,7 @@ import ChangePasswordPage from './pages/ChangePasswordPage';
import APIKeysPage from './pages/APIKeysPage';
import AdminUsersPage from './pages/AdminUsersPage';
import AdminOIDCPage from './pages/AdminOIDCPage';
import AdminCachePage from './pages/AdminCachePage';
import ProjectSettingsPage from './pages/ProjectSettingsPage';
import TeamsPage from './pages/TeamsPage';
import TeamDashboardPage from './pages/TeamDashboardPage';
@@ -50,6 +51,7 @@ function AppRoutes() {
<Route path="/settings/api-keys" element={<APIKeysPage />} />
<Route path="/admin/users" element={<AdminUsersPage />} />
<Route path="/admin/oidc" element={<AdminOIDCPage />} />
<Route path="/admin/cache" element={<AdminCachePage />} />
<Route path="/teams" element={<TeamsPage />} />
<Route path="/teams/:slug" element={<TeamDashboardPage />} />
<Route path="/teams/:slug/settings" element={<TeamSettingsPage />} />


@@ -42,6 +42,10 @@ import {
TeamUpdate,
TeamMemberCreate,
TeamMemberUpdate,
UpstreamSource,
UpstreamSourceCreate,
UpstreamSourceUpdate,
UpstreamSourceTestResult,
} from './types';
const API_BASE = '/api/v1';
@@ -682,3 +686,63 @@ export async function searchUsers(query: string, limit: number = 10): Promise<Us
});
return handleResponse<UserSearchResult[]>(response);
}
// Upstream Sources Admin API
export interface UpstreamSourceListParams {
enabled?: boolean;
source_type?: string;
}
export async function listUpstreamSources(params: UpstreamSourceListParams = {}): Promise<UpstreamSource[]> {
const query = buildQueryString(params as Record<string, unknown>);
const response = await fetch(`${API_BASE}/admin/upstream-sources${query}`, {
credentials: 'include',
});
return handleResponse<UpstreamSource[]>(response);
}
export async function createUpstreamSource(data: UpstreamSourceCreate): Promise<UpstreamSource> {
const response = await fetch(`${API_BASE}/admin/upstream-sources`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(data),
credentials: 'include',
});
return handleResponse<UpstreamSource>(response);
}
export async function getUpstreamSource(id: string): Promise<UpstreamSource> {
const response = await fetch(`${API_BASE}/admin/upstream-sources/${id}`, {
credentials: 'include',
});
return handleResponse<UpstreamSource>(response);
}
export async function updateUpstreamSource(id: string, data: UpstreamSourceUpdate): Promise<UpstreamSource> {
const response = await fetch(`${API_BASE}/admin/upstream-sources/${id}`, {
method: 'PUT',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(data),
credentials: 'include',
});
return handleResponse<UpstreamSource>(response);
}
export async function deleteUpstreamSource(id: string): Promise<void> {
const response = await fetch(`${API_BASE}/admin/upstream-sources/${id}`, {
method: 'DELETE',
credentials: 'include',
});
if (!response.ok) {
const error = await response.json().catch(() => ({ detail: 'Unknown error' }));
throw new ApiError(error.detail || `HTTP ${response.status}`, response.status);
}
}
export async function testUpstreamSource(id: string): Promise<UpstreamSourceTestResult> {
const response = await fetch(`${API_BASE}/admin/upstream-sources/${id}/test`, {
method: 'POST',
credentials: 'include',
});
return handleResponse<UpstreamSourceTestResult>(response);
}


@@ -272,7 +272,7 @@
.footer {
background: var(--bg-secondary);
border-top: 1px solid var(--border-primary);
padding: 24px 0;
padding: 12px 0;
}
.footer-content {


@@ -183,6 +183,18 @@ function Layout({ children }: LayoutProps) {
</svg>
SSO Configuration
</NavLink>
<NavLink
to="/admin/cache"
className="user-menu-item"
onClick={() => setShowUserMenu(false)}
>
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<path d="M21 16V8a2 2 0 0 0-1-1.73l-7-4a2 2 0 0 0-2 0l-7 4A2 2 0 0 0 3 8v8a2 2 0 0 0 1 1.73l7 4a2 2 0 0 0 2 0l7-4A2 2 0 0 0 21 16z"/>
<polyline points="3.27 6.96 12 12.01 20.73 6.96"/>
<line x1="12" y1="22.08" x2="12" y2="12"/>
</svg>
Cache Management
</NavLink>
</>
)}
<div className="user-menu-divider"></div>
@@ -229,7 +241,7 @@ function Layout({ children }: LayoutProps) {
</svg>
<span className="footer-logo">Orchard</span>
<span className="footer-separator">·</span>
<span className="footer-tagline">Content-Addressable Storage</span>
<span className="footer-tagline">The cache that never forgets</span>
</div>
<div className="footer-links">
<a href="/docs">Documentation</a>


@@ -0,0 +1,377 @@
.admin-cache-page {
padding: 2rem;
max-width: 1400px;
margin: 0 auto;
}
.admin-cache-page h1 {
margin-bottom: 2rem;
color: var(--text-primary);
}
.admin-cache-page h2 {
margin-bottom: 1rem;
color: var(--text-primary);
font-size: 1.25rem;
}
/* Success/Error Messages */
.success-message {
padding: 0.75rem 1rem;
background-color: #d4edda;
border: 1px solid #c3e6cb;
border-radius: 4px;
color: #155724;
margin-bottom: 1rem;
}
.error-message {
padding: 0.75rem 1rem;
background-color: #f8d7da;
border: 1px solid #f5c6cb;
border-radius: 4px;
color: #721c24;
margin-bottom: 1rem;
}
/* Sources Section */
.sources-section {
background: var(--bg-secondary);
border: 1px solid var(--border-color);
border-radius: 8px;
padding: 1.5rem;
}
.section-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 1rem;
}
.section-header h2 {
margin: 0;
}
/* Sources Table */
.sources-table {
width: 100%;
border-collapse: collapse;
background: var(--bg-primary);
border-radius: 4px;
overflow: hidden;
}
.sources-table th,
.sources-table td {
padding: 0.75rem 1rem;
text-align: center;
border-bottom: 1px solid var(--border-color);
}
.sources-table th {
background: var(--bg-tertiary);
font-weight: 600;
color: var(--text-secondary);
font-size: 0.85rem;
text-transform: uppercase;
}
.sources-table tr:last-child td {
border-bottom: none;
}
.sources-table tr.disabled-row {
opacity: 0.6;
}
.source-name {
font-weight: 500;
color: var(--text-primary);
white-space: nowrap;
}
/* Name column should be left-aligned */
.sources-table td:first-child {
text-align: left;
}
.url-cell {
font-family: monospace;
font-size: 0.9rem;
max-width: 300px;
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
text-align: left;
}
/* Badges */
.env-badge,
.status-badge {
display: inline-block;
padding: 0.2rem 0.5rem;
border-radius: 4px;
font-size: 0.75rem;
font-weight: 500;
margin-left: 0.5rem;
}
.env-badge {
background-color: #fff3e0;
color: #e65100;
}
.status-badge.enabled {
background-color: #e8f5e9;
color: #2e7d32;
}
.status-badge.disabled {
background-color: #ffebee;
color: #c62828;
}
.coming-soon-badge {
color: #9e9e9e;
font-style: italic;
font-size: 0.85em;
}
/* Actions */
.actions-cell {
white-space: nowrap;
}
.actions-cell .btn {
margin-right: 0.5rem;
}
.actions-cell .btn:last-child {
margin-right: 0;
}
.test-cell {
text-align: center;
width: 2rem;
}
.test-dot {
font-size: 1rem;
cursor: default;
}
.test-dot.success {
color: #2e7d32;
}
.test-dot.failure {
color: #c62828;
cursor: pointer;
}
.test-dot.failure:hover {
color: #b71c1c;
}
.test-dot.testing {
color: #1976d2;
animation: pulse 1s infinite;
}
@keyframes pulse {
0%, 100% { opacity: 1; }
50% { opacity: 0.4; }
}
/* Error Modal */
.error-modal-content {
background: var(--bg-primary);
border-radius: 8px;
padding: 2rem;
width: 100%;
max-width: 500px;
}
.error-modal-content h3 {
margin-top: 0;
color: #c62828;
}
.error-modal-content .error-details {
background: var(--bg-tertiary);
padding: 1rem;
border-radius: 4px;
font-family: monospace;
font-size: 0.9rem;
word-break: break-word;
white-space: pre-wrap;
}
.error-modal-content .modal-actions {
display: flex;
justify-content: flex-end;
margin-top: 1.5rem;
}
/* Buttons */
.btn {
padding: 0.5rem 1rem;
border: 1px solid var(--border-color);
border-radius: 4px;
background: var(--bg-primary);
color: var(--text-primary);
cursor: pointer;
font-size: 0.875rem;
}
.btn:hover {
background: var(--bg-tertiary);
}
.btn:disabled {
opacity: 0.6;
cursor: not-allowed;
}
.btn-primary {
background-color: var(--color-primary);
border-color: var(--color-primary);
color: white;
}
.btn-primary:hover {
background-color: var(--color-primary-hover);
}
.btn-danger {
background-color: #dc3545;
border-color: #dc3545;
color: white;
}
.btn-danger:hover {
background-color: #c82333;
}
.btn-sm {
padding: 0.25rem 0.75rem;
font-size: 0.8rem;
}
.btn-secondary {
background-color: var(--bg-tertiary);
border-color: var(--border-color);
color: var(--text-primary);
font-weight: 500;
}
.btn-secondary:hover {
background-color: var(--bg-secondary);
border-color: var(--text-secondary);
}
.empty-message {
color: var(--text-secondary);
font-style: italic;
padding: 2rem;
text-align: center;
}
/* Modal */
.modal-overlay {
position: fixed;
top: 0;
left: 0;
right: 0;
bottom: 0;
background: rgba(0, 0, 0, 0.5);
display: flex;
align-items: center;
justify-content: center;
z-index: 1000;
}
.modal-content {
background: var(--bg-primary);
border-radius: 8px;
padding: 2rem;
width: 100%;
max-width: 600px;
max-height: 90vh;
overflow-y: auto;
}
.modal-content h2 {
margin-top: 0;
}
/* Form */
.form-group {
margin-bottom: 1rem;
}
.form-group label {
display: block;
margin-bottom: 0.5rem;
font-weight: 500;
color: var(--text-primary);
}
.form-group input,
.form-group select {
width: 100%;
padding: 0.5rem;
border: 1px solid var(--border-color);
border-radius: 4px;
background: var(--bg-primary);
color: var(--text-primary);
font-size: 1rem;
}
.form-group input:focus,
.form-group select:focus {
outline: none;
border-color: var(--color-primary);
}
.form-row {
display: flex;
gap: 1rem;
}
.form-row .form-group {
flex: 1;
}
.checkbox-group label {
display: flex;
align-items: center;
gap: 0.5rem;
cursor: pointer;
}
.checkbox-group input[type="checkbox"] {
width: auto;
}
.help-text {
display: block;
font-size: 0.8rem;
color: var(--text-secondary);
margin-top: 0.25rem;
}
.form-actions {
display: flex;
justify-content: space-between;
align-items: center;
margin-top: 1.5rem;
padding-top: 1rem;
border-top: 1px solid var(--border-color);
}
.form-actions-right {
display: flex;
gap: 0.5rem;
}


@@ -0,0 +1,509 @@
import { useState, useEffect } from 'react';
import { useNavigate } from 'react-router-dom';
import { useAuth } from '../contexts/AuthContext';
import {
listUpstreamSources,
createUpstreamSource,
updateUpstreamSource,
deleteUpstreamSource,
testUpstreamSource,
} from '../api';
import { UpstreamSource, SourceType, AuthType } from '../types';
import './AdminCachePage.css';
const SOURCE_TYPES: SourceType[] = ['npm', 'pypi', 'maven', 'docker', 'helm', 'nuget', 'deb', 'rpm', 'generic'];
const SUPPORTED_SOURCE_TYPES: Set<SourceType> = new Set(['pypi', 'generic']);
const AUTH_TYPES: AuthType[] = ['none', 'basic', 'bearer', 'api_key'];
function AdminCachePage() {
const { user, loading: authLoading } = useAuth();
const navigate = useNavigate();
// Upstream sources state
const [sources, setSources] = useState<UpstreamSource[]>([]);
const [loadingSources, setLoadingSources] = useState(true);
const [sourcesError, setSourcesError] = useState<string | null>(null);
// Create/Edit form state
const [showForm, setShowForm] = useState(false);
const [editingSource, setEditingSource] = useState<UpstreamSource | null>(null);
const [formData, setFormData] = useState({
name: '',
source_type: 'generic' as SourceType,
url: '',
enabled: true,
auth_type: 'none' as AuthType,
username: '',
password: '',
priority: 100,
});
const [formError, setFormError] = useState<string | null>(null);
const [isSaving, setIsSaving] = useState(false);
// Test result state
const [testingId, setTestingId] = useState<string | null>(null);
const [testResults, setTestResults] = useState<Record<string, { success: boolean; message: string }>>({});
// Delete confirmation state
const [deletingId, setDeletingId] = useState<string | null>(null);
// Success message
const [successMessage, setSuccessMessage] = useState<string | null>(null);
// Error modal state
const [showErrorModal, setShowErrorModal] = useState(false);
const [selectedError, setSelectedError] = useState<{ sourceName: string; error: string } | null>(null);
useEffect(() => {
if (!authLoading && !user) {
navigate('/login', { state: { from: '/admin/cache' } });
}
}, [user, authLoading, navigate]);
useEffect(() => {
if (user && user.is_admin) {
loadSources();
}
}, [user]);
useEffect(() => {
if (successMessage) {
const timer = setTimeout(() => setSuccessMessage(null), 3000);
return () => clearTimeout(timer);
}
}, [successMessage]);
async function loadSources() {
setLoadingSources(true);
setSourcesError(null);
try {
const data = await listUpstreamSources();
setSources(data);
} catch (err) {
setSourcesError(err instanceof Error ? err.message : 'Failed to load sources');
} finally {
setLoadingSources(false);
}
}
function openCreateForm() {
setEditingSource(null);
setFormData({
name: '',
source_type: 'generic',
url: '',
enabled: true,
auth_type: 'none',
username: '',
password: '',
priority: 100,
});
setFormError(null);
setShowForm(true);
}
function openEditForm(source: UpstreamSource) {
setEditingSource(source);
setFormData({
name: source.name,
source_type: source.source_type,
url: source.url,
enabled: source.enabled,
auth_type: source.auth_type,
username: source.username || '',
password: '',
priority: source.priority,
});
setFormError(null);
setShowForm(true);
}
async function handleFormSubmit(e: React.FormEvent) {
e.preventDefault();
if (!formData.name.trim()) {
setFormError('Name is required');
return;
}
if (!formData.url.trim()) {
setFormError('URL is required');
return;
}
setIsSaving(true);
setFormError(null);
try {
let savedSourceId: string | null = null;
if (editingSource) {
// Update existing source
await updateUpstreamSource(editingSource.id, {
name: formData.name.trim(),
source_type: formData.source_type,
url: formData.url.trim(),
enabled: formData.enabled,
auth_type: formData.auth_type,
username: formData.username.trim() || undefined,
password: formData.password || undefined,
priority: formData.priority,
});
savedSourceId = editingSource.id;
setSuccessMessage('Source updated successfully');
} else {
// Create new source
const newSource = await createUpstreamSource({
name: formData.name.trim(),
source_type: formData.source_type,
url: formData.url.trim(),
enabled: formData.enabled,
auth_type: formData.auth_type,
username: formData.username.trim() || undefined,
password: formData.password || undefined,
priority: formData.priority,
});
savedSourceId = newSource.id;
setSuccessMessage('Source created successfully');
}
setShowForm(false);
await loadSources();
// Auto-test the source after save
if (savedSourceId) {
testSourceById(savedSourceId);
}
} catch (err) {
setFormError(err instanceof Error ? err.message : 'Failed to save source');
} finally {
setIsSaving(false);
}
}
async function handleDelete(source: UpstreamSource) {
if (!window.confirm(`Delete upstream source "${source.name}"? This cannot be undone.`)) {
return;
}
setDeletingId(source.id);
try {
await deleteUpstreamSource(source.id);
setSuccessMessage(`Source "${source.name}" deleted`);
await loadSources();
} catch (err) {
setSourcesError(err instanceof Error ? err.message : 'Failed to delete source');
} finally {
setDeletingId(null);
}
}
async function handleTest(source: UpstreamSource) {
testSourceById(source.id);
}
async function testSourceById(sourceId: string) {
setTestingId(sourceId);
setTestResults((prev) => ({ ...prev, [sourceId]: { success: true, message: 'Testing...' } }));
try {
const result = await testUpstreamSource(sourceId);
setTestResults((prev) => ({
...prev,
[sourceId]: {
success: result.success,
message: result.success
? `OK (${result.elapsed_ms}ms)`
: result.error || `HTTP ${result.status_code}`,
},
}));
} catch (err) {
setTestResults((prev) => ({
...prev,
[sourceId]: {
success: false,
message: err instanceof Error ? err.message : 'Test failed',
},
}));
} finally {
setTestingId(null);
}
}
function showError(sourceName: string, error: string) {
setSelectedError({ sourceName, error });
setShowErrorModal(true);
}
if (authLoading) {
return <div className="admin-cache-page">Loading...</div>;
}
if (!user?.is_admin) {
return (
<div className="admin-cache-page">
<div className="error-message">Access denied. Admin privileges required.</div>
</div>
);
}
return (
<div className="admin-cache-page">
<h1>Upstream Sources</h1>
{successMessage && <div className="success-message">{successMessage}</div>}
{/* Upstream Sources Section */}
<section className="sources-section">
<div className="section-header">
<button className="btn btn-primary" onClick={openCreateForm}>
Add Source
</button>
</div>
{loadingSources ? (
<p>Loading sources...</p>
) : sourcesError ? (
<div className="error-message">{sourcesError}</div>
) : sources.length === 0 ? (
<p className="empty-message">No upstream sources configured.</p>
) : (
<table className="sources-table">
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>URL</th>
<th>Priority</th>
<th>Status</th>
<th>Test</th>
<th>Actions</th>
</tr>
</thead>
<tbody>
{sources.map((source) => (
<tr key={source.id} className={source.enabled ? '' : 'disabled-row'}>
<td>
<span className="source-name">{source.name}</span>
{source.source === 'env' && (
<span className="env-badge" title="Defined via environment variable">ENV</span>
)}
</td>
<td>
{source.source_type}
{!SUPPORTED_SOURCE_TYPES.has(source.source_type) && (
<span className="coming-soon-badge"> (coming soon)</span>
)}
</td>
<td className="url-cell" title={source.url}>{source.url}</td>
<td>{source.priority}</td>
<td>
<span className={`status-badge ${source.enabled ? 'enabled' : 'disabled'}`}>
{source.enabled ? 'Enabled' : 'Disabled'}
</span>
</td>
<td className="test-cell">
{testingId === source.id ? (
<span className="test-dot testing" title="Testing..."></span>
) : testResults[source.id] ? (
testResults[source.id].success ? (
<span className="test-dot success" title={testResults[source.id].message}></span>
) : (
<span
className="test-dot failure"
title="Click to see error"
onClick={() => showError(source.name, testResults[source.id].message)}
></span>
)
) : null}
</td>
<td className="actions-cell">
<button
className="btn btn-sm btn-secondary"
onClick={() => handleTest(source)}
disabled={testingId === source.id}
>
Test
</button>
{source.source !== 'env' && (
<button className="btn btn-sm btn-secondary" onClick={() => openEditForm(source)}>
Edit
</button>
)}
</td>
</tr>
))}
</tbody>
</table>
)}
</section>
{/* Create/Edit Modal */}
{showForm && (
<div className="modal-overlay" onClick={() => setShowForm(false)}>
<div className="modal-content" onClick={(e) => e.stopPropagation()}>
<h2>{editingSource ? 'Edit Upstream Source' : 'Add Upstream Source'}</h2>
<form onSubmit={handleFormSubmit}>
{formError && <div className="error-message">{formError}</div>}
<div className="form-group">
<label htmlFor="name">Name</label>
<input
type="text"
id="name"
value={formData.name}
onChange={(e) => setFormData({ ...formData, name: e.target.value })}
placeholder="e.g., npm-private"
required
/>
</div>
<div className="form-row">
<div className="form-group">
<label htmlFor="source_type">Type</label>
<select
id="source_type"
value={formData.source_type}
onChange={(e) => setFormData({ ...formData, source_type: e.target.value as SourceType })}
>
{SOURCE_TYPES.map((type) => (
<option key={type} value={type}>
{type}{!SUPPORTED_SOURCE_TYPES.has(type) ? ' (coming soon)' : ''}
</option>
))}
</select>
</div>
<div className="form-group">
<label htmlFor="priority">Priority</label>
<input
type="number"
id="priority"
value={formData.priority}
onChange={(e) => setFormData({ ...formData, priority: parseInt(e.target.value) || 100 })}
min="1"
/>
<span className="help-text">Lower = higher priority</span>
</div>
</div>
<div className="form-group">
<label htmlFor="url">URL</label>
<input
type="url"
id="url"
value={formData.url}
onChange={(e) => setFormData({ ...formData, url: e.target.value })}
placeholder="https://registry.example.com"
required
/>
</div>
<div className="form-row">
<div className="form-group checkbox-group">
<label>
<input
type="checkbox"
checked={formData.enabled}
onChange={(e) => setFormData({ ...formData, enabled: e.target.checked })}
/>
Enabled
</label>
</div>
</div>
<div className="form-group">
<label htmlFor="auth_type">Authentication</label>
<select
id="auth_type"
value={formData.auth_type}
onChange={(e) => setFormData({ ...formData, auth_type: e.target.value as AuthType })}
>
{AUTH_TYPES.map((type) => (
<option key={type} value={type}>
{type === 'none' ? 'None' : type === 'api_key' ? 'API Key' : type.charAt(0).toUpperCase() + type.slice(1)}
</option>
))}
</select>
</div>
{formData.auth_type !== 'none' && (
<div className="form-row">
{(formData.auth_type === 'basic' || formData.auth_type === 'api_key') && (
<div className="form-group">
<label htmlFor="username">{formData.auth_type === 'api_key' ? 'Header Name' : 'Username'}</label>
<input
type="text"
id="username"
value={formData.username}
onChange={(e) => setFormData({ ...formData, username: e.target.value })}
placeholder={formData.auth_type === 'api_key' ? 'X-API-Key' : 'username'}
/>
</div>
)}
<div className="form-group">
<label htmlFor="password">
{formData.auth_type === 'bearer'
? 'Token'
: formData.auth_type === 'api_key'
? 'API Key Value'
: 'Password'}
</label>
<input
type="password"
id="password"
value={formData.password}
onChange={(e) => setFormData({ ...formData, password: e.target.value })}
placeholder={editingSource ? '(unchanged)' : ''}
/>
{editingSource && (
<span className="help-text">Leave empty to keep existing {formData.auth_type === 'bearer' ? 'token' : 'credentials'}</span>
)}
</div>
</div>
)}
<div className="form-actions">
{editingSource && (
<button
type="button"
className="btn btn-danger"
onClick={() => {
handleDelete(editingSource);
setShowForm(false);
}}
disabled={deletingId === editingSource.id}
>
{deletingId === editingSource.id ? 'Deleting...' : 'Delete'}
</button>
)}
<div className="form-actions-right">
<button type="button" className="btn" onClick={() => setShowForm(false)}>
Cancel
</button>
<button type="submit" className="btn btn-primary" disabled={isSaving}>
{isSaving ? 'Saving...' : editingSource ? 'Update' : 'Create'}
</button>
</div>
</div>
</form>
</div>
</div>
)}
{/* Error Details Modal */}
{showErrorModal && selectedError && (
<div className="modal-overlay" onClick={() => setShowErrorModal(false)}>
<div className="error-modal-content" onClick={(e) => e.stopPropagation()}>
<h3>Connection Error: {selectedError.sourceName}</h3>
<div className="error-details">{selectedError.error}</div>
<div className="modal-actions">
<button className="btn" onClick={() => setShowErrorModal(false)}>
Close
</button>
</div>
</div>
</div>
)}
</div>
);
}
export default AdminCachePage;


@@ -493,3 +493,16 @@
gap: 6px;
flex-wrap: wrap;
}
/* Cell name styles */
.cell-name {
display: flex;
align-items: center;
gap: 8px;
}
/* System project badge */
.system-badge {
font-size: 0.7rem;
padding: 2px 6px;
}


@@ -224,6 +224,9 @@ function Home() {
<span className="cell-name">
{!project.is_public && <LockIcon />}
{project.name}
{project.is_system && (
<Badge variant="warning" className="system-badge">Cache</Badge>
)}
</span>
),
},
@@ -246,7 +249,7 @@ function Home() {
key: 'created_by',
header: 'Owner',
className: 'cell-owner',
render: (project) => project.created_by,
render: (project) => project.team_name || project.created_by,
},
...(user
? [


@@ -642,6 +642,11 @@ tr:hover .copy-btn {
padding: 20px;
}
/* Ensure file modal needs higher z-index when opened from deps modal */
.modal-overlay:has(.ensure-file-modal) {
z-index: 1100;
}
.ensure-file-modal {
background: var(--bg-secondary);
border: 1px solid var(--border-primary);
@@ -793,4 +798,194 @@ tr:hover .copy-btn {
.ensure-file-modal {
max-height: 90vh;
}
.action-menu-dropdown {
right: 0;
left: auto;
}
}
/* Header upload button */
.header-upload-btn {
margin-left: auto;
}
/* Tag/Version cell */
.tag-version-cell {
display: flex;
flex-direction: column;
gap: 4px;
}
.tag-version-cell .version-badge {
font-size: 0.75rem;
color: var(--text-muted);
}
/* Icon buttons */
.btn-icon {
display: flex;
align-items: center;
justify-content: center;
width: 32px;
height: 32px;
padding: 0;
background: transparent;
border: 1px solid transparent;
border-radius: var(--radius-sm);
color: var(--text-secondary);
cursor: pointer;
transition: all var(--transition-fast);
}
.btn-icon:hover {
background: var(--bg-hover);
color: var(--text-primary);
}
/* Action menu */
.action-buttons {
display: flex;
align-items: center;
gap: 4px;
}
.action-menu {
position: relative;
}
/* Action menu backdrop for click-outside */
.action-menu-backdrop {
position: fixed;
top: 0;
left: 0;
right: 0;
bottom: 0;
z-index: 999;
}
.action-menu-dropdown {
position: fixed;
z-index: 1000;
min-width: 180px;
padding: 4px 0;
background: var(--bg-secondary);
border: 1px solid var(--border-primary);
border-radius: var(--radius-md);
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);
}
.action-menu-dropdown button {
display: block;
width: 100%;
padding: 8px 12px;
background: none;
border: none;
text-align: left;
font-size: 0.875rem;
color: var(--text-primary);
cursor: pointer;
transition: background var(--transition-fast);
}
.action-menu-dropdown button:hover {
background: var(--bg-hover);
}
/* Upload Modal */
.upload-modal,
.create-tag-modal {
background: var(--bg-secondary);
border-radius: var(--radius-lg);
width: 90%;
max-width: 500px;
max-height: 90vh;
overflow: hidden;
}
.modal-header {
display: flex;
align-items: center;
justify-content: space-between;
padding: 16px 20px;
border-bottom: 1px solid var(--border-primary);
}
.modal-header h3 {
margin: 0;
font-size: 1.125rem;
font-weight: 600;
}
.modal-body {
padding: 20px;
}
.modal-description {
margin-bottom: 16px;
color: var(--text-secondary);
font-size: 0.875rem;
}
.modal-actions {
display: flex;
justify-content: flex-end;
gap: 12px;
margin-top: 20px;
padding-top: 16px;
border-top: 1px solid var(--border-primary);
}
/* Dependencies Modal */
.deps-modal {
background: var(--bg-secondary);
border-radius: var(--radius-lg);
width: 90%;
max-width: 600px;
max-height: 80vh;
overflow: hidden;
display: flex;
flex-direction: column;
}
.deps-modal .modal-body {
overflow-y: auto;
flex: 1;
}
.deps-modal-controls {
display: flex;
gap: 8px;
margin-bottom: 16px;
}
/* Artifact ID Modal */
.artifact-id-modal {
background: var(--bg-secondary);
border-radius: var(--radius-lg);
width: 90%;
max-width: 500px;
}
.artifact-id-display {
display: flex;
align-items: center;
gap: 12px;
padding: 16px;
background: var(--bg-tertiary);
border-radius: var(--radius-md);
border: 1px solid var(--border-primary);
}
.artifact-id-display code {
font-family: 'JetBrains Mono', 'Fira Code', 'Consolas', monospace;
font-size: 0.8125rem;
color: var(--text-primary);
word-break: break-all;
flex: 1;
}
.artifact-id-display .copy-btn {
opacity: 1;
flex-shrink: 0;
}

View File

@@ -63,12 +63,17 @@ function PackagePage() {
const [accessDenied, setAccessDenied] = useState(false);
const [uploadTag, setUploadTag] = useState('');
const [uploadSuccess, setUploadSuccess] = useState<string | null>(null);
const [artifactIdInput, setArtifactIdInput] = useState('');
const [accessLevel, setAccessLevel] = useState<AccessLevel | null>(null);
const [createTagName, setCreateTagName] = useState('');
const [createTagArtifactId, setCreateTagArtifactId] = useState('');
const [createTagLoading, setCreateTagLoading] = useState(false);
// UI state
const [showUploadModal, setShowUploadModal] = useState(false);
const [showCreateTagModal, setShowCreateTagModal] = useState(false);
const [openMenuId, setOpenMenuId] = useState<string | null>(null);
const [menuPosition, setMenuPosition] = useState<{ top: number; left: number } | null>(null);
// Dependencies state
const [selectedTag, setSelectedTag] = useState<TagDetail | null>(null);
const [dependencies, setDependencies] = useState<Dependency[]>([]);
@@ -86,6 +91,13 @@ function PackagePage() {
// Dependency graph modal state
const [showGraph, setShowGraph] = useState(false);
// Dependencies modal state
const [showDepsModal, setShowDepsModal] = useState(false);
// Artifact ID modal state
const [showArtifactIdModal, setShowArtifactIdModal] = useState(false);
const [viewArtifactId, setViewArtifactId] = useState<string | null>(null);
// Ensure file modal state
const [showEnsureFile, setShowEnsureFile] = useState(false);
const [ensureFileContent, setEnsureFileContent] = useState<string | null>(null);
@@ -96,6 +108,9 @@ function PackagePage() {
// Derived permissions
const canWrite = accessLevel === 'write' || accessLevel === 'admin';
// Detect system projects (convention: name starts with "_")
const isSystemProject = projectName?.startsWith('_') ?? false;
// Get params from URL
const page = parseInt(searchParams.get('page') || '1', 10);
const search = searchParams.get('search') || '';
@@ -323,92 +338,212 @@ function PackagePage() {
setSelectedTag(tag);
};
const handleMenuOpen = (e: React.MouseEvent, tagId: string) => {
e.stopPropagation();
if (openMenuId === tagId) {
setOpenMenuId(null);
setMenuPosition(null);
} else {
const rect = e.currentTarget.getBoundingClientRect();
setMenuPosition({ top: rect.bottom + 4, left: rect.right - 180 });
setOpenMenuId(tagId);
}
};
// System projects show Version first, regular projects show Tag first
const columns = isSystemProject
? [
// System project columns: Version first, then Filename
{
key: 'version',
header: 'Version',
sortable: true,
render: (t: TagDetail) => (
<strong
className={`tag-name-link ${selectedTag?.id === t.id ? 'selected' : ''}`}
onClick={() => handleTagSelect(t)}
style={{ cursor: 'pointer' }}
>
<span className="version-badge">{t.version || t.name}</span>
</strong>
),
},
{
key: 'artifact_original_name',
header: 'Filename',
className: 'cell-truncate',
render: (t: TagDetail) => (
<span title={t.artifact_original_name || t.name}>{t.artifact_original_name || t.name}</span>
),
},
{
key: 'artifact_size',
header: 'Size',
render: (t: TagDetail) => <span>{formatBytes(t.artifact_size)}</span>,
},
{
key: 'created_at',
header: 'Cached',
sortable: true,
render: (t: TagDetail) => (
<span>{new Date(t.created_at).toLocaleDateString()}</span>
),
},
{
key: 'actions',
header: '',
render: (t: TagDetail) => (
<div className="action-buttons">
<a
href={getDownloadUrl(projectName!, packageName!, t.name)}
className="btn btn-icon"
download
title="Download"
>
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4" />
<polyline points="7 10 12 15 17 10" />
<line x1="12" y1="15" x2="12" y2="3" />
</svg>
</a>
<button
className="btn btn-icon"
onClick={(e) => handleMenuOpen(e, t.id)}
title="More actions"
>
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<circle cx="12" cy="12" r="1" />
<circle cx="12" cy="5" r="1" />
<circle cx="12" cy="19" r="1" />
</svg>
</button>
</div>
),
},
]
: [
// Regular project columns: Tag, Version, Filename
{
key: 'name',
header: 'Tag',
sortable: true,
render: (t: TagDetail) => (
<strong
className={`tag-name-link ${selectedTag?.id === t.id ? 'selected' : ''}`}
onClick={() => handleTagSelect(t)}
style={{ cursor: 'pointer' }}
>
{t.name}
</strong>
),
},
{
key: 'version',
header: 'Version',
render: (t: TagDetail) => (
<span className="version-badge">{t.version || '—'}</span>
),
},
{
key: 'artifact_original_name',
header: 'Filename',
className: 'cell-truncate',
render: (t: TagDetail) => (
<span title={t.artifact_original_name || undefined}>{t.artifact_original_name || '—'}</span>
),
},
{
key: 'artifact_size',
header: 'Size',
render: (t: TagDetail) => <span>{formatBytes(t.artifact_size)}</span>,
},
{
key: 'created_at',
header: 'Created',
sortable: true,
render: (t: TagDetail) => (
<span title={`by ${t.created_by}`}>{new Date(t.created_at).toLocaleDateString()}</span>
),
},
{
key: 'actions',
header: '',
render: (t: TagDetail) => (
<div className="action-buttons">
<a
href={getDownloadUrl(projectName!, packageName!, t.name)}
className="btn btn-icon"
download
title="Download"
>
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4" />
<polyline points="7 10 12 15 17 10" />
<line x1="12" y1="15" x2="12" y2="3" />
</svg>
</a>
<button
className="btn btn-icon"
onClick={(e) => handleMenuOpen(e, t.id)}
title="More actions"
>
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<circle cx="12" cy="12" r="1" />
<circle cx="12" cy="5" r="1" />
<circle cx="12" cy="19" r="1" />
</svg>
</button>
</div>
),
},
];
// Find the tag for the open menu
const openMenuTag = tags.find(t => t.id === openMenuId);
// Close menu when clicking outside
const handleClickOutside = () => {
if (openMenuId) {
setOpenMenuId(null);
setMenuPosition(null);
}
};
// Render dropdown menu as a portal-like element
const renderActionMenu = () => {
if (!openMenuId || !menuPosition || !openMenuTag) return null;
const t = openMenuTag;
return (
<div
className="action-menu-backdrop"
onClick={handleClickOutside}
>
<div
className="action-menu-dropdown"
style={{ top: menuPosition.top, left: menuPosition.left }}
onClick={(e) => e.stopPropagation()}
>
<button onClick={() => { setViewArtifactId(t.artifact_id); setShowArtifactIdModal(true); setOpenMenuId(null); setMenuPosition(null); }}>
View Artifact ID
</button>
<button onClick={() => { navigator.clipboard.writeText(t.artifact_id); setOpenMenuId(null); setMenuPosition(null); }}>
Copy Artifact ID
</button>
<button onClick={() => { fetchEnsureFileForTag(t.name); setOpenMenuId(null); setMenuPosition(null); }}>
View Ensure File
</button>
{canWrite && !isSystemProject && (
<button onClick={() => { setCreateTagArtifactId(t.artifact_id); setShowCreateTagModal(true); setOpenMenuId(null); setMenuPosition(null); }}>
Create/Update Tag
</button>
)}
<button onClick={() => { handleTagSelect(t); setShowDepsModal(true); setOpenMenuId(null); setMenuPosition(null); }}>
View Dependencies
</button>
</div>
</div>
);
};
if (loading && !tagsData) {
return <div className="loading">Loading...</div>;
@@ -451,6 +586,19 @@ function PackagePage() {
<div className="page-header__title-row">
<h1>{packageName}</h1>
{pkg && <Badge variant="default">{pkg.format}</Badge>}
{user && canWrite && !isSystemProject && (
<button
className="btn btn-primary btn-small header-upload-btn"
onClick={() => setShowUploadModal(true)}
>
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" style={{ marginRight: '6px' }}>
<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4" />
<polyline points="17 8 12 3 7 8" />
<line x1="12" y1="3" x2="12" y2="15" />
</svg>
Upload
</button>
)}
</div>
{pkg?.description && <p className="description">{pkg.description}</p>}
<div className="page-header__meta">
@@ -468,14 +616,14 @@ function PackagePage() {
</div>
{pkg && (pkg.tag_count !== undefined || pkg.artifact_count !== undefined) && (
<div className="package-header-stats">
{pkg.tag_count !== undefined && (
{!isSystemProject && pkg.tag_count !== undefined && (
<span className="stat-item">
<strong>{pkg.tag_count}</strong> tags
</span>
)}
{pkg.artifact_count !== undefined && (
<span className="stat-item">
<strong>{pkg.artifact_count}</strong> artifacts
<strong>{pkg.artifact_count}</strong> {isSystemProject ? 'versions' : 'artifacts'}
</span>
)}
{pkg.total_size !== undefined && pkg.total_size > 0 && (
@@ -483,7 +631,7 @@ function PackagePage() {
<strong>{formatBytes(pkg.total_size)}</strong> total
</span>
)}
{pkg.latest_tag && (
{!isSystemProject && pkg.latest_tag && (
<span className="stat-item">
Latest: <strong className="accent">{pkg.latest_tag}</strong>
</span>
@@ -496,44 +644,9 @@ function PackagePage() {
{error && <div className="error-message">{error}</div>}
{uploadSuccess && <div className="success-message">{uploadSuccess}</div>}
{user && (
<div className="upload-section card">
<h3>Upload Artifact</h3>
{canWrite ? (
<div className="upload-form">
<div className="form-group">
<label htmlFor="upload-tag">Tag (optional)</label>
<input
id="upload-tag"
type="text"
value={uploadTag}
onChange={(e) => setUploadTag(e.target.value)}
placeholder="v1.0.0, latest, stable..."
/>
</div>
<DragDropUpload
projectName={projectName!}
packageName={packageName!}
tag={uploadTag || undefined}
onUploadComplete={handleUploadComplete}
onUploadError={handleUploadError}
/>
</div>
) : (
<DragDropUpload
projectName={projectName!}
packageName={packageName!}
disabled={true}
disabledReason="You have read-only access to this project and cannot upload artifacts."
onUploadComplete={handleUploadComplete}
onUploadError={handleUploadError}
/>
)}
</div>
)}
<div className="section-header">
<h2>Tags / Versions</h2>
<h2>{isSystemProject ? 'Versions' : 'Tags / Versions'}</h2>
</div>
<div className="list-controls">
@@ -577,110 +690,6 @@ function PackagePage() {
/>
)}
{/* Dependencies Section */}
{tags.length > 0 && (
<div className="dependencies-section card">
<div className="dependencies-header">
<h3>Dependencies</h3>
<div className="dependencies-controls">
{selectedTag && (
<>
<button
className="btn btn-secondary btn-small"
onClick={fetchEnsureFile}
disabled={ensureFileLoading}
title="View orchard.ensure file"
>
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" style={{ marginRight: '6px' }}>
<path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z"></path>
<polyline points="14 2 14 8 20 8"></polyline>
<line x1="16" y1="13" x2="8" y2="13"></line>
<line x1="16" y1="17" x2="8" y2="17"></line>
<polyline points="10 9 9 9 8 9"></polyline>
</svg>
{ensureFileLoading ? 'Loading...' : 'View Ensure File'}
</button>
<button
className="btn btn-secondary btn-small"
onClick={() => setShowGraph(true)}
title="View full dependency tree"
>
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" style={{ marginRight: '6px' }}>
<circle cx="12" cy="12" r="3"></circle>
<circle cx="4" cy="4" r="2"></circle>
<circle cx="20" cy="4" r="2"></circle>
<circle cx="4" cy="20" r="2"></circle>
<circle cx="20" cy="20" r="2"></circle>
<line x1="9.5" y1="9.5" x2="5.5" y2="5.5"></line>
<line x1="14.5" y1="9.5" x2="18.5" y2="5.5"></line>
<line x1="9.5" y1="14.5" x2="5.5" y2="18.5"></line>
<line x1="14.5" y1="14.5" x2="18.5" y2="18.5"></line>
</svg>
View Graph
</button>
</>
)}
</div>
</div>
<div className="dependencies-tag-select">
{selectedTag && (
<select
className="tag-selector"
value={selectedTag.id}
onChange={(e) => {
const tag = tags.find(t => t.id === e.target.value);
if (tag) setSelectedTag(tag);
}}
>
{tags.map(t => (
<option key={t.id} value={t.id}>
{t.name}{t.version ? ` (${t.version})` : ''}
</option>
))}
</select>
)}
</div>
{depsLoading ? (
<div className="deps-loading">Loading dependencies...</div>
) : depsError ? (
<div className="deps-error">{depsError}</div>
) : dependencies.length === 0 ? (
<div className="deps-empty">
{selectedTag ? (
<span><strong>{selectedTag.name}</strong> has no dependencies</span>
) : (
<span>No dependencies</span>
)}
</div>
) : (
<div className="deps-list">
<div className="deps-summary">
<strong>{selectedTag?.name}</strong> has {dependencies.length} {dependencies.length === 1 ? 'dependency' : 'dependencies'}:
</div>
<ul className="deps-items">
{dependencies.map((dep) => (
<li key={dep.id} className="dep-item">
<Link
to={`/project/${dep.project}/${dep.package}`}
className="dep-link"
>
{dep.project}/{dep.package}
</Link>
<span className="dep-constraint">
@ {dep.version || dep.tag}
</span>
<span className="dep-status dep-status--ok" title="Package exists">
&#10003;
</span>
</li>
))}
</ul>
</div>
)}
</div>
)}
{/* Used By (Reverse Dependencies) Section */}
<div className="used-by-section card">
<h3>Used By</h3>
@@ -737,78 +746,6 @@ function PackagePage() {
)}
</div>
<div className="download-by-id-section card">
<h3>Download by Artifact ID</h3>
<div className="download-by-id-form">
<input
type="text"
value={artifactIdInput}
onChange={(e) => setArtifactIdInput(e.target.value.toLowerCase().replace(/[^a-f0-9]/g, '').slice(0, 64))}
placeholder="Enter SHA256 artifact ID (64 hex characters)"
className="artifact-id-input"
/>
<a
href={artifactIdInput.length === 64 ? getDownloadUrl(projectName!, packageName!, `artifact:${artifactIdInput}`) : '#'}
className={`btn btn-primary ${artifactIdInput.length !== 64 ? 'btn-disabled' : ''}`}
download
onClick={(e) => {
if (artifactIdInput.length !== 64) {
e.preventDefault();
}
}}
>
Download
</a>
</div>
{artifactIdInput.length > 0 && artifactIdInput.length !== 64 && (
<p className="validation-hint">Artifact ID must be exactly 64 hex characters ({artifactIdInput.length}/64)</p>
)}
</div>
{user && canWrite && (
<div className="create-tag-section card">
<h3>Create / Update Tag</h3>
<p className="section-description">Point a tag at any existing artifact by its ID</p>
<form onSubmit={handleCreateTag} className="create-tag-form">
<div className="form-row">
<div className="form-group">
<label htmlFor="create-tag-name">Tag Name</label>
<input
id="create-tag-name"
type="text"
value={createTagName}
onChange={(e) => setCreateTagName(e.target.value)}
placeholder="latest, stable, v1.0.0..."
disabled={createTagLoading}
/>
</div>
<div className="form-group form-group--wide">
<label htmlFor="create-tag-artifact">Artifact ID</label>
<input
id="create-tag-artifact"
type="text"
value={createTagArtifactId}
onChange={(e) => setCreateTagArtifactId(e.target.value.toLowerCase().replace(/[^a-f0-9]/g, '').slice(0, 64))}
placeholder="SHA256 hash (64 hex characters)"
className="artifact-id-input"
disabled={createTagLoading}
/>
</div>
<button
type="submit"
className="btn btn-primary"
disabled={createTagLoading || !createTagName.trim() || createTagArtifactId.length !== 64}
>
{createTagLoading ? 'Creating...' : 'Create Tag'}
</button>
</div>
{createTagArtifactId.length > 0 && createTagArtifactId.length !== 64 && (
<p className="validation-hint">Artifact ID must be exactly 64 hex characters ({createTagArtifactId.length}/64)</p>
)}
</form>
</div>
)}
<div className="usage-section card">
<h3>Usage</h3>
<p>Download artifacts using:</p>
@@ -831,6 +768,118 @@ function PackagePage() {
/>
)}
{/* Upload Modal */}
{showUploadModal && (
<div className="modal-overlay" onClick={() => setShowUploadModal(false)}>
<div className="upload-modal" onClick={(e) => e.stopPropagation()}>
<div className="modal-header">
<h3>Upload Artifact</h3>
<button
className="modal-close"
onClick={() => setShowUploadModal(false)}
title="Close"
>
<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<line x1="18" y1="6" x2="6" y2="18"></line>
<line x1="6" y1="6" x2="18" y2="18"></line>
</svg>
</button>
</div>
<div className="modal-body">
<div className="form-group">
<label htmlFor="upload-tag">Tag (optional)</label>
<input
id="upload-tag"
type="text"
value={uploadTag}
onChange={(e) => setUploadTag(e.target.value)}
placeholder="v1.0.0, latest, stable..."
/>
</div>
<DragDropUpload
projectName={projectName!}
packageName={packageName!}
tag={uploadTag || undefined}
onUploadComplete={(result) => {
handleUploadComplete(result);
setShowUploadModal(false);
setUploadTag('');
}}
onUploadError={handleUploadError}
/>
</div>
</div>
</div>
)}
{/* Create/Update Tag Modal */}
{showCreateTagModal && (
<div className="modal-overlay" onClick={() => setShowCreateTagModal(false)}>
<div className="create-tag-modal" onClick={(e) => e.stopPropagation()}>
<div className="modal-header">
<h3>Create / Update Tag</h3>
<button
className="modal-close"
onClick={() => { setShowCreateTagModal(false); setCreateTagName(''); setCreateTagArtifactId(''); }}
title="Close"
>
<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<line x1="18" y1="6" x2="6" y2="18"></line>
<line x1="6" y1="6" x2="18" y2="18"></line>
</svg>
</button>
</div>
<div className="modal-body">
<p className="modal-description">Point a tag at an artifact by its ID</p>
<form onSubmit={(e) => { handleCreateTag(e); setShowCreateTagModal(false); }}>
<div className="form-group">
<label htmlFor="modal-tag-name">Tag Name</label>
<input
id="modal-tag-name"
type="text"
value={createTagName}
onChange={(e) => setCreateTagName(e.target.value)}
placeholder="latest, stable, v1.0.0..."
disabled={createTagLoading}
/>
</div>
<div className="form-group">
<label htmlFor="modal-artifact-id">Artifact ID</label>
<input
id="modal-artifact-id"
type="text"
value={createTagArtifactId}
onChange={(e) => setCreateTagArtifactId(e.target.value.toLowerCase().replace(/[^a-f0-9]/g, '').slice(0, 64))}
placeholder="SHA256 hash (64 hex characters)"
className="artifact-id-input"
disabled={createTagLoading}
/>
{createTagArtifactId.length > 0 && createTagArtifactId.length !== 64 && (
<p className="validation-hint">{createTagArtifactId.length}/64 characters</p>
)}
</div>
<div className="modal-actions">
<button
type="button"
className="btn btn-secondary"
onClick={() => { setShowCreateTagModal(false); setCreateTagName(''); setCreateTagArtifactId(''); }}
>
Cancel
</button>
<button
type="submit"
className="btn btn-primary"
disabled={createTagLoading || !createTagName.trim() || createTagArtifactId.length !== 64}
>
{createTagLoading ? 'Creating...' : 'Create Tag'}
</button>
</div>
</form>
</div>
</div>
</div>
)}
{/* Ensure File Modal */}
{showEnsureFile && (
<div className="modal-overlay" onClick={() => setShowEnsureFile(false)}>
@@ -872,6 +921,107 @@ function PackagePage() {
</div>
</div>
)}
{/* Dependencies Modal */}
{showDepsModal && selectedTag && (
<div className="modal-overlay" onClick={() => setShowDepsModal(false)}>
<div className="deps-modal" onClick={(e) => e.stopPropagation()}>
<div className="modal-header">
<h3>Dependencies for {selectedTag.version || selectedTag.name}</h3>
<button
className="modal-close"
onClick={() => setShowDepsModal(false)}
title="Close"
>
<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<line x1="18" y1="6" x2="6" y2="18"></line>
<line x1="6" y1="6" x2="18" y2="18"></line>
</svg>
</button>
</div>
<div className="modal-body">
<div className="deps-modal-controls">
<button
className="btn btn-secondary btn-small"
onClick={fetchEnsureFile}
disabled={ensureFileLoading}
>
View Ensure File
</button>
<button
className="btn btn-secondary btn-small"
onClick={() => { setShowDepsModal(false); setShowGraph(true); }}
>
View Graph
</button>
</div>
{depsLoading ? (
<div className="deps-loading">Loading dependencies...</div>
) : depsError ? (
<div className="deps-error">{depsError}</div>
) : dependencies.length === 0 ? (
<div className="deps-empty">No dependencies</div>
) : (
<div className="deps-list">
<div className="deps-summary">
{dependencies.length} {dependencies.length === 1 ? 'dependency' : 'dependencies'}:
</div>
<ul className="deps-items">
{dependencies.map((dep) => (
<li key={dep.id} className="dep-item">
<Link
to={`/project/${dep.project}/${dep.package}`}
className="dep-link"
onClick={() => setShowDepsModal(false)}
>
{dep.project}/{dep.package}
</Link>
<span className="dep-constraint">
@ {dep.version || dep.tag}
</span>
<span className="dep-status dep-status--ok" title="Package exists">
&#10003;
</span>
</li>
))}
</ul>
</div>
)}
</div>
</div>
</div>
)}
{/* Artifact ID Modal */}
{showArtifactIdModal && viewArtifactId && (
<div className="modal-overlay" onClick={() => setShowArtifactIdModal(false)}>
<div className="artifact-id-modal" onClick={(e) => e.stopPropagation()}>
<div className="modal-header">
<h3>Artifact ID</h3>
<button
className="modal-close"
onClick={() => setShowArtifactIdModal(false)}
title="Close"
>
<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<line x1="18" y1="6" x2="6" y2="18"></line>
<line x1="6" y1="6" x2="18" y2="18"></line>
</svg>
</button>
</div>
<div className="modal-body">
<p className="modal-description">SHA256 hash identifying this artifact:</p>
<div className="artifact-id-display">
<code>{viewArtifactId}</code>
<CopyButton text={viewArtifactId} />
</div>
</div>
</div>
</div>
)}
{/* Action Menu Dropdown */}
{renderActionMenu()}
</div>
);
}

View File

@@ -195,6 +195,9 @@ function ProjectPage() {
<Badge variant={project.is_public ? 'public' : 'private'}>
{project.is_public ? 'Public' : 'Private'}
</Badge>
{project.is_system && (
<Badge variant="warning">System Cache</Badge>
)}
{accessLevel && (
<Badge variant={accessLevel === 'admin' ? 'success' : accessLevel === 'write' ? 'info' : 'default'}>
{isOwner ? 'Owner' : accessLevel.charAt(0).toUpperCase() + accessLevel.slice(1)}
@@ -211,7 +214,7 @@ function ProjectPage() {
</div>
</div>
<div className="page-header__actions">
{canAdmin && !project.team_id && (
{canAdmin && !project.team_id && !project.is_system && (
<button
className="btn btn-secondary"
onClick={() => navigate(`/project/${projectName}/settings`)}
@@ -224,11 +227,11 @@ function ProjectPage() {
Settings
</button>
)}
{canWrite ? (
{canWrite && !project.is_system ? (
<button className="btn btn-primary" onClick={() => setShowForm(!showForm)}>
{showForm ? 'Cancel' : '+ New Package'}
</button>
) : user ? (
) : user && !project.is_system ? (
<span className="text-muted" title="You have read-only access to this project">
Read-only access
</span>
@@ -291,18 +294,20 @@ function ProjectPage() {
placeholder="Filter packages..."
className="list-controls__search"
/>
<select
className="list-controls__select"
value={format}
onChange={(e) => handleFormatChange(e.target.value)}
>
<option value="">All formats</option>
{FORMAT_OPTIONS.map((f) => (
<option key={f} value={f}>
{f}
</option>
))}
</select>
{!project?.is_system && (
<select
className="list-controls__select"
value={format}
onChange={(e) => handleFormatChange(e.target.value)}
>
<option value="">All formats</option>
{FORMAT_OPTIONS.map((f) => (
<option key={f} value={f}>
{f}
</option>
))}
</select>
)}
</div>
{hasActiveFilters && (
@@ -338,19 +343,19 @@ function ProjectPage() {
className: 'cell-description',
render: (pkg) => pkg.description || '—',
},
{
...(!project?.is_system ? [{
key: 'format',
header: 'Format',
render: (pkg) => <Badge variant="default">{pkg.format}</Badge>,
},
{
render: (pkg: Package) => <Badge variant="default">{pkg.format}</Badge>,
}] : []),
...(!project?.is_system ? [{
key: 'tag_count',
header: 'Tags',
render: (pkg) => pkg.tag_count ?? '—',
},
render: (pkg: Package) => pkg.tag_count ?? '—',
}] : []),
{
key: 'artifact_count',
header: 'Artifacts',
header: project?.is_system ? 'Versions' : 'Artifacts',
render: (pkg) => pkg.artifact_count ?? '—',
},
{
@@ -359,12 +364,12 @@ function ProjectPage() {
render: (pkg) =>
pkg.total_size !== undefined && pkg.total_size > 0 ? formatBytes(pkg.total_size) : '—',
},
{
...(!project?.is_system ? [{
key: 'latest_tag',
header: 'Latest',
render: (pkg) =>
render: (pkg: Package) =>
pkg.latest_tag ? <strong style={{ color: 'var(--accent-primary)' }}>{pkg.latest_tag}</strong> : '—',
},
}] : []),
{
key: 'created_at',
header: 'Created',

View File

@@ -6,6 +6,7 @@ export interface Project {
name: string;
description: string | null;
is_public: boolean;
is_system?: boolean; // True for system cache projects (_npm, _pypi, etc.)
created_at: string;
updated_at: string;
created_by: string;
@@ -503,3 +504,56 @@ export interface TeamMemberCreate {
export interface TeamMemberUpdate {
role: TeamRole;
}
// Upstream Source types
export type SourceType = 'npm' | 'pypi' | 'maven' | 'docker' | 'helm' | 'nuget' | 'deb' | 'rpm' | 'generic';
export type AuthType = 'none' | 'basic' | 'bearer' | 'api_key';
export interface UpstreamSource {
id: string;
name: string;
source_type: SourceType;
url: string;
enabled: boolean;
auth_type: AuthType;
username: string | null;
has_password: boolean;
has_headers: boolean;
priority: number;
source: 'database' | 'env';
created_at: string | null;
updated_at: string | null;
}
export interface UpstreamSourceCreate {
name: string;
source_type: SourceType;
url: string;
enabled?: boolean;
auth_type?: AuthType;
username?: string;
password?: string;
headers?: Record<string, string>;
priority?: number;
}
export interface UpstreamSourceUpdate {
name?: string;
source_type?: SourceType;
url?: string;
enabled?: boolean;
auth_type?: AuthType;
username?: string;
password?: string;
headers?: Record<string, string> | null;
priority?: number;
}
export interface UpstreamSourceTestResult {
success: boolean;
status_code: number | null;
elapsed_ms: number;
error: string | null;
source_id: string;
source_name: string;
}

View File

@@ -128,6 +128,10 @@ spec:
value: {{ .Values.orchard.rateLimit.login | quote }}
{{- end }}
{{- end }}
{{- if .Values.orchard.purgeSeedData }}
- name: ORCHARD_PURGE_SEED_DATA
value: "true"
{{- end }}
{{- if .Values.orchard.database.poolSize }}
- name: ORCHARD_DATABASE_POOL_SIZE
value: {{ .Values.orchard.database.poolSize | quote }}

View File

@@ -91,6 +91,7 @@ affinity: {}
# Orchard server configuration
orchard:
env: "development" # Allows seed data for testing
purgeSeedData: true # Remove public seed data (npm-public, pypi-public, etc.)
server:
host: "0.0.0.0"
port: 8080

View File

@@ -0,0 +1,137 @@
-- Migration 010: Upstream Artifact Caching
-- Adds support for caching artifacts from upstream registries (npm, PyPI, Maven, etc.)
-- Part of "The cache that never forgets" epic for hermetic builds
-- =============================================================================
-- upstream_sources: Configure upstream registries for artifact caching
-- =============================================================================
CREATE TABLE IF NOT EXISTS upstream_sources (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name VARCHAR(255) NOT NULL UNIQUE,
source_type VARCHAR(50) NOT NULL DEFAULT 'generic',
url VARCHAR(2048) NOT NULL,
enabled BOOLEAN NOT NULL DEFAULT FALSE,
is_public BOOLEAN NOT NULL DEFAULT TRUE,
auth_type VARCHAR(20) NOT NULL DEFAULT 'none',
username VARCHAR(255),
password_encrypted BYTEA,
headers_encrypted BYTEA,
priority INTEGER NOT NULL DEFAULT 100,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
-- Source type must be one of the supported types
CONSTRAINT check_source_type CHECK (
source_type IN ('npm', 'pypi', 'maven', 'docker', 'helm', 'nuget', 'deb', 'rpm', 'generic')
),
-- Auth type must be valid
CONSTRAINT check_auth_type CHECK (
auth_type IN ('none', 'basic', 'bearer', 'api_key')
),
-- Priority must be positive
CONSTRAINT check_priority_positive CHECK (priority > 0)
);
-- Indexes for upstream_sources
CREATE INDEX IF NOT EXISTS idx_upstream_sources_enabled ON upstream_sources(enabled);
CREATE INDEX IF NOT EXISTS idx_upstream_sources_source_type ON upstream_sources(source_type);
CREATE INDEX IF NOT EXISTS idx_upstream_sources_is_public ON upstream_sources(is_public);
CREATE INDEX IF NOT EXISTS idx_upstream_sources_priority ON upstream_sources(priority);
-- Comments for upstream_sources
COMMENT ON TABLE upstream_sources IS 'Configuration for upstream artifact registries (npm, PyPI, Maven, etc.)';
COMMENT ON COLUMN upstream_sources.name IS 'Unique human-readable name (e.g., npm-public, artifactory-private)';
COMMENT ON COLUMN upstream_sources.source_type IS 'Type of registry: npm, pypi, maven, docker, helm, nuget, deb, rpm, generic';
COMMENT ON COLUMN upstream_sources.url IS 'Base URL of the upstream registry';
COMMENT ON COLUMN upstream_sources.enabled IS 'Whether this source is active for caching';
COMMENT ON COLUMN upstream_sources.is_public IS 'True if this is a public internet source (for air-gap mode)';
COMMENT ON COLUMN upstream_sources.auth_type IS 'Authentication type: none, basic, bearer, api_key';
COMMENT ON COLUMN upstream_sources.username IS 'Username for basic auth';
COMMENT ON COLUMN upstream_sources.password_encrypted IS 'Fernet-encrypted password/token';
COMMENT ON COLUMN upstream_sources.headers_encrypted IS 'Fernet-encrypted custom headers (JSON)';
COMMENT ON COLUMN upstream_sources.priority IS 'Priority for source selection (lower = higher priority)';
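-- Illustrative only (not executed by this migration): source selection is
-- expected to pick enabled sources of the matching type in priority order, e.g.:
--   SELECT name, url FROM upstream_sources
--   WHERE enabled AND source_type = 'pypi'
--   ORDER BY priority, name;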
-- =============================================================================
-- cache_settings: Global cache configuration (singleton table)
-- =============================================================================
CREATE TABLE IF NOT EXISTS cache_settings (
id INTEGER PRIMARY KEY DEFAULT 1,
allow_public_internet BOOLEAN NOT NULL DEFAULT TRUE,
auto_create_system_projects BOOLEAN NOT NULL DEFAULT TRUE,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
-- Singleton constraint
CONSTRAINT check_cache_settings_singleton CHECK (id = 1)
);
-- Insert default row
INSERT INTO cache_settings (id, allow_public_internet, auto_create_system_projects)
VALUES (1, TRUE, TRUE)
ON CONFLICT (id) DO NOTHING;
-- Comments for cache_settings
COMMENT ON TABLE cache_settings IS 'Global cache settings (singleton table)';
COMMENT ON COLUMN cache_settings.allow_public_internet IS 'Air-gap mode: when false, blocks all public internet sources';
COMMENT ON COLUMN cache_settings.auto_create_system_projects IS 'Auto-create system projects (_npm, _pypi, etc.) on first cache';
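-- Illustrative only (run manually, not part of this migration): air-gap mode
-- would be enabled by flipping the singleton row, e.g.:
--   UPDATE cache_settings
--   SET allow_public_internet = FALSE, updated_at = NOW()
--   WHERE id = 1;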
-- =============================================================================
-- cached_urls: Track URL to artifact mappings for provenance
-- =============================================================================
CREATE TABLE IF NOT EXISTS cached_urls (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
url VARCHAR(4096) NOT NULL,
url_hash VARCHAR(64) NOT NULL,
artifact_id VARCHAR(64) NOT NULL REFERENCES artifacts(id),
source_id UUID REFERENCES upstream_sources(id) ON DELETE SET NULL,
fetched_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
response_headers JSONB DEFAULT '{}',
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
-- URL hash must be unique (same URL = same cached artifact)
CONSTRAINT unique_url_hash UNIQUE (url_hash)
);
-- Indexes for cached_urls
CREATE INDEX IF NOT EXISTS idx_cached_urls_url_hash ON cached_urls(url_hash);
CREATE INDEX IF NOT EXISTS idx_cached_urls_artifact_id ON cached_urls(artifact_id);
CREATE INDEX IF NOT EXISTS idx_cached_urls_source_id ON cached_urls(source_id);
CREATE INDEX IF NOT EXISTS idx_cached_urls_fetched_at ON cached_urls(fetched_at);
-- Comments for cached_urls
COMMENT ON TABLE cached_urls IS 'Tracks which URLs have been cached and maps to artifacts';
COMMENT ON COLUMN cached_urls.url IS 'Original URL that was fetched';
COMMENT ON COLUMN cached_urls.url_hash IS 'SHA256 hash of URL for fast lookup';
COMMENT ON COLUMN cached_urls.artifact_id IS 'The cached artifact (by SHA256 content hash)';
COMMENT ON COLUMN cached_urls.source_id IS 'Which upstream source provided this (null if manual)';
COMMENT ON COLUMN cached_urls.fetched_at IS 'When the URL was fetched from upstream';
COMMENT ON COLUMN cached_urls.response_headers IS 'Original response headers from upstream (for debugging)';
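-- Illustrative lookup, assuming the application stores url_hash as lowercase
-- hex SHA-256 of the raw URL and that the pgcrypto extension provides digest():
--   SELECT artifact_id FROM cached_urls
--   WHERE url_hash = encode(digest('https://pypi.org/simple/requests/', 'sha256'), 'hex');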
-- =============================================================================
-- Add is_system column to projects table for system cache projects
-- =============================================================================
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM information_schema.columns
WHERE table_name = 'projects' AND column_name = 'is_system'
) THEN
ALTER TABLE projects ADD COLUMN is_system BOOLEAN NOT NULL DEFAULT FALSE;
CREATE INDEX IF NOT EXISTS idx_projects_is_system ON projects(is_system);
END IF;
END $$;
COMMENT ON COLUMN projects.is_system IS 'True for system cache projects (_npm, _pypi, etc.)';
-- =============================================================================
-- Seed default upstream sources (disabled by default for safety)
-- =============================================================================
INSERT INTO upstream_sources (id, name, source_type, url, enabled, is_public, auth_type, priority)
VALUES
(gen_random_uuid(), 'npm-public', 'npm', 'https://registry.npmjs.org', FALSE, TRUE, 'none', 100),
(gen_random_uuid(), 'pypi-public', 'pypi', 'https://pypi.org/simple', FALSE, TRUE, 'none', 100),
(gen_random_uuid(), 'maven-central', 'maven', 'https://repo1.maven.org/maven2', FALSE, TRUE, 'none', 100),
(gen_random_uuid(), 'docker-hub', 'docker', 'https://registry-1.docker.io', FALSE, TRUE, 'none', 100)
ON CONFLICT (name) DO NOTHING;
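-- Illustrative only (run manually, not part of this migration): seeded sources
-- ship disabled for safety, so an operator enables one explicitly, e.g.:
--   UPDATE upstream_sources
--   SET enabled = TRUE, updated_at = NOW()
--   WHERE name = 'pypi-public';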

View File

@@ -0,0 +1,55 @@
-- Migration: 011_pypi_cache_tasks
-- Description: Add table for tracking PyPI dependency caching tasks
-- Date: 2026-02-02
-- Table for tracking PyPI cache tasks with retry support
CREATE TABLE pypi_cache_tasks (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-- What to cache
package_name VARCHAR(255) NOT NULL,
version_constraint VARCHAR(255),
-- Origin tracking
parent_task_id UUID REFERENCES pypi_cache_tasks(id) ON DELETE SET NULL,
depth INTEGER NOT NULL DEFAULT 0,
triggered_by_artifact VARCHAR(64) REFERENCES artifacts(id) ON DELETE SET NULL,
-- Status
status VARCHAR(20) NOT NULL DEFAULT 'pending',
attempts INTEGER NOT NULL DEFAULT 0,
max_attempts INTEGER NOT NULL DEFAULT 3,
-- Results
cached_artifact_id VARCHAR(64) REFERENCES artifacts(id) ON DELETE SET NULL,
error_message TEXT,
-- Timing
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
started_at TIMESTAMP WITH TIME ZONE,
completed_at TIMESTAMP WITH TIME ZONE,
next_retry_at TIMESTAMP WITH TIME ZONE,
-- Constraints
CONSTRAINT check_task_status CHECK (status IN ('pending', 'in_progress', 'completed', 'failed')),
CONSTRAINT check_depth_non_negative CHECK (depth >= 0),
CONSTRAINT check_attempts_non_negative CHECK (attempts >= 0)
);
-- Index for finding tasks ready to process (pending with retry time passed)
CREATE INDEX idx_pypi_cache_tasks_status_retry ON pypi_cache_tasks(status, next_retry_at);
-- Index for deduplication check (is this package already queued?)
CREATE INDEX idx_pypi_cache_tasks_package_status ON pypi_cache_tasks(package_name, status);
-- Index for tracing dependency chains
CREATE INDEX idx_pypi_cache_tasks_parent ON pypi_cache_tasks(parent_task_id);
-- Index for finding tasks by artifact that triggered them
CREATE INDEX idx_pypi_cache_tasks_triggered_by ON pypi_cache_tasks(triggered_by_artifact);
-- Index for finding tasks by cached artifact
CREATE INDEX idx_pypi_cache_tasks_cached_artifact ON pypi_cache_tasks(cached_artifact_id);
-- Index for sorting by depth and creation time (processing order)
CREATE INDEX idx_pypi_cache_tasks_depth_created ON pypi_cache_tasks(depth, created_at);
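-- Illustrative worker query (a sketch, not part of this migration): a worker
-- can claim the next ready task via the status/retry and depth/created indexes,
-- using SKIP LOCKED so concurrent workers never claim the same row:
--   UPDATE pypi_cache_tasks
--   SET status = 'in_progress', started_at = NOW(), attempts = attempts + 1
--   WHERE id = (
--     SELECT id FROM pypi_cache_tasks
--     WHERE status = 'pending'
--       AND (next_retry_at IS NULL OR next_retry_at <= NOW())
--     ORDER BY depth, created_at
--     FOR UPDATE SKIP LOCKED
--     LIMIT 1
--   )
--   RETURNING id, package_name, version_constraint, depth;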