2 Commits

Author SHA1 Message Date
Mondo Diaz
ebf9926809 Fix YAML anchor ordering in CI pipeline 2026-01-29 11:37:57 -06:00
Mondo Diaz
a3a49ac9c3 Add upstream caching infrastructure and refactor CI pipeline
Upstream Caching (Epic #68-#75, #105):
- Add upstream_sources and cache_settings tables with migrations
- Add cache management API endpoints (CRUD for sources, settings)
- Add environment variable overrides for upstream sources and cache settings
- Add encryption module for storing credentials securely
- Add frontend Admin Cache Management page
- Add is_system field to projects for system cache distinction
- Add purge_seed_data for transitioning to production-like environments

CI Pipeline Refactoring:
- Remove reset jobs (reset_stage_pre, reset_stage)
- Add ephemeral orchard-test deployment for main branch testing
- Run integration tests on ephemeral deployment before promoting to stage
- Stage is now long-running pre-prod (smoke tests only)
- Disable prosper_setup for tag pipelines
2026-01-29 11:28:59 -06:00
31 changed files with 865 additions and 3835 deletions

View File

@@ -7,43 +7,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Added
- Added transparent PyPI proxy implementing PEP 503 Simple API (#108)
- `GET /pypi/simple/` - package index (proxied from upstream)
- `GET /pypi/simple/{package}/` - version list with rewritten download links
- `GET /pypi/simple/{package}/{filename}` - download with automatic caching
- Allows `pip install --index-url https://orchard.../pypi/simple/ <package>`
- Artifacts cached on first access through configured upstream sources
- Added `POST /api/v1/cache/resolve` endpoint to cache packages by coordinates instead of URL (#108)
### Changed
- Upstream sources table text is now centered under column headers (#108)
- ENV badge now appears inline with source name instead of separate column (#108)
- Test and Edit buttons now have more prominent button styling (#108)
- Reduced footer padding for cleaner layout (#108)
### Fixed
- Fixed purge_seed_data crash when deleting access permissions - was comparing UUID to VARCHAR column (#107)
### Changed
- Upstream source connectivity test no longer follows redirects, fixing "Exceeded maximum allowed redirects" error with Artifactory proxies (#107)
- Test runs automatically after saving a new or updated upstream source (#107)
- Test status now shows as colored dots (green=success, red=error) instead of text badges (#107)
- Clicking red dot shows error details in a modal (#107)
- Source name column no longer wraps text for better table layout (#107)
- Renamed "Cache Management" page to "Upstream Sources" (#107)
- Moved Delete button from table row to edit modal for cleaner table layout (#107)
### Removed
- Removed `is_public` field from upstream sources - all sources are now treated as internal/private (#107)
- Removed `allow_public_internet` (air-gap mode) setting from cache settings - not needed for enterprise proxy use case (#107)
- Removed seeding of public registry URLs (npm-public, pypi-public, maven-central, docker-hub) (#107)
- Removed "Public" badge and checkbox from upstream sources UI (#107)
- Removed "Allow Public Internet" toggle from cache settings UI (#107)
- Removed "Global Settings" section from cache management UI - auto-create system projects is always enabled (#107)
- Removed unused CacheSettings frontend types and API functions (#107)
### Added
- Added `ORCHARD_PURGE_SEED_DATA` environment variable support to stage helm values to remove seed data from long-running deployments (#107)
- Added frontend system projects visual distinction (#105)
- "Cache" badge for system projects in project list
- "System Cache" badge on project detail page

View File

@@ -61,14 +61,10 @@ class Settings(BaseSettings):
# Cache settings
cache_encryption_key: str = "" # Fernet key for encrypting upstream credentials (auto-generated if empty)
# Global cache settings override (None = use DB value, True/False = override DB)
# Global cache settings overrides (None = use DB value, True/False = override DB)
cache_allow_public_internet: Optional[bool] = None # Override allow_public_internet (air-gap mode)
cache_auto_create_system_projects: Optional[bool] = None # Override auto_create_system_projects
# PyPI Cache Worker settings
pypi_cache_workers: int = 5 # Number of concurrent cache workers
pypi_cache_max_depth: int = 10 # Maximum recursion depth for dependency caching
pypi_cache_max_attempts: int = 3 # Maximum retry attempts for failed cache tasks
# JWT Authentication settings (optional, for external identity providers)
jwt_enabled: bool = False # Enable JWT token validation
jwt_secret: str = "" # Secret key for HS256, or leave empty for RS256 with JWKS
@@ -93,24 +89,6 @@ class Settings(BaseSettings):
def is_production(self) -> bool:
return self.env.lower() == "production"
@property
def PORT(self) -> int:
"""Alias for server_port for compatibility."""
return self.server_port
# Uppercase aliases for PyPI cache settings (for backward compatibility)
@property
def PYPI_CACHE_WORKERS(self) -> int:
return self.pypi_cache_workers
@property
def PYPI_CACHE_MAX_DEPTH(self) -> int:
return self.pypi_cache_max_depth
@property
def PYPI_CACHE_MAX_ATTEMPTS(self) -> int:
return self.pypi_cache_max_attempts
class Config:
env_prefix = "ORCHARD_"
case_sensitive = False
@@ -130,6 +108,7 @@ class EnvUpstreamSource:
url: str,
source_type: str = "generic",
enabled: bool = True,
is_public: bool = True,
auth_type: str = "none",
username: Optional[str] = None,
password: Optional[str] = None,
@@ -139,6 +118,7 @@ class EnvUpstreamSource:
self.url = url
self.source_type = source_type
self.enabled = enabled
self.is_public = is_public
self.auth_type = auth_type
self.username = username
self.password = password
@@ -208,6 +188,7 @@ def parse_upstream_sources_from_env() -> list[EnvUpstreamSource]:
url=url,
source_type=data.get("TYPE", "generic").lower(),
enabled=parse_bool(data.get("ENABLED"), True),
is_public=parse_bool(data.get("IS_PUBLIC"), True),
auth_type=data.get("AUTH_TYPE", "none").lower(),
username=data.get("USERNAME"),
password=data.get("PASSWORD"),

View File

@@ -462,6 +462,7 @@ def _run_migrations():
source_type VARCHAR(50) NOT NULL DEFAULT 'generic',
url VARCHAR(2048) NOT NULL,
enabled BOOLEAN NOT NULL DEFAULT FALSE,
is_public BOOLEAN NOT NULL DEFAULT TRUE,
auth_type VARCHAR(20) NOT NULL DEFAULT 'none',
username VARCHAR(255),
password_encrypted BYTEA,
@@ -479,6 +480,7 @@ def _run_migrations():
);
CREATE INDEX IF NOT EXISTS idx_upstream_sources_enabled ON upstream_sources(enabled);
CREATE INDEX IF NOT EXISTS idx_upstream_sources_source_type ON upstream_sources(source_type);
CREATE INDEX IF NOT EXISTS idx_upstream_sources_is_public ON upstream_sources(is_public);
CREATE INDEX IF NOT EXISTS idx_upstream_sources_priority ON upstream_sources(priority);
""",
),
@@ -487,13 +489,14 @@ def _run_migrations():
sql="""
CREATE TABLE IF NOT EXISTS cache_settings (
id INTEGER PRIMARY KEY DEFAULT 1,
allow_public_internet BOOLEAN NOT NULL DEFAULT TRUE,
auto_create_system_projects BOOLEAN NOT NULL DEFAULT TRUE,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
CONSTRAINT check_cache_settings_singleton CHECK (id = 1)
);
INSERT INTO cache_settings (id, auto_create_system_projects)
VALUES (1, TRUE)
INSERT INTO cache_settings (id, allow_public_internet, auto_create_system_projects)
VALUES (1, TRUE, TRUE)
ON CONFLICT (id) DO NOTHING;
""",
),
@@ -519,50 +522,13 @@ def _run_migrations():
Migration(
name="020_seed_default_upstream_sources",
sql="""
-- Originally seeded public sources, but these are no longer used.
-- Migration 023 deletes any previously seeded sources.
-- This migration is now a no-op for fresh installs.
SELECT 1;
""",
),
Migration(
name="021_remove_is_public_from_upstream_sources",
sql="""
DO $$
BEGIN
-- Drop the index if it exists
DROP INDEX IF EXISTS idx_upstream_sources_is_public;
-- Drop the column if it exists
IF EXISTS (
SELECT 1 FROM information_schema.columns
WHERE table_name = 'upstream_sources' AND column_name = 'is_public'
) THEN
ALTER TABLE upstream_sources DROP COLUMN is_public;
END IF;
END $$;
""",
),
Migration(
name="022_remove_allow_public_internet_from_cache_settings",
sql="""
DO $$
BEGIN
IF EXISTS (
SELECT 1 FROM information_schema.columns
WHERE table_name = 'cache_settings' AND column_name = 'allow_public_internet'
) THEN
ALTER TABLE cache_settings DROP COLUMN allow_public_internet;
END IF;
END $$;
""",
),
Migration(
name="023_delete_seeded_public_sources",
sql="""
-- Delete the seeded public sources that were added by migration 020
DELETE FROM upstream_sources
WHERE name IN ('npm-public', 'pypi-public', 'maven-central', 'docker-hub');
INSERT INTO upstream_sources (id, name, source_type, url, enabled, is_public, auth_type, priority)
VALUES
(gen_random_uuid(), 'npm-public', 'npm', 'https://registry.npmjs.org', FALSE, TRUE, 'none', 100),
(gen_random_uuid(), 'pypi-public', 'pypi', 'https://pypi.org/simple', FALSE, TRUE, 'none', 100),
(gen_random_uuid(), 'maven-central', 'maven', 'https://repo1.maven.org/maven2', FALSE, TRUE, 'none', 100),
(gen_random_uuid(), 'docker-hub', 'docker', 'https://registry-1.docker.io', FALSE, TRUE, 'none', 100)
ON CONFLICT (name) DO NOTHING;
""",
),
]

View File

@@ -10,20 +10,11 @@ Handles:
- Conflict detection
"""
import re
import yaml
from typing import List, Dict, Any, Optional, Set, Tuple
from sqlalchemy.orm import Session
from sqlalchemy import and_
# Import packaging for PEP 440 version matching
try:
from packaging.specifiers import SpecifierSet, InvalidSpecifier
from packaging.version import Version, InvalidVersion
HAS_PACKAGING = True
except ImportError:
HAS_PACKAGING = False
from .models import (
Project,
Package,
@@ -313,87 +304,6 @@ def get_reverse_dependencies(
)
def _is_version_constraint(version_str: str) -> bool:
"""Check if a version string contains constraint operators."""
if not version_str:
return False
# Check for common constraint operators
return any(op in version_str for op in ['>=', '<=', '!=', '~=', '>', '<', '==', '*'])
def _resolve_version_constraint(
db: Session,
package: Package,
constraint: str,
) -> Optional[Tuple[str, str, int]]:
"""
Resolve a version constraint (e.g., '>=1.9') to a specific version.
Uses PEP 440 version matching to find the best matching version.
Args:
db: Database session
package: Package to search versions in
constraint: Version constraint string (e.g., '>=1.9', '<2.0,>=1.5')
Returns:
Tuple of (artifact_id, resolved_version, size) or None if not found
"""
if not HAS_PACKAGING:
# Fallback: if packaging not available, can't do constraint matching
return None
# Handle wildcard - return latest version
if constraint == '*':
# Get the latest version by created_at
latest = db.query(PackageVersion).filter(
PackageVersion.package_id == package.id,
).order_by(PackageVersion.created_at.desc()).first()
if latest:
artifact = db.query(Artifact).filter(Artifact.id == latest.artifact_id).first()
if artifact:
return (artifact.id, latest.version, artifact.size)
return None
try:
specifier = SpecifierSet(constraint)
except InvalidSpecifier:
# Invalid constraint, try as exact version
return None
# Get all versions for this package
all_versions = db.query(PackageVersion).filter(
PackageVersion.package_id == package.id,
).all()
if not all_versions:
return None
# Find matching versions
matching = []
for pv in all_versions:
try:
v = Version(pv.version)
if v in specifier:
matching.append((pv, v))
except InvalidVersion:
# Skip invalid versions
continue
if not matching:
return None
# Sort by version (descending) and return the latest matching
matching.sort(key=lambda x: x[1], reverse=True)
best_match = matching[0][0]
artifact = db.query(Artifact).filter(Artifact.id == best_match.artifact_id).first()
if artifact:
return (artifact.id, best_match.version, artifact.size)
return None
def _resolve_dependency_to_artifact(
db: Session,
project_name: str,
@@ -404,17 +314,11 @@ def _resolve_dependency_to_artifact(
"""
Resolve a dependency constraint to an artifact ID.
Supports:
- Exact version matching (e.g., '1.2.3')
- Version constraints (e.g., '>=1.9', '<2.0,>=1.5')
- Tag matching
- Wildcard ('*' for any version)
Args:
db: Database session
project_name: Project name
package_name: Package name
version: Version or version constraint
version: Version constraint (exact)
tag: Tag constraint
Returns:
@@ -433,13 +337,7 @@ def _resolve_dependency_to_artifact(
return None
if version:
# Check if this is a version constraint (>=, <, etc.) or exact version
if _is_version_constraint(version):
result = _resolve_version_constraint(db, package, version)
if result:
return result
else:
# Look up by exact version
# Look up by version
pkg_version = db.query(PackageVersion).filter(
PackageVersion.package_id == package.id,
PackageVersion.version == version,

View File

@@ -11,11 +11,9 @@ from slowapi.errors import RateLimitExceeded
from .config import get_settings
from .database import init_db, SessionLocal
from .routes import router
from .pypi_proxy import router as pypi_router
from .seed import seed_database
from .auth import create_default_admin
from .rate_limit import limiter
from .pypi_cache_worker import init_cache_worker_pool, shutdown_cache_worker_pool
settings = get_settings()
logging.basicConfig(level=logging.INFO)
@@ -50,13 +48,8 @@ async def lifespan(app: FastAPI):
else:
logger.info(f"Running in {settings.env} mode - skipping seed data")
# Initialize PyPI cache worker pool
init_cache_worker_pool()
yield
# Shutdown: cleanup
shutdown_cache_worker_pool()
# Shutdown: cleanup if needed
app = FastAPI(
@@ -72,7 +65,6 @@ app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
# Include API routes
app.include_router(router)
app.include_router(pypi_router)
# Serve static files (React build) if the directory exists
static_dir = os.path.join(os.path.dirname(__file__), "..", "..", "frontend", "dist")

View File

@@ -667,6 +667,7 @@ class UpstreamSource(Base):
source_type = Column(String(50), default="generic", nullable=False)
url = Column(String(2048), nullable=False)
enabled = Column(Boolean, default=False, nullable=False)
is_public = Column(Boolean, default=True, nullable=False)
auth_type = Column(String(20), default="none", nullable=False)
username = Column(String(255))
password_encrypted = Column(LargeBinary)
@@ -683,6 +684,7 @@ class UpstreamSource(Base):
__table_args__ = (
Index("idx_upstream_sources_enabled", "enabled"),
Index("idx_upstream_sources_source_type", "source_type"),
Index("idx_upstream_sources_is_public", "is_public"),
Index("idx_upstream_sources_priority", "priority"),
CheckConstraint(
"source_type IN ('npm', 'pypi', 'maven', 'docker', 'helm', 'nuget', 'deb', 'rpm', 'generic')",
@@ -745,12 +747,13 @@ class UpstreamSource(Base):
class CacheSettings(Base):
"""Global cache settings (singleton table).
Controls behavior of the upstream caching system.
Controls behavior of the upstream caching system including air-gap mode.
"""
__tablename__ = "cache_settings"
id = Column(Integer, primary_key=True, default=1)
allow_public_internet = Column(Boolean, default=True, nullable=False)
auto_create_system_projects = Column(Boolean, default=True, nullable=False)
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
updated_at = Column(
@@ -803,70 +806,3 @@ class CachedUrl(Base):
return hashlib.sha256(url.encode("utf-8")).hexdigest()
class PyPICacheTask(Base):
"""Task for caching a PyPI package and its dependencies.
Tracks the status of background caching operations with retry support.
Used by the PyPI proxy to ensure reliable dependency caching.
"""
__tablename__ = "pypi_cache_tasks"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
# What to cache
package_name = Column(String(255), nullable=False)
version_constraint = Column(String(255))
# Origin tracking
parent_task_id = Column(
UUID(as_uuid=True),
ForeignKey("pypi_cache_tasks.id", ondelete="SET NULL"),
)
depth = Column(Integer, nullable=False, default=0)
triggered_by_artifact = Column(
String(64),
ForeignKey("artifacts.id", ondelete="SET NULL"),
)
# Status
status = Column(String(20), nullable=False, default="pending")
attempts = Column(Integer, nullable=False, default=0)
max_attempts = Column(Integer, nullable=False, default=3)
# Results
cached_artifact_id = Column(
String(64),
ForeignKey("artifacts.id", ondelete="SET NULL"),
)
error_message = Column(Text)
# Timing
created_at = Column(DateTime(timezone=True), nullable=False, default=datetime.utcnow)
started_at = Column(DateTime(timezone=True))
completed_at = Column(DateTime(timezone=True))
next_retry_at = Column(DateTime(timezone=True))
# Relationships
parent_task = relationship(
"PyPICacheTask",
remote_side=[id],
backref="child_tasks",
)
__table_args__ = (
Index("idx_pypi_cache_tasks_status_retry", "status", "next_retry_at"),
Index("idx_pypi_cache_tasks_package_status", "package_name", "status"),
Index("idx_pypi_cache_tasks_parent", "parent_task_id"),
Index("idx_pypi_cache_tasks_triggered_by", "triggered_by_artifact"),
Index("idx_pypi_cache_tasks_cached_artifact", "cached_artifact_id"),
Index("idx_pypi_cache_tasks_depth_created", "depth", "created_at"),
CheckConstraint(
"status IN ('pending', 'in_progress', 'completed', 'failed')",
name="check_task_status",
),
CheckConstraint("depth >= 0", name="check_depth_non_negative"),
CheckConstraint("attempts >= 0", name="check_attempts_non_negative"),
)

View File

@@ -194,8 +194,7 @@ def purge_seed_data(db: Session) -> dict:
synchronize_session=False
)
# Delete any access permissions for this user
# Note: AccessPermission.user_id is VARCHAR (username), not UUID
db.query(AccessPermission).filter(AccessPermission.user_id == user.username).delete(
db.query(AccessPermission).filter(AccessPermission.user_id == user.id).delete(
synchronize_session=False
)
db.delete(user)

View File

@@ -1,576 +0,0 @@
"""
PyPI cache worker module.
Manages a thread pool for background caching of PyPI packages and their dependencies.
Replaces unbounded thread spawning with a managed queue-based approach.
"""
import logging
import re
import threading
import time
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timedelta
from typing import List, Optional
from uuid import UUID
import httpx
from sqlalchemy import or_
from sqlalchemy.orm import Session
from .config import get_settings
settings = get_settings()
from .database import SessionLocal
from .models import PyPICacheTask, Package, Project, Tag
logger = logging.getLogger(__name__)
# Module-level worker pool state
_cache_worker_pool: Optional[ThreadPoolExecutor] = None
_cache_worker_running: bool = False
_dispatcher_thread: Optional[threading.Thread] = None
def init_cache_worker_pool(max_workers: Optional[int] = None):
"""
Initialize the cache worker pool. Called on app startup.
Args:
max_workers: Number of concurrent workers. Defaults to PYPI_CACHE_WORKERS setting.
"""
global _cache_worker_pool, _cache_worker_running, _dispatcher_thread
if _cache_worker_pool is not None:
logger.warning("Cache worker pool already initialized")
return
workers = max_workers or settings.PYPI_CACHE_WORKERS
_cache_worker_pool = ThreadPoolExecutor(
max_workers=workers,
thread_name_prefix="pypi-cache-",
)
_cache_worker_running = True
# Start the dispatcher thread
_dispatcher_thread = threading.Thread(
target=_cache_dispatcher_loop,
daemon=True,
name="pypi-cache-dispatcher",
)
_dispatcher_thread.start()
logger.info(f"PyPI cache worker pool initialized with {workers} workers")
def shutdown_cache_worker_pool(wait: bool = True, timeout: float = 30.0):
"""
Shutdown the cache worker pool gracefully.
Args:
wait: Whether to wait for pending tasks to complete.
timeout: Maximum time to wait for shutdown.
"""
global _cache_worker_pool, _cache_worker_running, _dispatcher_thread
if _cache_worker_pool is None:
return
logger.info("Shutting down PyPI cache worker pool...")
_cache_worker_running = False
# Wait for dispatcher to stop
if _dispatcher_thread and _dispatcher_thread.is_alive():
_dispatcher_thread.join(timeout=5.0)
# Shutdown thread pool
_cache_worker_pool.shutdown(wait=wait, cancel_futures=not wait)
_cache_worker_pool = None
_dispatcher_thread = None
logger.info("PyPI cache worker pool shut down")
def _cache_dispatcher_loop():
"""
Main dispatcher loop: poll DB for pending tasks and submit to worker pool.
"""
logger.info("PyPI cache dispatcher started")
while _cache_worker_running:
try:
db = SessionLocal()
try:
tasks = _get_ready_tasks(db, limit=10)
for task in tasks:
# Mark in_progress before submitting
task.status = "in_progress"
task.started_at = datetime.utcnow()
db.commit()
# Submit to worker pool
_cache_worker_pool.submit(_process_cache_task, task.id)
# Sleep if no work (avoid busy loop)
if not tasks:
time.sleep(2.0)
else:
# Small delay between batches to avoid overwhelming
time.sleep(0.1)
finally:
db.close()
except Exception as e:
logger.error(f"PyPI cache dispatcher error: {e}")
time.sleep(5.0)
logger.info("PyPI cache dispatcher stopped")
def _get_ready_tasks(db: Session, limit: int = 10) -> List[PyPICacheTask]:
"""
Get tasks ready to process.
Returns pending tasks that are either new or ready for retry.
Orders by depth (shallow first) then creation time (FIFO).
"""
now = datetime.utcnow()
return (
db.query(PyPICacheTask)
.filter(
PyPICacheTask.status == "pending",
or_(
PyPICacheTask.next_retry_at == None, # New tasks
PyPICacheTask.next_retry_at <= now, # Retry tasks ready
),
)
.order_by(
PyPICacheTask.depth.asc(), # Prefer shallow deps first
PyPICacheTask.created_at.asc(), # FIFO within same depth
)
.limit(limit)
.all()
)
def _process_cache_task(task_id: UUID):
"""
Process a single cache task. Called by worker pool.
Args:
task_id: The ID of the task to process.
"""
db = SessionLocal()
try:
task = db.query(PyPICacheTask).filter(PyPICacheTask.id == task_id).first()
if not task:
logger.warning(f"PyPI cache task {task_id} not found")
return
logger.info(
f"Processing cache task: {task.package_name} "
f"(depth={task.depth}, attempt={task.attempts + 1})"
)
# Check if already cached by another task (dedup)
existing_artifact = _find_cached_package(db, task.package_name)
if existing_artifact:
logger.info(f"Package {task.package_name} already cached, skipping")
_mark_task_completed(db, task, cached_artifact_id=existing_artifact)
return
# Check depth limit
max_depth = settings.PYPI_CACHE_MAX_DEPTH
if task.depth >= max_depth:
_mark_task_failed(db, task, f"Max depth {max_depth} exceeded")
return
# Do the actual caching
result = _fetch_and_cache_package(task.package_name, task.version_constraint)
if result["success"]:
_mark_task_completed(db, task, cached_artifact_id=result.get("artifact_id"))
logger.info(f"Successfully cached {task.package_name}")
else:
_handle_task_failure(db, task, result["error"])
except Exception as e:
logger.exception(f"Error processing cache task {task_id}")
db = SessionLocal() # Get fresh session after exception
try:
task = db.query(PyPICacheTask).filter(PyPICacheTask.id == task_id).first()
if task:
_handle_task_failure(db, task, str(e))
finally:
db.close()
finally:
db.close()
def _find_cached_package(db: Session, package_name: str) -> Optional[str]:
"""
Check if a package is already cached.
Args:
db: Database session.
package_name: Normalized package name.
Returns:
Artifact ID if cached, None otherwise.
"""
# Normalize package name (PEP 503)
normalized = re.sub(r"[-_.]+", "-", package_name).lower()
# Check if _pypi project has this package with at least one tag
system_project = db.query(Project).filter(Project.name == "_pypi").first()
if not system_project:
return None
package = (
db.query(Package)
.filter(
Package.project_id == system_project.id,
Package.name == normalized,
)
.first()
)
if not package:
return None
# Check if package has any tags (cached files)
tag = db.query(Tag).filter(Tag.package_id == package.id).first()
if tag:
return tag.artifact_id
return None
def _fetch_and_cache_package(
package_name: str,
version_constraint: Optional[str] = None,
) -> dict:
"""
Fetch and cache a PyPI package by making requests through our own proxy.
Args:
package_name: The package name to cache.
version_constraint: Optional version constraint (currently not used for selection).
Returns:
Dict with "success" bool, "artifact_id" on success, "error" on failure.
"""
# Normalize package name (PEP 503)
normalized_name = re.sub(r"[-_.]+", "-", package_name).lower()
# Build the URL to our own proxy
# Use localhost since we're making internal requests
base_url = f"http://localhost:{settings.PORT}"
try:
with httpx.Client(timeout=60.0, follow_redirects=True) as client:
# Step 1: Get the simple index page
simple_url = f"{base_url}/pypi/simple/{normalized_name}/"
logger.debug(f"Fetching index: {simple_url}")
response = client.get(simple_url)
if response.status_code == 404:
return {"success": False, "error": f"Package {package_name} not found on upstream"}
if response.status_code != 200:
return {"success": False, "error": f"Failed to get index: HTTP {response.status_code}"}
# Step 2: Parse HTML to find downloadable files
html = response.text
# Create pattern that matches both normalized (hyphens) and original (underscores)
name_pattern = re.sub(r"[-_]+", "[-_]+", normalized_name)
# Look for wheel files first (preferred)
wheel_pattern = rf'href="([^"]*{name_pattern}[^"]*\.whl[^"]*)"'
matches = re.findall(wheel_pattern, html, re.IGNORECASE)
if not matches:
# Fall back to sdist
sdist_pattern = rf'href="([^"]*{name_pattern}[^"]*\.tar\.gz[^"]*)"'
matches = re.findall(sdist_pattern, html, re.IGNORECASE)
if not matches:
logger.warning(
f"No downloadable files found for {package_name}. "
f"Pattern: {wheel_pattern}, HTML preview: {html[:500]}"
)
return {"success": False, "error": "No downloadable files found"}
# Get the last match (usually latest version)
download_url = matches[-1]
# Make URL absolute if needed
if download_url.startswith("/"):
download_url = f"{base_url}{download_url}"
elif not download_url.startswith("http"):
download_url = f"{base_url}/pypi/simple/{normalized_name}/{download_url}"
# Step 3: Download the file through our proxy (this caches it)
logger.debug(f"Downloading: {download_url}")
response = client.get(download_url)
if response.status_code != 200:
return {"success": False, "error": f"Download failed: HTTP {response.status_code}"}
# Get artifact ID from response header
artifact_id = response.headers.get("X-Checksum-SHA256")
return {"success": True, "artifact_id": artifact_id}
except httpx.TimeoutException as e:
return {"success": False, "error": f"Timeout: {e}"}
except httpx.ConnectError as e:
return {"success": False, "error": f"Connection failed: {e}"}
except Exception as e:
return {"success": False, "error": str(e)}
def _mark_task_completed(
db: Session,
task: PyPICacheTask,
cached_artifact_id: Optional[str] = None,
):
"""Mark a task as completed."""
task.status = "completed"
task.completed_at = datetime.utcnow()
task.cached_artifact_id = cached_artifact_id
task.error_message = None
db.commit()
def _mark_task_failed(db: Session, task: PyPICacheTask, error: str):
"""Mark a task as permanently failed."""
task.status = "failed"
task.completed_at = datetime.utcnow()
task.error_message = error[:1000] if error else None
db.commit()
logger.warning(f"PyPI cache task failed permanently: {task.package_name} - {error}")
def _handle_task_failure(db: Session, task: PyPICacheTask, error: str):
"""
Handle a failed cache attempt with exponential backoff.
Args:
db: Database session.
task: The failed task.
error: Error message.
"""
task.attempts += 1
task.error_message = error[:1000] if error else None
max_attempts = task.max_attempts or settings.PYPI_CACHE_MAX_ATTEMPTS
if task.attempts >= max_attempts:
# Give up after max attempts
task.status = "failed"
task.completed_at = datetime.utcnow()
logger.warning(
f"PyPI cache task failed permanently: {task.package_name} - {error} "
f"(after {task.attempts} attempts)"
)
else:
# Schedule retry with exponential backoff
# Attempt 1 failed → retry in 30s
# Attempt 2 failed → retry in 60s
# Attempt 3 failed → permanent failure (if max_attempts=3)
backoff_seconds = 30 * (2 ** (task.attempts - 1))
task.status = "pending"
task.next_retry_at = datetime.utcnow() + timedelta(seconds=backoff_seconds)
logger.info(
f"PyPI cache task will retry: {task.package_name} in {backoff_seconds}s "
f"(attempt {task.attempts}/{max_attempts})"
)
db.commit()
def enqueue_cache_task(
db: Session,
package_name: str,
version_constraint: Optional[str] = None,
parent_task_id: Optional[UUID] = None,
depth: int = 0,
triggered_by_artifact: Optional[str] = None,
) -> Optional[PyPICacheTask]:
"""
Enqueue a package for caching.
Performs deduplication: won't create a task if one already exists
for the same package in pending/in_progress state, or if the package
is already cached.
Args:
db: Database session.
package_name: The package name to cache.
version_constraint: Optional version constraint.
parent_task_id: Parent task that spawned this one.
depth: Recursion depth.
triggered_by_artifact: Artifact that declared this dependency.
Returns:
The created or existing task, or None if already cached.
"""
# Normalize package name (PEP 503)
normalized = re.sub(r"[-_.]+", "-", package_name).lower()
# Check for existing pending/in_progress task
existing_task = (
db.query(PyPICacheTask)
.filter(
PyPICacheTask.package_name == normalized,
PyPICacheTask.status.in_(["pending", "in_progress"]),
)
.first()
)
if existing_task:
logger.debug(f"Task already exists for {normalized}: {existing_task.id}")
return existing_task
# Check if already cached
if _find_cached_package(db, normalized):
logger.debug(f"Package {normalized} already cached, skipping task creation")
return None
# Create new task
task = PyPICacheTask(
package_name=normalized,
version_constraint=version_constraint,
parent_task_id=parent_task_id,
depth=depth,
triggered_by_artifact=triggered_by_artifact,
max_attempts=settings.PYPI_CACHE_MAX_ATTEMPTS,
)
db.add(task)
db.flush()
logger.info(f"Enqueued cache task for {normalized} (depth={depth})")
return task
def get_cache_status(db: Session) -> dict:
"""
Get summary of cache task queue status.
Returns:
Dict with counts by status.
"""
from sqlalchemy import func
stats = (
db.query(PyPICacheTask.status, func.count(PyPICacheTask.id))
.group_by(PyPICacheTask.status)
.all()
)
return {
"pending": next((s[1] for s in stats if s[0] == "pending"), 0),
"in_progress": next((s[1] for s in stats if s[0] == "in_progress"), 0),
"completed": next((s[1] for s in stats if s[0] == "completed"), 0),
"failed": next((s[1] for s in stats if s[0] == "failed"), 0),
}
def get_failed_tasks(db: Session, limit: int = 50) -> List[dict]:
"""
Get list of failed tasks for debugging.
Args:
db: Database session.
limit: Maximum number of tasks to return.
Returns:
List of failed task info dicts.
"""
tasks = (
db.query(PyPICacheTask)
.filter(PyPICacheTask.status == "failed")
.order_by(PyPICacheTask.completed_at.desc())
.limit(limit)
.all()
)
return [
{
"id": str(task.id),
"package": task.package_name,
"error": task.error_message,
"attempts": task.attempts,
"depth": task.depth,
"failed_at": task.completed_at.isoformat() if task.completed_at else None,
}
for task in tasks
]
def retry_failed_task(db: Session, package_name: str) -> Optional[PyPICacheTask]:
"""
Reset a failed task to retry.
Args:
db: Database session.
package_name: The package name to retry.
Returns:
The reset task, or None if not found.
"""
normalized = re.sub(r"[-_.]+", "-", package_name).lower()
task = (
db.query(PyPICacheTask)
.filter(
PyPICacheTask.package_name == normalized,
PyPICacheTask.status == "failed",
)
.first()
)
if not task:
return None
task.status = "pending"
task.attempts = 0
task.next_retry_at = None
task.error_message = None
task.started_at = None
task.completed_at = None
db.commit()
logger.info(f"Reset failed task for retry: {normalized}")
return task
def retry_all_failed_tasks(db: Session) -> int:
"""
Reset all failed tasks to retry.
Args:
db: Database session.
Returns:
Number of tasks reset.
"""
count = (
db.query(PyPICacheTask)
.filter(PyPICacheTask.status == "failed")
.update(
{
"status": "pending",
"attempts": 0,
"next_retry_at": None,
"error_message": None,
"started_at": None,
"completed_at": None,
}
)
)
db.commit()
logger.info(f"Reset {count} failed tasks for retry")
return count

View File

@@ -1,868 +0,0 @@
"""
Transparent PyPI proxy implementing PEP 503 (Simple API).
Provides endpoints that allow pip to use Orchard as a PyPI index URL.
Artifacts are cached on first access through configured upstream sources.
"""
import hashlib
import logging
import re
import tarfile
import zipfile
from io import BytesIO
from typing import Optional, List, Tuple
from urllib.parse import urljoin, urlparse, quote, unquote
import httpx
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Request, Response
from fastapi.responses import StreamingResponse, HTMLResponse
from sqlalchemy.orm import Session
from .database import get_db
from .models import UpstreamSource, CachedUrl, Artifact, Project, Package, Tag, PackageVersion, ArtifactDependency
from .storage import S3Storage, get_storage
from .config import get_env_upstream_sources
from .pypi_cache_worker import (
enqueue_cache_task,
get_cache_status,
get_failed_tasks,
retry_failed_task,
retry_all_failed_tasks,
)
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/pypi", tags=["pypi-proxy"])
def _parse_requires_dist(requires_dist: str) -> Tuple[str, Optional[str]]:
"""Parse a Requires-Dist line into (package_name, version_constraint).
Examples:
"requests (>=2.25.0)" -> ("requests", ">=2.25.0")
"typing-extensions; python_version < '3.8'" -> ("typing-extensions", None)
"numpy>=1.21.0" -> ("numpy", ">=1.21.0")
"certifi" -> ("certifi", None)
Returns:
Tuple of (normalized_package_name, version_constraint or None)
"""
# Remove any environment markers (after semicolon)
if ';' in requires_dist:
requires_dist = requires_dist.split(';')[0].strip()
# Match patterns like "package (>=1.0)" or "package>=1.0" or "package"
# Pattern breakdown: package name, optional whitespace, optional version in parens or directly
match = re.match(
r'^([a-zA-Z0-9][-a-zA-Z0-9._]*)\s*(?:\(([^)]+)\)|([<>=!~][^\s;]+))?',
requires_dist.strip()
)
if not match:
return None, None
package_name = match.group(1)
# Version can be in parentheses (group 2) or directly after name (group 3)
version_constraint = match.group(2) or match.group(3)
# Normalize package name (PEP 503)
normalized_name = re.sub(r'[-_.]+', '-', package_name).lower()
# Clean up version constraint
if version_constraint:
version_constraint = version_constraint.strip()
return normalized_name, version_constraint
def _extract_requires_from_metadata(metadata_content: str) -> List[Tuple[str, Optional[str]]]:
"""Extract all Requires-Dist entries from METADATA/PKG-INFO content.
Args:
metadata_content: The content of a METADATA or PKG-INFO file
Returns:
List of (package_name, version_constraint) tuples
"""
dependencies = []
for line in metadata_content.split('\n'):
if line.startswith('Requires-Dist:'):
# Extract the value after "Requires-Dist:"
value = line[len('Requires-Dist:'):].strip()
pkg_name, version = _parse_requires_dist(value)
if pkg_name:
dependencies.append((pkg_name, version))
return dependencies
def _extract_metadata_from_wheel(content: bytes) -> Optional[str]:
"""Extract METADATA file content from a wheel (zip) file.
Wheel files have structure: {package}-{version}.dist-info/METADATA
Args:
content: The wheel file content as bytes
Returns:
METADATA file content as string, or None if not found
"""
try:
with zipfile.ZipFile(BytesIO(content)) as zf:
# Find the .dist-info directory
for name in zf.namelist():
if name.endswith('.dist-info/METADATA'):
return zf.read(name).decode('utf-8', errors='replace')
except Exception as e:
logger.warning(f"Failed to extract metadata from wheel: {e}")
return None
def _extract_metadata_from_sdist(content: bytes, filename: str) -> Optional[str]:
"""Extract PKG-INFO file content from a source distribution (.tar.gz).
Source distributions have structure: {package}-{version}/PKG-INFO
Args:
content: The tarball content as bytes
filename: The original filename (used to determine package name)
Returns:
PKG-INFO file content as string, or None if not found
"""
try:
with tarfile.open(fileobj=BytesIO(content), mode='r:gz') as tf:
# Find PKG-INFO in the root directory of the archive
for member in tf.getmembers():
if member.name.endswith('/PKG-INFO') and member.name.count('/') == 1:
f = tf.extractfile(member)
if f:
return f.read().decode('utf-8', errors='replace')
except Exception as e:
logger.warning(f"Failed to extract metadata from sdist {filename}: {e}")
return None
def _extract_dependencies(content: bytes, filename: str) -> List[Tuple[str, Optional[str]]]:
"""Extract dependencies from a PyPI package file.
Supports wheel (.whl) and source distribution (.tar.gz) formats.
Args:
content: The package file content as bytes
filename: The original filename
Returns:
List of (package_name, version_constraint) tuples
"""
metadata = None
if filename.endswith('.whl'):
metadata = _extract_metadata_from_wheel(content)
elif filename.endswith('.tar.gz'):
metadata = _extract_metadata_from_sdist(content, filename)
if metadata:
return _extract_requires_from_metadata(metadata)
return []
# Timeout configuration for proxy requests
PROXY_CONNECT_TIMEOUT = 30.0
PROXY_READ_TIMEOUT = 60.0
def _extract_pypi_version(filename: str) -> Optional[str]:
"""Extract version from PyPI filename.
Handles formats like:
- cowsay-6.1-py3-none-any.whl
- cowsay-1.0.tar.gz
- some_package-1.2.3.post1-cp39-cp39-linux_x86_64.whl
"""
# Remove extension
if filename.endswith('.whl'):
# Wheel: name-version-pytag-abitag-platform.whl
parts = filename[:-4].split('-')
if len(parts) >= 2:
return parts[1]
elif filename.endswith('.tar.gz'):
# Source: name-version.tar.gz
base = filename[:-7]
# Find the last hyphen that precedes a version-like string
match = re.match(r'^(.+)-(\d+.*)$', base)
if match:
return match.group(2)
elif filename.endswith('.zip'):
# Egg/zip: name-version.zip
base = filename[:-4]
match = re.match(r'^(.+)-(\d+.*)$', base)
if match:
return match.group(2)
return None
def _get_pypi_upstream_sources(db: Session) -> list[UpstreamSource]:
"""Get all enabled upstream sources configured for PyPI."""
# Get database sources
db_sources = (
db.query(UpstreamSource)
.filter(
UpstreamSource.source_type == "pypi",
UpstreamSource.enabled == True,
)
.order_by(UpstreamSource.priority)
.all()
)
# Get env sources
env_sources = [
s for s in get_env_upstream_sources()
if s.source_type == "pypi" and s.enabled
]
# Combine and sort by priority
all_sources = list(db_sources) + list(env_sources)
return sorted(all_sources, key=lambda s: s.priority)
def _build_auth_headers(source) -> dict:
"""Build authentication headers for an upstream source."""
headers = {}
if hasattr(source, 'auth_type'):
if source.auth_type == "bearer":
password = source.get_password() if hasattr(source, 'get_password') else getattr(source, 'password', None)
if password:
headers["Authorization"] = f"Bearer {password}"
elif source.auth_type == "api_key":
custom_headers = source.get_headers() if hasattr(source, 'get_headers') else {}
if custom_headers:
headers.update(custom_headers)
return headers
def _get_basic_auth(source) -> Optional[tuple[str, str]]:
"""Get basic auth credentials if applicable."""
if hasattr(source, 'auth_type') and source.auth_type == "basic":
username = getattr(source, 'username', None)
if username:
password = source.get_password() if hasattr(source, 'get_password') else getattr(source, 'password', '')
return (username, password or '')
return None
def _get_base_url(request: Request) -> str:
"""
Get the external base URL, respecting X-Forwarded-Proto header.
When behind a reverse proxy that terminates SSL, the request.base_url
will show http:// even though the external URL is https://. This function
checks the X-Forwarded-Proto header to determine the correct scheme.
"""
base_url = str(request.base_url).rstrip('/')
# Check for X-Forwarded-Proto header (set by reverse proxies)
forwarded_proto = request.headers.get('x-forwarded-proto')
if forwarded_proto:
# Replace the scheme with the forwarded protocol
parsed = urlparse(base_url)
base_url = f"{forwarded_proto}://{parsed.netloc}{parsed.path}"
return base_url
def _rewrite_package_links(html: str, base_url: str, package_name: str, upstream_base_url: str) -> str:
"""
Rewrite download links in a PyPI simple page to go through our proxy.
Args:
html: The HTML content from upstream
base_url: Our server's base URL
package_name: The package name for the URL path
upstream_base_url: The upstream URL used to fetch this page (for resolving relative URLs)
Returns:
HTML with rewritten download links
"""
# Pattern to match href attributes in anchor tags
# PyPI simple pages have links like:
# <a href="https://files.pythonhosted.org/packages/.../file.tar.gz#sha256=...">file.tar.gz</a>
# Or relative URLs from Artifactory like:
# <a href="../../packages/packages/62/35/.../requests-0.10.0.tar.gz#sha256=...">
def replace_href(match):
original_url = match.group(1)
# Resolve relative URLs to absolute using the upstream base URL
if not original_url.startswith(('http://', 'https://')):
# Split off fragment before resolving
url_without_fragment = original_url.split('#')[0]
fragment_part = original_url[len(url_without_fragment):]
absolute_url = urljoin(upstream_base_url, url_without_fragment) + fragment_part
else:
absolute_url = original_url
# Extract the filename from the URL
parsed = urlparse(absolute_url)
path_parts = parsed.path.split('/')
filename = path_parts[-1] if path_parts else ''
# Keep the hash fragment if present
fragment = f"#{parsed.fragment}" if parsed.fragment else ""
# Encode the absolute URL (without fragment) for safe transmission
encoded_url = quote(absolute_url.split('#')[0], safe='')
# Build new URL pointing to our proxy
new_url = f"{base_url}/pypi/simple/{package_name}/{filename}?upstream={encoded_url}{fragment}"
return f'href="{new_url}"'
# Match href="..." patterns
rewritten = re.sub(r'href="([^"]+)"', replace_href, html)
return rewritten
@router.get("/simple/")
async def pypi_simple_index(
request: Request,
db: Session = Depends(get_db),
):
"""
PyPI Simple API index - lists all packages.
Proxies to the first available upstream PyPI source.
"""
sources = _get_pypi_upstream_sources(db)
if not sources:
raise HTTPException(
status_code=503,
detail="No PyPI upstream sources configured"
)
# Try each source in priority order
last_error = None
for source in sources:
try:
headers = {"User-Agent": "Orchard-PyPI-Proxy/1.0"}
headers.update(_build_auth_headers(source))
auth = _get_basic_auth(source)
# Use URL as-is - users should provide full path including /simple
simple_url = source.url.rstrip('/') + '/'
timeout = httpx.Timeout(PROXY_READ_TIMEOUT, connect=PROXY_CONNECT_TIMEOUT)
with httpx.Client(timeout=timeout, follow_redirects=False) as client:
response = client.get(
simple_url,
headers=headers,
auth=auth,
)
# Handle redirects manually to avoid loops
if response.status_code in (301, 302, 303, 307, 308):
redirect_url = response.headers.get('location')
if redirect_url:
# Follow the redirect once
response = client.get(
redirect_url,
headers=headers,
auth=auth,
follow_redirects=False,
)
if response.status_code == 200:
# Return the index as-is (links are to package pages, not files)
# We could rewrite these too, but for now just proxy
content = response.text
# Rewrite package links to go through our proxy
base_url = _get_base_url(request)
content = re.sub(
r'href="([^"]+)/"',
lambda m: f'href="{base_url}/pypi/simple/{m.group(1)}/"',
content
)
return HTMLResponse(content=content)
last_error = f"HTTP {response.status_code}"
except httpx.ConnectError as e:
last_error = f"Connection failed: {e}"
logger.warning(f"PyPI proxy: failed to connect to {source.url}: {e}")
except httpx.TimeoutException as e:
last_error = f"Timeout: {e}"
logger.warning(f"PyPI proxy: timeout connecting to {source.url}: {e}")
except Exception as e:
last_error = str(e)
logger.warning(f"PyPI proxy: error fetching from {source.url}: {e}")
raise HTTPException(
status_code=502,
detail=f"Failed to fetch package index from upstream: {last_error}"
)
@router.get("/simple/{package_name}/")
async def pypi_package_versions(
request: Request,
package_name: str,
db: Session = Depends(get_db),
):
"""
PyPI Simple API package page - lists all versions/files for a package.
Proxies to upstream and rewrites download links to go through our cache.
"""
sources = _get_pypi_upstream_sources(db)
if not sources:
raise HTTPException(
status_code=503,
detail="No PyPI upstream sources configured"
)
base_url = _get_base_url(request)
# Normalize package name (PEP 503)
normalized_name = re.sub(r'[-_.]+', '-', package_name).lower()
# Try each source in priority order
last_error = None
for source in sources:
try:
headers = {"User-Agent": "Orchard-PyPI-Proxy/1.0"}
headers.update(_build_auth_headers(source))
auth = _get_basic_auth(source)
# Use URL as-is - users should provide full path including /simple
package_url = source.url.rstrip('/') + f'/{normalized_name}/'
final_url = package_url # Track final URL after redirects
timeout = httpx.Timeout(PROXY_READ_TIMEOUT, connect=PROXY_CONNECT_TIMEOUT)
with httpx.Client(timeout=timeout, follow_redirects=False) as client:
response = client.get(
package_url,
headers=headers,
auth=auth,
)
# Handle redirects manually
redirect_count = 0
while response.status_code in (301, 302, 303, 307, 308) and redirect_count < 5:
redirect_url = response.headers.get('location')
if not redirect_url:
break
# Make redirect URL absolute if needed
if not redirect_url.startswith('http'):
redirect_url = urljoin(final_url, redirect_url)
final_url = redirect_url # Update final URL
response = client.get(
redirect_url,
headers=headers,
auth=auth,
follow_redirects=False,
)
redirect_count += 1
if response.status_code == 200:
content = response.text
# Rewrite download links to go through our proxy
# Pass final_url so relative URLs can be resolved correctly
content = _rewrite_package_links(content, base_url, normalized_name, final_url)
return HTMLResponse(content=content)
if response.status_code == 404:
# Package not found in this source, try next
last_error = f"Package not found in {source.name}"
continue
last_error = f"HTTP {response.status_code}"
except httpx.ConnectError as e:
last_error = f"Connection failed: {e}"
logger.warning(f"PyPI proxy: failed to connect to {source.url}: {e}")
except httpx.TimeoutException as e:
last_error = f"Timeout: {e}"
logger.warning(f"PyPI proxy: timeout connecting to {source.url}: {e}")
except Exception as e:
last_error = str(e)
logger.warning(f"PyPI proxy: error fetching {package_name} from {source.url}: {e}")
raise HTTPException(
status_code=404,
detail=f"Package '{package_name}' not found: {last_error}"
)
@router.get("/simple/{package_name}/{filename}")
async def pypi_download_file(
request: Request,
package_name: str,
filename: str,
upstream: Optional[str] = None,
db: Session = Depends(get_db),
storage: S3Storage = Depends(get_storage),
):
"""
Download a package file, caching it in Orchard.
Args:
package_name: The package name
filename: The filename to download
upstream: URL-encoded upstream URL to fetch from
"""
if not upstream:
raise HTTPException(
status_code=400,
detail="Missing 'upstream' query parameter with source URL"
)
# Decode the upstream URL
upstream_url = unquote(upstream)
# Check if we already have this URL cached
url_hash = hashlib.sha256(upstream_url.encode()).hexdigest()
cached_url = db.query(CachedUrl).filter(CachedUrl.url_hash == url_hash).first()
if cached_url:
# Serve from cache
artifact = db.query(Artifact).filter(Artifact.id == cached_url.artifact_id).first()
if artifact:
logger.info(f"PyPI proxy: serving cached {filename} (artifact {artifact.id[:12]})")
# Stream from S3
try:
stream, content_length, _ = storage.get_stream(artifact.s3_key)
def stream_content():
"""Generator that yields chunks from the S3 stream."""
try:
for chunk in stream.iter_chunks():
yield chunk
finally:
stream.close()
return StreamingResponse(
stream_content(),
media_type=artifact.content_type or "application/octet-stream",
headers={
"Content-Disposition": f'attachment; filename="{filename}"',
"Content-Length": str(content_length),
"X-Checksum-SHA256": artifact.id,
"X-Cache": "HIT",
}
)
except Exception as e:
logger.error(f"PyPI proxy: error streaming cached artifact: {e}")
# Fall through to fetch from upstream
# Not cached - fetch from upstream
sources = _get_pypi_upstream_sources(db)
# Use the first available source for authentication headers
# Note: The upstream URL may point to files.pythonhosted.org or other CDNs,
# not the configured source URL directly, so we can't strictly validate the host
matched_source = sources[0] if sources else None
try:
headers = {"User-Agent": "Orchard-PyPI-Proxy/1.0"}
if matched_source:
headers.update(_build_auth_headers(matched_source))
auth = _get_basic_auth(matched_source) if matched_source else None
timeout = httpx.Timeout(300.0, connect=PROXY_CONNECT_TIMEOUT) # 5 minutes for large files
# Fetch the file
logger.info(f"PyPI proxy: fetching {filename} from {upstream_url}")
with httpx.Client(timeout=timeout, follow_redirects=False) as client:
response = client.get(
upstream_url,
headers=headers,
auth=auth,
)
# Handle redirects manually
redirect_count = 0
while response.status_code in (301, 302, 303, 307, 308) and redirect_count < 5:
redirect_url = response.headers.get('location')
if not redirect_url:
break
if not redirect_url.startswith('http'):
redirect_url = urljoin(upstream_url, redirect_url)
logger.info(f"PyPI proxy: following redirect to {redirect_url}")
# Don't send auth to different hosts
redirect_headers = {"User-Agent": "Orchard-PyPI-Proxy/1.0"}
redirect_auth = None
if urlparse(redirect_url).netloc == urlparse(upstream_url).netloc:
redirect_headers.update(headers)
redirect_auth = auth
response = client.get(
redirect_url,
headers=redirect_headers,
auth=redirect_auth,
follow_redirects=False,
)
redirect_count += 1
if response.status_code != 200:
raise HTTPException(
status_code=response.status_code,
detail=f"Upstream returned {response.status_code}"
)
content = response.content
content_type = response.headers.get('content-type', 'application/octet-stream')
# Store in S3 (computes hash and deduplicates automatically)
from io import BytesIO
result = storage.store(BytesIO(content))
sha256 = result.sha256
size = result.size
logger.info(f"PyPI proxy: downloaded {filename}, {size} bytes, sha256={sha256[:12]}")
# Check if artifact already exists
existing = db.query(Artifact).filter(Artifact.id == sha256).first()
if existing:
# Increment ref count
existing.ref_count += 1
db.flush()
else:
# Create artifact record
new_artifact = Artifact(
id=sha256,
original_name=filename,
content_type=content_type,
size=size,
ref_count=1,
created_by="pypi-proxy",
s3_key=result.s3_key,
checksum_md5=result.md5,
checksum_sha1=result.sha1,
s3_etag=result.s3_etag,
)
db.add(new_artifact)
db.flush()
# Create/get system project and package
system_project = db.query(Project).filter(Project.name == "_pypi").first()
if not system_project:
system_project = Project(
name="_pypi",
description="System project for cached PyPI packages",
is_public=True,
is_system=True,
created_by="pypi-proxy",
)
db.add(system_project)
db.flush()
elif not system_project.is_system:
# Ensure existing project is marked as system
system_project.is_system = True
db.flush()
# Normalize package name
normalized_name = re.sub(r'[-_.]+', '-', package_name).lower()
package = db.query(Package).filter(
Package.project_id == system_project.id,
Package.name == normalized_name,
).first()
if not package:
package = Package(
project_id=system_project.id,
name=normalized_name,
description=f"PyPI package: {normalized_name}",
format="pypi",
)
db.add(package)
db.flush()
# Create tag with filename
existing_tag = db.query(Tag).filter(
Tag.package_id == package.id,
Tag.name == filename,
).first()
if not existing_tag:
tag = Tag(
package_id=package.id,
name=filename,
artifact_id=sha256,
created_by="pypi-proxy",
)
db.add(tag)
# Extract and create version
# Only create version for actual package files, not .metadata files
version = _extract_pypi_version(filename)
if version and not filename.endswith('.metadata'):
# Check by version string (the unique constraint is on package_id + version)
existing_version = db.query(PackageVersion).filter(
PackageVersion.package_id == package.id,
PackageVersion.version == version,
).first()
if not existing_version:
pkg_version = PackageVersion(
package_id=package.id,
artifact_id=sha256,
version=version,
version_source="filename",
created_by="pypi-proxy",
)
db.add(pkg_version)
# Cache the URL mapping
existing_cached = db.query(CachedUrl).filter(CachedUrl.url_hash == url_hash).first()
if not existing_cached:
cached_url_record = CachedUrl(
url_hash=url_hash,
url=upstream_url,
artifact_id=sha256,
)
db.add(cached_url_record)
# Extract and store dependencies
dependencies = _extract_dependencies(content, filename)
unique_deps = []
if dependencies:
# Deduplicate dependencies by package name (keep first occurrence)
seen_packages = set()
for dep_name, dep_version in dependencies:
if dep_name not in seen_packages:
seen_packages.add(dep_name)
unique_deps.append((dep_name, dep_version))
logger.info(f"PyPI proxy: extracted {len(unique_deps)} dependencies from {filename} (deduped from {len(dependencies)})")
for dep_name, dep_version in unique_deps:
# Check if this dependency already exists for this artifact
existing_dep = db.query(ArtifactDependency).filter(
ArtifactDependency.artifact_id == sha256,
ArtifactDependency.dependency_project == "_pypi",
ArtifactDependency.dependency_package == dep_name,
).first()
if not existing_dep:
dep = ArtifactDependency(
artifact_id=sha256,
dependency_project="_pypi",
dependency_package=dep_name,
version_constraint=dep_version if dep_version else "*",
)
db.add(dep)
# Proactively cache dependencies via task queue
if unique_deps:
for dep_name, dep_version in unique_deps:
enqueue_cache_task(
db,
package_name=dep_name,
version_constraint=dep_version,
parent_task_id=None, # Top-level, triggered by user download
depth=0,
triggered_by_artifact=sha256,
)
logger.info(f"PyPI proxy: queued {len(unique_deps)} dependencies for caching")
db.commit()
# Return the file
return Response(
content=content,
media_type=content_type,
headers={
"Content-Disposition": f'attachment; filename="{filename}"',
"Content-Length": str(size),
"X-Checksum-SHA256": sha256,
"X-Cache": "MISS",
}
)
except httpx.ConnectError as e:
raise HTTPException(status_code=502, detail=f"Connection failed: {e}")
except httpx.TimeoutException as e:
raise HTTPException(status_code=504, detail=f"Timeout: {e}")
except HTTPException:
raise
except Exception as e:
logger.exception(f"PyPI proxy: error downloading {filename}")
raise HTTPException(status_code=500, detail=str(e))
# =============================================================================
# Cache Status and Management Endpoints
# =============================================================================
@router.get("/cache/status")
async def pypi_cache_status(db: Session = Depends(get_db)):
"""
Get summary of the PyPI cache task queue.
Returns counts of tasks by status (pending, in_progress, completed, failed).
"""
return get_cache_status(db)
@router.get("/cache/failed")
async def pypi_cache_failed(
limit: int = 50,
db: Session = Depends(get_db),
):
"""
Get list of failed cache tasks for debugging.
Args:
limit: Maximum number of tasks to return (default 50).
"""
return get_failed_tasks(db, limit=limit)
@router.post("/cache/retry/{package_name}")
async def pypi_cache_retry(
package_name: str,
db: Session = Depends(get_db),
):
"""
Reset a failed cache task to retry.
Args:
package_name: The package name to retry.
"""
task = retry_failed_task(db, package_name)
if not task:
raise HTTPException(
status_code=404,
detail=f"No failed cache task found for package '{package_name}'"
)
return {"message": f"Retry queued for {task.package_name}", "task_id": str(task.id)}
@router.post("/cache/retry-all")
async def pypi_cache_retry_all(db: Session = Depends(get_db)):
"""
Reset all failed cache tasks to retry.
Returns the count of tasks that were reset.
"""
count = retry_all_failed_tasks(db)
return {"message": f"Queued {count} tasks for retry", "count": count}

View File

@@ -1680,7 +1680,6 @@ def create_project(
name=db_project.name,
description=db_project.description,
is_public=db_project.is_public,
is_system=db_project.is_system,
created_at=db_project.created_at,
updated_at=db_project.updated_at,
created_by=db_project.created_by,
@@ -1705,7 +1704,6 @@ def get_project(
name=project.name,
description=project.description,
is_public=project.is_public,
is_system=project.is_system,
created_at=project.created_at,
updated_at=project.updated_at,
created_by=project.created_by,
@@ -2706,7 +2704,6 @@ def list_team_projects(
name=p.name,
description=p.description,
is_public=p.is_public,
is_system=p.is_system,
created_at=p.created_at,
updated_at=p.updated_at,
created_by=p.created_by,
@@ -2830,15 +2827,14 @@ def list_packages(
db.query(func.count(Tag.id)).filter(Tag.package_id == pkg.id).scalar() or 0
)
# Get unique artifact count and total size via tags
# (PyPI proxy creates tags without uploads, so query from tags)
# Get unique artifact count and total size via uploads
artifact_stats = (
db.query(
func.count(func.distinct(Tag.artifact_id)),
func.count(func.distinct(Upload.artifact_id)),
func.coalesce(func.sum(Artifact.size), 0),
)
.join(Artifact, Tag.artifact_id == Artifact.id)
.filter(Tag.package_id == pkg.id)
.join(Artifact, Upload.artifact_id == Artifact.id)
.filter(Upload.package_id == pkg.id)
.first()
)
artifact_count = artifact_stats[0] if artifact_stats else 0
@@ -2934,15 +2930,14 @@ def get_package(
db.query(func.count(Tag.id)).filter(Tag.package_id == pkg.id).scalar() or 0
)
# Get unique artifact count and total size via tags
# (PyPI proxy creates tags without uploads, so query from tags)
# Get unique artifact count and total size via uploads
artifact_stats = (
db.query(
func.count(func.distinct(Tag.artifact_id)),
func.count(func.distinct(Upload.artifact_id)),
func.coalesce(func.sum(Artifact.size), 0),
)
.join(Artifact, Tag.artifact_id == Artifact.id)
.filter(Tag.package_id == pkg.id)
.join(Artifact, Upload.artifact_id == Artifact.id)
.filter(Upload.package_id == pkg.id)
.first()
)
artifact_count = artifact_stats[0] if artifact_stats else 0
@@ -6285,14 +6280,14 @@ def get_package_stats(
db.query(func.count(Tag.id)).filter(Tag.package_id == package.id).scalar() or 0
)
# Artifact stats via tags (tags exist for both user uploads and PyPI proxy)
# Artifact stats via uploads
artifact_stats = (
db.query(
func.count(func.distinct(Tag.artifact_id)),
func.count(func.distinct(Upload.artifact_id)),
func.coalesce(func.sum(Artifact.size), 0),
)
.join(Artifact, Tag.artifact_id == Artifact.id)
.filter(Tag.package_id == package.id)
.join(Artifact, Upload.artifact_id == Artifact.id)
.filter(Upload.package_id == package.id)
.first()
)
artifact_count = artifact_stats[0] if artifact_stats else 0
@@ -7871,6 +7866,7 @@ from .upstream import (
UpstreamTimeoutError,
UpstreamHTTPError,
UpstreamSSLError,
AirGapError,
FileSizeExceededError as UpstreamFileSizeExceededError,
SourceNotFoundError,
SourceDisabledError,
@@ -8025,6 +8021,10 @@ def cache_artifact(
- Optionally creates tag in user project
- Records URL mapping for provenance
**Air-Gap Mode:**
When `allow_public_internet` is false, only URLs matching private
(non-public) upstream sources are allowed.
**Example (curl):**
```bash
curl -X POST "http://localhost:8080/api/v1/cache" \\
@@ -8118,6 +8118,8 @@ def cache_artifact(
cache_request.url,
expected_hash=cache_request.expected_hash,
)
except AirGapError as e:
raise HTTPException(status_code=403, detail=str(e))
except SourceDisabledError as e:
raise HTTPException(status_code=503, detail=str(e))
except UpstreamHTTPError as e:
@@ -8310,200 +8312,6 @@ def _create_user_cache_reference(
return f"{user_project_name}/{user_package_name}"
# --- Cache Resolve Endpoint ---
from .schemas import CacheResolveRequest
@router.post(
"/api/v1/cache/resolve",
response_model=CacheResponse,
tags=["cache"],
summary="Cache an artifact by package coordinates",
)
def cache_resolve(
request: Request,
resolve_request: CacheResolveRequest,
db: Session = Depends(get_db),
storage: S3Storage = Depends(get_storage),
current_user: User = Depends(get_current_user),
):
"""
Cache an artifact by package coordinates (no URL required).
The server finds the appropriate download URL based on source_type
and configured upstream sources. Currently supports PyPI packages.
**Request Body:**
- `source_type` (required): Type of source (pypi, npm, maven, etc.)
- `package` (required): Package name
- `version` (required): Package version
- `user_project` (optional): Also create reference in this user project
- `user_package` (optional): Package name in user project
- `user_tag` (optional): Tag name in user project
**Example (curl):**
```bash
curl -X POST "http://localhost:8080/api/v1/cache/resolve" \\
-H "Authorization: Bearer <api-key>" \\
-H "Content-Type: application/json" \\
-d '{
"source_type": "pypi",
"package": "requests",
"version": "2.31.0"
}'
```
"""
import re
import httpx
from urllib.parse import quote, unquote
if resolve_request.source_type != "pypi":
raise HTTPException(
status_code=501,
detail=f"Cache resolve for '{resolve_request.source_type}' not yet implemented. Currently only 'pypi' is supported."
)
# Get PyPI upstream sources
sources = (
db.query(UpstreamSource)
.filter(
UpstreamSource.source_type == "pypi",
UpstreamSource.enabled == True,
)
.order_by(UpstreamSource.priority)
.all()
)
# Also get env sources
env_sources = [
s for s in get_env_upstream_sources()
if s.source_type == "pypi" and s.enabled
]
all_sources = list(sources) + list(env_sources)
all_sources = sorted(all_sources, key=lambda s: s.priority)
if not all_sources:
raise HTTPException(
status_code=503,
detail="No PyPI upstream sources configured"
)
# Normalize package name (PEP 503)
normalized_package = re.sub(r'[-_.]+', '-', resolve_request.package).lower()
# Query the Simple API to find the download URL
download_url = None
matched_filename = None
last_error = None
for source in all_sources:
try:
headers = {"User-Agent": "Orchard-CacheResolver/1.0"}
# Build auth if needed
if hasattr(source, 'auth_type'):
if source.auth_type == "bearer":
password = source.get_password() if hasattr(source, 'get_password') else getattr(source, 'password', None)
if password:
headers["Authorization"] = f"Bearer {password}"
elif source.auth_type == "api_key":
custom_headers = source.get_headers() if hasattr(source, 'get_headers') else {}
if custom_headers:
headers.update(custom_headers)
auth = None
if hasattr(source, 'auth_type') and source.auth_type == "basic":
username = getattr(source, 'username', None)
if username:
password = source.get_password() if hasattr(source, 'get_password') else getattr(source, 'password', '')
auth = (username, password or '')
source_url = getattr(source, 'url', '')
package_url = source_url.rstrip('/') + f'/simple/{normalized_package}/'
timeout = httpx.Timeout(connect=30.0, read=60.0)
with httpx.Client(timeout=timeout, follow_redirects=True) as client:
response = client.get(package_url, headers=headers, auth=auth)
if response.status_code == 404:
last_error = f"Package not found in {getattr(source, 'name', 'source')}"
continue
if response.status_code != 200:
last_error = f"HTTP {response.status_code} from {getattr(source, 'name', 'source')}"
continue
# Parse HTML to find the version
html = response.text
# Look for links containing the version
# Pattern: href="...{package}-{version}...#sha256=..."
version_pattern = re.escape(resolve_request.version)
link_pattern = rf'href="([^"]+{normalized_package}[^"]*{version_pattern}[^"]*)"'
matches = re.findall(link_pattern, html, re.IGNORECASE)
if not matches:
# Try with original package name
link_pattern = rf'href="([^"]+{re.escape(resolve_request.package)}[^"]*{version_pattern}[^"]*)"'
matches = re.findall(link_pattern, html, re.IGNORECASE)
if matches:
# Prefer .tar.gz or .whl files
for match in matches:
url = match.split('#')[0] # Remove hash fragment
if url.endswith('.tar.gz') or url.endswith('.whl'):
download_url = url
# Extract filename
matched_filename = url.split('/')[-1]
break
if not download_url:
# Use first match
download_url = matches[0].split('#')[0]
matched_filename = download_url.split('/')[-1]
break
last_error = f"Version {resolve_request.version} not found for {resolve_request.package}"
except httpx.ConnectError as e:
last_error = f"Connection failed: {e}"
logger.warning(f"Cache resolve: failed to connect to {getattr(source, 'url', 'source')}: {e}")
except httpx.TimeoutException as e:
last_error = f"Timeout: {e}"
logger.warning(f"Cache resolve: timeout connecting to {getattr(source, 'url', 'source')}: {e}")
except Exception as e:
last_error = str(e)
logger.warning(f"Cache resolve: error: {e}")
if not download_url:
raise HTTPException(
status_code=404,
detail=f"Could not find {resolve_request.package}=={resolve_request.version}: {last_error}"
)
# Now cache the artifact using the existing cache_artifact logic
# Construct a CacheRequest
cache_request = CacheRequest(
url=download_url,
source_type="pypi",
package_name=normalized_package,
tag=matched_filename or resolve_request.version,
user_project=resolve_request.user_project,
user_package=resolve_request.user_package,
user_tag=resolve_request.user_tag,
)
# Call the cache logic
return cache_artifact(
request=request,
cache_request=cache_request,
db=db,
storage=storage,
current_user=current_user,
)
# --- Upstream Sources Admin API ---
from .schemas import (
@@ -8525,6 +8333,7 @@ def _env_source_to_response(env_source) -> UpstreamSourceResponse:
source_type=env_source.source_type,
url=env_source.url,
enabled=env_source.enabled,
is_public=env_source.is_public,
auth_type=env_source.auth_type,
username=env_source.username,
has_password=bool(env_source.password),
@@ -8608,6 +8417,7 @@ def list_upstream_sources(
source_type=s.source_type,
url=s.url,
enabled=s.enabled,
is_public=s.is_public,
auth_type=s.auth_type,
username=s.username,
has_password=s.has_password(),
@@ -8656,6 +8466,7 @@ def create_upstream_source(
"source_type": "npm",
"url": "https://npm.internal.corp",
"enabled": true,
"is_public": false,
"auth_type": "basic",
"username": "reader",
"password": "secret123",
@@ -8677,6 +8488,7 @@ def create_upstream_source(
source_type=source_create.source_type,
url=source_create.url,
enabled=source_create.enabled,
is_public=source_create.is_public,
auth_type=source_create.auth_type,
username=source_create.username,
priority=source_create.priority,
@@ -8716,6 +8528,7 @@ def create_upstream_source(
source_type=source.source_type,
url=source.url,
enabled=source.enabled,
is_public=source.is_public,
auth_type=source.auth_type,
username=source.username,
has_password=source.has_password(),
@@ -8763,6 +8576,7 @@ def get_upstream_source(
source_type=source.source_type,
url=source.url,
enabled=source.enabled,
is_public=source.is_public,
auth_type=source.auth_type,
username=source.username,
has_password=source.has_password(),
@@ -8849,6 +8663,10 @@ def update_upstream_source(
changes["enabled"] = {"old": source.enabled, "new": source_update.enabled}
source.enabled = source_update.enabled
if source_update.is_public is not None and source_update.is_public != source.is_public:
changes["is_public"] = {"old": source.is_public, "new": source_update.is_public}
source.is_public = source_update.is_public
if source_update.auth_type is not None and source_update.auth_type != source.auth_type:
changes["auth_type"] = {"old": source.auth_type, "new": source_update.auth_type}
source.auth_type = source_update.auth_type
@@ -8901,6 +8719,7 @@ def update_upstream_source(
source_type=source.source_type,
url=source.url,
enabled=source.enabled,
is_public=source.is_public,
auth_type=source.auth_type,
username=source.username,
has_password=source.has_password(),
@@ -9041,10 +8860,12 @@ def get_cache_settings(
Admin-only endpoint for viewing cache configuration.
**Settings:**
- `allow_public_internet`: When false, blocks all requests to sources marked `is_public=true` (air-gap mode)
- `auto_create_system_projects`: When true, system projects (`_npm`, etc.) are created automatically on first cache
**Environment variable overrides:**
Settings can be overridden via environment variables:
- `ORCHARD_CACHE_ALLOW_PUBLIC_INTERNET`: Overrides `allow_public_internet`
- `ORCHARD_CACHE_AUTO_CREATE_SYSTEM_PROJECTS`: Overrides `auto_create_system_projects`
When an env var override is active, the `*_env_override` field will contain the override value.
@@ -9053,6 +8874,12 @@ def get_cache_settings(
db_settings = _get_cache_settings(db)
# Apply env var overrides
allow_public_internet = db_settings.allow_public_internet
allow_public_internet_env_override = None
if app_settings.cache_allow_public_internet is not None:
allow_public_internet = app_settings.cache_allow_public_internet
allow_public_internet_env_override = app_settings.cache_allow_public_internet
auto_create_system_projects = db_settings.auto_create_system_projects
auto_create_system_projects_env_override = None
if app_settings.cache_auto_create_system_projects is not None:
@@ -9060,7 +8887,9 @@ def get_cache_settings(
auto_create_system_projects_env_override = app_settings.cache_auto_create_system_projects
return CacheSettingsResponse(
allow_public_internet=allow_public_internet,
auto_create_system_projects=auto_create_system_projects,
allow_public_internet_env_override=allow_public_internet_env_override,
auto_create_system_projects_env_override=auto_create_system_projects_env_override,
created_at=db_settings.created_at,
updated_at=db_settings.updated_at,
@@ -9086,11 +8915,16 @@ def update_cache_settings(
Supports partial updates - only provided fields are updated.
**Settings:**
- `allow_public_internet`: When false, enables air-gap mode (blocks public sources)
- `auto_create_system_projects`: When false, system projects must be created manually
**Note:** Environment variables can override these settings. When overridden,
the `*_env_override` fields in the response indicate the effective value.
Updates to the database will be saved but won't take effect until the env var is removed.
**Warning:** Changing `allow_public_internet` to false will immediately block
all cache requests to public sources. This is a security-sensitive setting
and is logged prominently.
"""
app_settings = get_settings()
settings = _get_cache_settings(db)
@@ -9098,6 +8932,26 @@ def update_cache_settings(
# Track changes for audit log
changes = {}
if settings_update.allow_public_internet is not None:
if settings_update.allow_public_internet != settings.allow_public_internet:
changes["allow_public_internet"] = {
"old": settings.allow_public_internet,
"new": settings_update.allow_public_internet,
}
settings.allow_public_internet = settings_update.allow_public_internet
# Log prominently for security audit
if not settings_update.allow_public_internet:
logger.warning(
f"AIR-GAP MODE ENABLED by {current_user.username} - "
f"all public internet access is now blocked"
)
else:
logger.warning(
f"AIR-GAP MODE DISABLED by {current_user.username} - "
f"public internet access is now allowed"
)
if settings_update.auto_create_system_projects is not None:
if settings_update.auto_create_system_projects != settings.auto_create_system_projects:
changes["auto_create_system_projects"] = {
@@ -9107,9 +8961,11 @@ def update_cache_settings(
settings.auto_create_system_projects = settings_update.auto_create_system_projects
if changes:
# Audit log with security flag for air-gap changes
is_security_change = "allow_public_internet" in changes
_log_audit(
db,
action="cache_settings.update",
action="cache_settings.update" if not is_security_change else "cache_settings.security_update",
resource="cache-settings",
user_id=current_user.username,
source_ip=request.client.host if request.client else None,
@@ -9120,6 +8976,12 @@ def update_cache_settings(
db.refresh(settings)
# Apply env var overrides for the response
allow_public_internet = settings.allow_public_internet
allow_public_internet_env_override = None
if app_settings.cache_allow_public_internet is not None:
allow_public_internet = app_settings.cache_allow_public_internet
allow_public_internet_env_override = app_settings.cache_allow_public_internet
auto_create_system_projects = settings.auto_create_system_projects
auto_create_system_projects_env_override = None
if app_settings.cache_auto_create_system_projects is not None:
@@ -9127,7 +8989,9 @@ def update_cache_settings(
auto_create_system_projects_env_override = app_settings.cache_auto_create_system_projects
return CacheSettingsResponse(
allow_public_internet=allow_public_internet,
auto_create_system_projects=auto_create_system_projects,
allow_public_internet_env_override=allow_public_internet_env_override,
auto_create_system_projects_env_override=auto_create_system_projects_env_override,
created_at=settings.created_at,
updated_at=settings.updated_at,

View File

@@ -33,7 +33,6 @@ class ProjectResponse(BaseModel):
name: str
description: Optional[str]
is_public: bool
is_system: bool = False
created_at: datetime
updated_at: datetime
created_by: str
@@ -1215,6 +1214,7 @@ class UpstreamSourceCreate(BaseModel):
source_type: str = "generic"
url: str
enabled: bool = False
is_public: bool = True
auth_type: str = "none"
username: Optional[str] = None
password: Optional[str] = None # Write-only
@@ -1271,6 +1271,7 @@ class UpstreamSourceUpdate(BaseModel):
source_type: Optional[str] = None
url: Optional[str] = None
enabled: Optional[bool] = None
is_public: Optional[bool] = None
auth_type: Optional[str] = None
username: Optional[str] = None
password: Optional[str] = None # Write-only, None = keep existing, empty string = clear
@@ -1330,6 +1331,7 @@ class UpstreamSourceResponse(BaseModel):
source_type: str
url: str
enabled: bool
is_public: bool
auth_type: str
username: Optional[str]
has_password: bool # True if password is set
@@ -1345,7 +1347,9 @@ class UpstreamSourceResponse(BaseModel):
class CacheSettingsResponse(BaseModel):
"""Global cache settings response"""
allow_public_internet: bool
auto_create_system_projects: bool
allow_public_internet_env_override: Optional[bool] = None # Set if overridden by env var
auto_create_system_projects_env_override: Optional[bool] = None # Set if overridden by env var
created_at: Optional[datetime] = None # May be None for legacy data
updated_at: Optional[datetime] = None # May be None for legacy data
@@ -1356,6 +1360,7 @@ class CacheSettingsResponse(BaseModel):
class CacheSettingsUpdate(BaseModel):
"""Update cache settings (partial)"""
allow_public_internet: Optional[bool] = None
auto_create_system_projects: Optional[bool] = None
@@ -1433,41 +1438,4 @@ class CacheResponse(BaseModel):
user_reference: Optional[str] = None # e.g., "my-app/npm-deps:lodash-4.17.21"
class CacheResolveRequest(BaseModel):
"""Request to cache an artifact by package coordinates (no URL required).
The server will construct the appropriate URL based on source_type and
configured upstream sources.
"""
source_type: str
package: str
version: str
user_project: Optional[str] = None
user_package: Optional[str] = None
user_tag: Optional[str] = None
@field_validator('source_type')
@classmethod
def validate_source_type(cls, v: str) -> str:
if v not in SOURCE_TYPES:
raise ValueError(f"source_type must be one of: {', '.join(SOURCE_TYPES)}")
return v
@field_validator('package')
@classmethod
def validate_package(cls, v: str) -> str:
v = v.strip()
if not v:
raise ValueError("package cannot be empty")
return v
@field_validator('version')
@classmethod
def validate_version(cls, v: str) -> str:
v = v.strip()
if not v:
raise ValueError("version cannot be empty")
return v

View File

@@ -57,6 +57,10 @@ class UpstreamSSLError(UpstreamError):
pass
class AirGapError(UpstreamError):
"""Request blocked due to air-gap mode."""
pass
class FileSizeExceededError(UpstreamError):
@@ -152,6 +156,12 @@ class UpstreamClient:
# Sort sources by priority (lower = higher priority)
self.sources = sorted(self.sources, key=lambda s: s.priority)
def _get_allow_public_internet(self) -> bool:
"""Get the allow_public_internet setting."""
if self.cache_settings is None:
return True # Default to allowing if no settings provided
return self.cache_settings.allow_public_internet
def _match_source(self, url: str) -> Optional[UpstreamSource]:
"""
Find the upstream source that matches the given URL.
@@ -278,6 +288,7 @@ class UpstreamClient:
FetchResult with content, hash, size, and headers.
Raises:
AirGapError: If air-gap mode blocks the request.
SourceDisabledError: If the matching source is disabled.
UpstreamConnectionError: On connection failures.
UpstreamTimeoutError: On timeout.
@@ -290,6 +301,19 @@ class UpstreamClient:
# Match URL to source
source = self._match_source(url)
# Check air-gap mode
allow_public = self._get_allow_public_internet()
if not allow_public:
if source is None:
raise AirGapError(
f"Air-gap mode enabled: URL does not match any configured upstream source: {url}"
)
if source.is_public:
raise AirGapError(
f"Air-gap mode enabled: Cannot fetch from public source '{source.name}'"
)
# Check if source is enabled (if we have a match)
if source is not None and not source.enabled:
raise SourceDisabledError(
@@ -512,8 +536,7 @@ class UpstreamClient:
Test connectivity to an upstream source.
Performs a HEAD request to the source URL to verify connectivity
and authentication. Does not follow redirects - a 3xx response
is considered successful since it proves the server is reachable.
and authentication.
Args:
source: The upstream source to test.
@@ -541,7 +564,7 @@ class UpstreamClient:
source.url,
headers=headers,
auth=auth,
follow_redirects=False,
follow_redirects=True,
)
# Consider 2xx and 3xx as success, also 405 (Method Not Allowed)
# since some servers don't support HEAD
@@ -559,7 +582,5 @@ class UpstreamClient:
return (False, f"Connection timed out: {e}", None)
except httpx.ReadTimeout as e:
return (False, f"Read timed out: {e}", None)
except httpx.TooManyRedirects as e:
return (False, f"Too many redirects: {e}", None)
except Exception as e:
return (False, f"Error: {e}", None)

View File

@@ -1 +0,0 @@
# Scripts package

View File

@@ -1,262 +0,0 @@
#!/usr/bin/env python3
"""
Backfill script to extract dependencies from cached PyPI packages.
This script scans all artifacts in the _pypi project and extracts
Requires-Dist metadata from wheel and sdist files that don't already
have dependencies recorded.
Usage:
# From within the container:
python -m scripts.backfill_pypi_dependencies
# Or with docker exec:
docker exec orchard_orchard-server_1 python -m scripts.backfill_pypi_dependencies
# Dry run (preview only):
docker exec orchard_orchard-server_1 python -m scripts.backfill_pypi_dependencies --dry-run
"""
import argparse
import logging
import re
import sys
import tarfile
import zipfile
from io import BytesIO
from typing import List, Optional, Tuple
# Add parent directory to path for imports
sys.path.insert(0, "/app")
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from backend.app.config import get_settings
from backend.app.models import (
Artifact,
ArtifactDependency,
Package,
Project,
Tag,
)
from backend.app.storage import get_storage
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)
def parse_requires_dist(requires_dist: str) -> Tuple[Optional[str], Optional[str]]:
"""Parse a Requires-Dist line into (package_name, version_constraint)."""
# Remove any environment markers (after semicolon)
if ";" in requires_dist:
requires_dist = requires_dist.split(";")[0].strip()
# Match patterns like "package (>=1.0)" or "package>=1.0" or "package"
match = re.match(
r"^([a-zA-Z0-9][-a-zA-Z0-9._]*)\s*(?:\(([^)]+)\)|([<>=!~][^\s;]+))?",
requires_dist.strip(),
)
if not match:
return None, None
package_name = match.group(1)
version_constraint = match.group(2) or match.group(3)
# Normalize package name (PEP 503)
normalized_name = re.sub(r"[-_.]+", "-", package_name).lower()
if version_constraint:
version_constraint = version_constraint.strip()
return normalized_name, version_constraint
def extract_requires_from_metadata(metadata_content: str) -> List[Tuple[str, Optional[str]]]:
"""Extract all Requires-Dist entries from METADATA/PKG-INFO content."""
dependencies = []
for line in metadata_content.split("\n"):
if line.startswith("Requires-Dist:"):
value = line[len("Requires-Dist:"):].strip()
pkg_name, version = parse_requires_dist(value)
if pkg_name:
dependencies.append((pkg_name, version))
return dependencies
def extract_metadata_from_wheel(content: bytes) -> Optional[str]:
"""Extract METADATA file content from a wheel (zip) file."""
try:
with zipfile.ZipFile(BytesIO(content)) as zf:
for name in zf.namelist():
if name.endswith(".dist-info/METADATA"):
return zf.read(name).decode("utf-8", errors="replace")
except Exception as e:
logger.warning(f"Failed to extract metadata from wheel: {e}")
return None
def extract_metadata_from_sdist(content: bytes) -> Optional[str]:
"""Extract PKG-INFO file content from a source distribution (.tar.gz)."""
try:
with tarfile.open(fileobj=BytesIO(content), mode="r:gz") as tf:
for member in tf.getmembers():
if member.name.endswith("/PKG-INFO") and member.name.count("/") == 1:
f = tf.extractfile(member)
if f:
return f.read().decode("utf-8", errors="replace")
except Exception as e:
logger.warning(f"Failed to extract metadata from sdist: {e}")
return None
def extract_dependencies(content: bytes, filename: str) -> List[Tuple[str, Optional[str]]]:
"""Extract dependencies from a PyPI package file."""
metadata = None
if filename.endswith(".whl"):
metadata = extract_metadata_from_wheel(content)
elif filename.endswith(".tar.gz"):
metadata = extract_metadata_from_sdist(content)
if metadata:
return extract_requires_from_metadata(metadata)
return []
def backfill_dependencies(dry_run: bool = False):
"""Main backfill function."""
settings = get_settings()
# Create database connection
engine = create_engine(settings.database_url)
Session = sessionmaker(bind=engine)
db = Session()
# Create storage client
storage = get_storage()
try:
# Find the _pypi project
pypi_project = db.query(Project).filter(Project.name == "_pypi").first()
if not pypi_project:
logger.info("No _pypi project found. Nothing to backfill.")
return
# Get all packages in _pypi
packages = db.query(Package).filter(Package.project_id == pypi_project.id).all()
logger.info(f"Found {len(packages)} packages in _pypi project")
total_artifacts = 0
artifacts_with_deps = 0
artifacts_processed = 0
dependencies_added = 0
for package in packages:
# Get all tags (each tag points to an artifact)
tags = db.query(Tag).filter(Tag.package_id == package.id).all()
for tag in tags:
total_artifacts += 1
filename = tag.name
# Skip non-package files (like .metadata files)
if not (filename.endswith(".whl") or filename.endswith(".tar.gz")):
continue
# Check if this artifact already has dependencies
existing_deps = db.query(ArtifactDependency).filter(
ArtifactDependency.artifact_id == tag.artifact_id
).count()
if existing_deps > 0:
artifacts_with_deps += 1
continue
# Get the artifact
artifact = db.query(Artifact).filter(Artifact.id == tag.artifact_id).first()
if not artifact:
logger.warning(f"Artifact {tag.artifact_id} not found for tag {filename}")
continue
logger.info(f"Processing {package.name}/{filename}...")
if dry_run:
logger.info(f" [DRY RUN] Would extract dependencies from {filename}")
artifacts_processed += 1
continue
# Download the artifact from S3
try:
content = storage.get(artifact.s3_key)
except Exception as e:
logger.error(f" Failed to download {filename}: {e}")
continue
# Extract dependencies
deps = extract_dependencies(content, filename)
if deps:
logger.info(f" Found {len(deps)} dependencies")
for dep_name, dep_version in deps:
# Check if already exists (race condition protection)
existing = db.query(ArtifactDependency).filter(
ArtifactDependency.artifact_id == tag.artifact_id,
ArtifactDependency.dependency_project == "_pypi",
ArtifactDependency.dependency_package == dep_name,
).first()
if not existing:
dep = ArtifactDependency(
artifact_id=tag.artifact_id,
dependency_project="_pypi",
dependency_package=dep_name,
version_constraint=dep_version if dep_version else "*",
)
db.add(dep)
dependencies_added += 1
logger.info(f" + {dep_name} {dep_version or '*'}")
db.commit()
else:
logger.info(f" No dependencies found")
artifacts_processed += 1
logger.info("")
logger.info("=" * 50)
logger.info("Backfill complete!")
logger.info(f" Total artifacts: {total_artifacts}")
logger.info(f" Already had deps: {artifacts_with_deps}")
logger.info(f" Processed: {artifacts_processed}")
logger.info(f" Dependencies added: {dependencies_added}")
if dry_run:
logger.info(" (DRY RUN - no changes made)")
finally:
db.close()
def main():
parser = argparse.ArgumentParser(
description="Backfill dependencies for cached PyPI packages"
)
parser.add_argument(
"--dry-run",
action="store_true",
help="Preview what would be done without making changes",
)
args = parser.parse_args()
backfill_dependencies(dry_run=args.dry_run)
if __name__ == "__main__":
main()

View File

@@ -128,9 +128,7 @@ class TestProjectListingFilters:
assert response.status_code == 200
data = response.json()
# Filter out system projects (names starting with "_") as they may have
# collation-specific sort behavior and aren't part of the test data
names = [p["name"] for p in data["items"] if not p["name"].startswith("_")]
names = [p["name"] for p in data["items"]]
assert names == sorted(names)

View File

@@ -1,137 +0,0 @@
"""Integration tests for PyPI transparent proxy."""
import os
import pytest
import httpx
def get_base_url():
"""Get the base URL for the Orchard server from environment."""
return os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
class TestPyPIProxyEndpoints:
"""Tests for PyPI proxy endpoints.
These endpoints are public (no auth required) since pip needs to use them.
"""
@pytest.mark.integration
def test_pypi_simple_index(self):
"""Test that /pypi/simple/ returns HTML response."""
with httpx.Client(base_url=get_base_url(), timeout=30.0) as client:
response = client.get("/pypi/simple/")
# Returns 200 if sources configured, 503 if not
assert response.status_code in (200, 503)
if response.status_code == 200:
assert "text/html" in response.headers.get("content-type", "")
else:
assert "No PyPI upstream sources configured" in response.json()["detail"]
@pytest.mark.integration
def test_pypi_package_endpoint(self):
"""Test that /pypi/simple/{package}/ returns appropriate response."""
with httpx.Client(base_url=get_base_url(), timeout=30.0) as client:
response = client.get("/pypi/simple/requests/")
# Returns 200 if sources configured and package found,
# 404 if package not found, 503 if no sources
assert response.status_code in (200, 404, 503)
if response.status_code == 200:
assert "text/html" in response.headers.get("content-type", "")
elif response.status_code == 404:
assert "not found" in response.json()["detail"].lower()
else: # 503
assert "No PyPI upstream sources configured" in response.json()["detail"]
@pytest.mark.integration
def test_pypi_download_missing_upstream_param(self):
"""Test that /pypi/simple/{package}/{filename} requires upstream param."""
with httpx.Client(base_url=get_base_url(), timeout=30.0) as client:
response = client.get("/pypi/simple/requests/requests-2.31.0.tar.gz")
assert response.status_code == 400
assert "upstream" in response.json()["detail"].lower()
class TestPyPILinkRewriting:
"""Tests for URL rewriting in PyPI proxy responses."""
def test_rewrite_package_links(self):
"""Test that download links are rewritten to go through proxy."""
from app.pypi_proxy import _rewrite_package_links
html = '''
<html>
<body>
<a href="https://files.pythonhosted.org/packages/ab/cd/requests-2.31.0.tar.gz#sha256=abc123">requests-2.31.0.tar.gz</a>
<a href="https://files.pythonhosted.org/packages/ef/gh/requests-2.31.0-py3-none-any.whl#sha256=def456">requests-2.31.0-py3-none-any.whl</a>
</body>
</html>
'''
# upstream_base_url is used to resolve relative URLs (not needed here since URLs are absolute)
result = _rewrite_package_links(
html,
"http://localhost:8080",
"requests",
"https://pypi.org/simple/requests/"
)
# Links should be rewritten to go through our proxy
assert "/pypi/simple/requests/requests-2.31.0.tar.gz?upstream=" in result
assert "/pypi/simple/requests/requests-2.31.0-py3-none-any.whl?upstream=" in result
# Original URLs should be encoded in upstream param
assert "files.pythonhosted.org" in result
# Hash fragments should be preserved
assert "#sha256=abc123" in result
assert "#sha256=def456" in result
def test_rewrite_relative_links(self):
"""Test that relative URLs are resolved to absolute URLs."""
from app.pypi_proxy import _rewrite_package_links
# Artifactory-style relative URLs
html = '''
<html>
<body>
<a href="../../packages/ab/cd/requests-2.31.0.tar.gz#sha256=abc123">requests-2.31.0.tar.gz</a>
</body>
</html>
'''
result = _rewrite_package_links(
html,
"https://orchard.example.com",
"requests",
"https://artifactory.example.com/api/pypi/pypi-remote/simple/requests/"
)
# The relative URL should be resolved to absolute
# ../../packages/ab/cd/... from /api/pypi/pypi-remote/simple/requests/ resolves to /api/pypi/pypi-remote/packages/ab/cd/...
assert "upstream=https%3A%2F%2Fartifactory.example.com%2Fapi%2Fpypi%2Fpypi-remote%2Fpackages" in result
# Hash fragment should be preserved
assert "#sha256=abc123" in result
class TestPyPIPackageNormalization:
"""Tests for PyPI package name normalization."""
@pytest.mark.integration
def test_package_name_normalized(self):
"""Test that package names are normalized per PEP 503.
Different capitalizations/separators should all be valid paths.
The endpoint normalizes to lowercase with hyphens before lookup.
"""
with httpx.Client(base_url=get_base_url(), timeout=30.0) as client:
# Test various name formats - all should be valid endpoint paths
for package_name in ["Requests", "some_package", "some-package"]:
response = client.get(f"/pypi/simple/{package_name}/")
# 200 = found, 404 = not found, 503 = no sources configured
assert response.status_code in (200, 404, 503), \
f"Unexpected status {response.status_code} for {package_name}"
# Verify response is appropriate for the status code
if response.status_code == 200:
assert "text/html" in response.headers.get("content-type", "")
elif response.status_code == 503:
assert "No PyPI upstream sources configured" in response.json()["detail"]

View File

@@ -1,263 +0,0 @@
"""Tests for PyPI cache worker module."""
import os
import pytest
import re
from datetime import datetime, timedelta
from unittest.mock import MagicMock, patch
from uuid import uuid4
import httpx
def get_base_url():
"""Get the base URL for the Orchard server from environment."""
return os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
class TestPyPICacheTaskModel:
"""Tests for PyPICacheTask model."""
def test_model_creation(self):
"""Test that PyPICacheTask model can be instantiated with explicit values."""
from app.models import PyPICacheTask
task = PyPICacheTask(
package_name="requests",
version_constraint=">=2.25.0",
depth=0,
status="pending",
attempts=0,
max_attempts=3,
)
assert task.package_name == "requests"
assert task.version_constraint == ">=2.25.0"
assert task.depth == 0
assert task.status == "pending"
assert task.attempts == 0
assert task.max_attempts == 3
def test_model_fields_exist(self):
"""Test that PyPICacheTask has all expected fields."""
from app.models import PyPICacheTask
# Create with minimal required field
task = PyPICacheTask(package_name="urllib3")
# Verify all expected attributes exist (SQLAlchemy defaults apply on flush)
assert hasattr(task, "status")
assert hasattr(task, "depth")
assert hasattr(task, "attempts")
assert hasattr(task, "max_attempts")
assert hasattr(task, "version_constraint")
assert hasattr(task, "parent_task_id")
assert hasattr(task, "triggered_by_artifact")
class TestEnqueueCacheTask:
"""Tests for enqueue_cache_task function."""
def test_normalize_package_name(self):
"""Test that package names are normalized per PEP 503."""
# Test the normalization pattern used in the worker
test_cases = [
("Requests", "requests"),
("typing_extensions", "typing-extensions"),
("some.package", "some-package"),
("UPPER_CASE", "upper-case"),
("mixed-Case_name", "mixed-case-name"),
]
for input_name, expected in test_cases:
normalized = re.sub(r"[-_.]+", "-", input_name).lower()
assert normalized == expected, f"Failed for {input_name}"
class TestCacheWorkerFunctions:
"""Tests for cache worker helper functions."""
def test_exponential_backoff_calculation(self):
"""Test that exponential backoff is calculated correctly."""
# The formula is: 30 * (2 ** (attempts - 1))
# Attempt 1 failed → 30s
# Attempt 2 failed → 60s
# Attempt 3 failed → 120s
def calc_backoff(attempts):
return 30 * (2 ** (attempts - 1))
assert calc_backoff(1) == 30
assert calc_backoff(2) == 60
assert calc_backoff(3) == 120
class TestPyPICacheAPIEndpoints:
"""Integration tests for PyPI cache API endpoints."""
@pytest.mark.integration
def test_cache_status_endpoint(self):
"""Test GET /pypi/cache/status returns queue statistics."""
with httpx.Client(base_url=get_base_url(), timeout=30.0) as client:
response = client.get("/pypi/cache/status")
assert response.status_code == 200
data = response.json()
assert "pending" in data
assert "in_progress" in data
assert "completed" in data
assert "failed" in data
# All values should be non-negative integers
assert isinstance(data["pending"], int)
assert isinstance(data["in_progress"], int)
assert isinstance(data["completed"], int)
assert isinstance(data["failed"], int)
assert data["pending"] >= 0
assert data["in_progress"] >= 0
assert data["completed"] >= 0
assert data["failed"] >= 0
@pytest.mark.integration
def test_cache_failed_endpoint(self):
"""Test GET /pypi/cache/failed returns list of failed tasks."""
with httpx.Client(base_url=get_base_url(), timeout=30.0) as client:
response = client.get("/pypi/cache/failed")
assert response.status_code == 200
data = response.json()
assert isinstance(data, list)
# If there are failed tasks, verify structure
if data:
task = data[0]
assert "id" in task
assert "package" in task
assert "error" in task
assert "attempts" in task
assert "depth" in task
@pytest.mark.integration
def test_cache_failed_with_limit(self):
"""Test GET /pypi/cache/failed respects limit parameter."""
with httpx.Client(base_url=get_base_url(), timeout=30.0) as client:
response = client.get("/pypi/cache/failed?limit=5")
assert response.status_code == 200
data = response.json()
assert isinstance(data, list)
assert len(data) <= 5
@pytest.mark.integration
def test_cache_retry_nonexistent_package(self):
"""Test POST /pypi/cache/retry/{package} returns 404 for unknown package."""
with httpx.Client(base_url=get_base_url(), timeout=30.0) as client:
# Use a random package name that definitely doesn't exist
response = client.post(f"/pypi/cache/retry/nonexistent-package-{uuid4().hex[:8]}")
assert response.status_code == 404
# Check for "no failed" or "not found" in error message
detail = response.json()["detail"].lower()
assert "no failed" in detail or "not found" in detail
@pytest.mark.integration
def test_cache_retry_all_endpoint(self):
"""Test POST /pypi/cache/retry-all returns success."""
with httpx.Client(base_url=get_base_url(), timeout=30.0) as client:
response = client.post("/pypi/cache/retry-all")
assert response.status_code == 200
data = response.json()
assert "count" in data
assert "message" in data
assert isinstance(data["count"], int)
assert data["count"] >= 0
class TestCacheTaskDeduplication:
"""Tests for cache task deduplication logic."""
def test_find_cached_package_returns_none_for_uncached(self):
"""Test that _find_cached_package returns None for uncached packages."""
# This is a unit test pattern - mock the database
from unittest.mock import MagicMock
mock_db = MagicMock()
mock_db.query.return_value.filter.return_value.first.return_value = None
from app.pypi_cache_worker import _find_cached_package
result = _find_cached_package(mock_db, "nonexistent-package")
assert result is None
class TestCacheWorkerConfiguration:
"""Tests for cache worker configuration."""
def test_config_settings_exist(self):
"""Test that PyPI cache config settings are available."""
from app.config import get_settings
settings = get_settings()
# Check that settings exist and have reasonable defaults
assert hasattr(settings, "pypi_cache_workers")
assert hasattr(settings, "pypi_cache_max_depth")
assert hasattr(settings, "pypi_cache_max_attempts")
# Check aliases work
assert settings.PYPI_CACHE_WORKERS == settings.pypi_cache_workers
assert settings.PYPI_CACHE_MAX_DEPTH == settings.pypi_cache_max_depth
assert settings.PYPI_CACHE_MAX_ATTEMPTS == settings.pypi_cache_max_attempts
def test_config_default_values(self):
"""Test that PyPI cache config has sensible defaults."""
from app.config import get_settings
settings = get_settings()
# These are the defaults from our implementation
assert settings.pypi_cache_workers == 5
assert settings.pypi_cache_max_depth == 10
assert settings.pypi_cache_max_attempts == 3
class TestFetchAndCachePackage:
"""Tests for _fetch_and_cache_package function."""
def test_result_structure_success(self):
"""Test that success result has correct structure."""
# Mock a successful result
result = {"success": True, "artifact_id": "abc123"}
assert result["success"] is True
assert "artifact_id" in result
def test_result_structure_failure(self):
"""Test that failure result has correct structure."""
# Mock a failure result
result = {"success": False, "error": "Package not found"}
assert result["success"] is False
assert "error" in result
class TestWorkerPoolLifecycle:
"""Tests for worker pool initialization and shutdown."""
def test_init_shutdown_cycle(self):
"""Test that worker pool can be initialized and shut down cleanly."""
from app.pypi_cache_worker import (
init_cache_worker_pool,
shutdown_cache_worker_pool,
_cache_worker_pool,
_cache_worker_running,
)
# Note: We can't fully test this in isolation because the module
# has global state and may conflict with the running server.
# These tests verify the function signatures work.
# The pool should be initialized by main.py on startup
# We just verify the functions are callable
assert callable(init_cache_worker_pool)
assert callable(shutdown_cache_worker_pool)

View File

@@ -91,6 +91,7 @@ class TestUpstreamSourceModel:
assert hasattr(source, 'source_type')
assert hasattr(source, 'url')
assert hasattr(source, 'enabled')
assert hasattr(source, 'is_public')
assert hasattr(source, 'auth_type')
assert hasattr(source, 'username')
assert hasattr(source, 'password_encrypted')
@@ -106,6 +107,7 @@ class TestUpstreamSourceModel:
source_type="npm",
url="https://npm.example.com",
enabled=True,
is_public=False,
auth_type="basic",
username="admin",
priority=50,
@@ -114,6 +116,7 @@ class TestUpstreamSourceModel:
assert source.source_type == "npm"
assert source.url == "https://npm.example.com"
assert source.enabled is True
assert source.is_public is False
assert source.auth_type == "basic"
assert source.username == "admin"
assert source.priority == 50
@@ -257,6 +260,7 @@ class TestUpstreamSourceSchemas:
source_type="npm",
url="https://npm.example.com",
enabled=True,
is_public=False,
auth_type="basic",
username="admin",
password="secret",
@@ -277,6 +281,7 @@ class TestUpstreamSourceSchemas:
)
assert source.source_type == "generic"
assert source.enabled is False
assert source.is_public is True
assert source.auth_type == "none"
assert source.priority == 100
@@ -573,6 +578,7 @@ class TestUpstreamClientSourceMatching:
name="npm-public",
url="https://registry.npmjs.org",
enabled=True,
is_public=True,
auth_type="none",
priority=100,
)
@@ -597,6 +603,7 @@ class TestUpstreamClientSourceMatching:
name="npm-private",
url="https://registry.npmjs.org",
enabled=True,
is_public=False,
auth_type="basic",
priority=50,
)
@@ -604,6 +611,7 @@ class TestUpstreamClientSourceMatching:
name="npm-public",
url="https://registry.npmjs.org",
enabled=True,
is_public=True,
auth_type="none",
priority=100,
)
@@ -703,6 +711,89 @@ class TestUpstreamClientAuthHeaders:
assert auth is None
class TestUpstreamClientAirGapMode:
"""Tests for air-gap mode enforcement."""
def test_airgap_blocks_public_source(self):
"""Test that air-gap mode blocks public sources."""
from app.models import UpstreamSource, CacheSettings
from app.upstream import UpstreamClient, AirGapError
source = UpstreamSource(
name="npm-public",
url="https://registry.npmjs.org",
enabled=True,
is_public=True,
auth_type="none",
priority=100,
)
settings = CacheSettings(allow_public_internet=False)
client = UpstreamClient(sources=[source], cache_settings=settings)
with pytest.raises(AirGapError) as exc_info:
client.fetch("https://registry.npmjs.org/lodash")
assert "Air-gap mode enabled" in str(exc_info.value)
assert "public source" in str(exc_info.value)
def test_airgap_blocks_unmatched_url(self):
"""Test that air-gap mode blocks URLs not matching any source."""
from app.models import CacheSettings
from app.upstream import UpstreamClient, AirGapError
settings = CacheSettings(allow_public_internet=False)
client = UpstreamClient(sources=[], cache_settings=settings)
with pytest.raises(AirGapError) as exc_info:
client.fetch("https://example.com/file.tgz")
assert "Air-gap mode enabled" in str(exc_info.value)
assert "does not match any configured" in str(exc_info.value)
def test_airgap_allows_private_source(self):
"""Test that air-gap mode allows private sources."""
from app.models import UpstreamSource, CacheSettings
from app.upstream import UpstreamClient, SourceDisabledError
source = UpstreamSource(
name="npm-private",
url="https://npm.internal.corp",
enabled=False, # Disabled, but would pass air-gap check
is_public=False,
auth_type="none",
priority=100,
)
settings = CacheSettings(allow_public_internet=False)
client = UpstreamClient(sources=[source], cache_settings=settings)
# Should fail due to disabled source, not air-gap
with pytest.raises(SourceDisabledError):
client.fetch("https://npm.internal.corp/package.tgz")
def test_allow_public_internet_true(self):
"""Test that public internet is allowed when setting is true."""
from app.models import UpstreamSource, CacheSettings
from app.upstream import UpstreamClient, SourceDisabledError
source = UpstreamSource(
name="npm-public",
url="https://registry.npmjs.org",
enabled=False, # Disabled
is_public=True,
auth_type="none",
priority=100,
)
settings = CacheSettings(allow_public_internet=True)
client = UpstreamClient(sources=[source], cache_settings=settings)
# Should fail due to disabled source, not air-gap
with pytest.raises(SourceDisabledError):
client.fetch("https://registry.npmjs.org/lodash")
class TestUpstreamClientSourceDisabled:
"""Tests for disabled source handling."""
@@ -715,6 +806,7 @@ class TestUpstreamClientSourceDisabled:
name="npm-public",
url="https://registry.npmjs.org",
enabled=False,
is_public=True,
auth_type="none",
priority=100,
)
@@ -887,6 +979,13 @@ class TestUpstreamExceptions:
assert error.status_code == 404
assert error.response_headers == {"x-custom": "value"}
def test_airgap_error(self):
"""Test AirGapError."""
from app.upstream import AirGapError
error = AirGapError("Blocked by air-gap")
assert "Blocked by air-gap" in str(error)
def test_source_not_found_error(self):
"""Test SourceNotFoundError."""
from app.upstream import SourceNotFoundError
@@ -1321,6 +1420,7 @@ class TestUpstreamSourcesAdminAPI:
"source_type": "generic",
"url": "https://example.com/packages",
"enabled": False,
"is_public": False,
"auth_type": "none",
"priority": 200,
},
@@ -1332,6 +1432,7 @@ class TestUpstreamSourcesAdminAPI:
assert data["source_type"] == "generic"
assert data["url"] == "https://example.com/packages"
assert data["enabled"] is False
assert data["is_public"] is False
assert data["priority"] == 200
assert "id" in data
@@ -1351,6 +1452,7 @@ class TestUpstreamSourcesAdminAPI:
"source_type": "npm",
"url": "https://npm.internal.corp",
"enabled": False,
"is_public": False,
"auth_type": "basic",
"username": "reader",
"password": "secret123",
@@ -1856,6 +1958,7 @@ class TestEnvVarUpstreamSourcesParsing:
# Check defaults
assert test_source.source_type == "generic"
assert test_source.enabled is True
assert test_source.is_public is True
assert test_source.auth_type == "none"
assert test_source.priority == 100
finally:
@@ -1878,6 +1981,7 @@ class TestEnvSourceToResponse:
url="https://example.com",
source_type="npm",
enabled=True,
is_public=False,
auth_type="basic",
username="user",
password="pass",
@@ -1888,6 +1992,7 @@ class TestEnvSourceToResponse:
assert source.url == "https://example.com"
assert source.source_type == "npm"
assert source.enabled is True
assert source.is_public is False
assert source.auth_type == "basic"
assert source.username == "user"
assert source.password == "pass"

View File

@@ -1,251 +0,0 @@
# PyPI Cache Robustness Design
**Date:** 2026-02-02
**Status:** Approved
**Branch:** fix/pypi-proxy-timeout
## Problem
The current PyPI proxy proactive caching has reliability issues:
- Unbounded thread spawning for each dependency
- Silent failures (logged but not tracked or retried)
- No visibility into cache completeness
- Deps-of-deps often missing due to untracked failures
## Solution
Database-backed task queue with managed worker pool, automatic retries, and visibility API.
---
## Data Model
New table `pypi_cache_tasks`:
```sql
CREATE TABLE pypi_cache_tasks (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-- What to cache
package_name VARCHAR(255) NOT NULL,
version_constraint VARCHAR(255),
-- Origin tracking
parent_task_id UUID REFERENCES pypi_cache_tasks(id) ON DELETE SET NULL,
depth INTEGER NOT NULL DEFAULT 0,
triggered_by_artifact VARCHAR(64),
-- Status
status VARCHAR(20) NOT NULL DEFAULT 'pending',
attempts INTEGER NOT NULL DEFAULT 0,
max_attempts INTEGER NOT NULL DEFAULT 3,
-- Results
cached_artifact_id VARCHAR(64),
error_message TEXT,
-- Timing
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
started_at TIMESTAMP WITH TIME ZONE,
completed_at TIMESTAMP WITH TIME ZONE,
next_retry_at TIMESTAMP WITH TIME ZONE
);
-- Indexes
CREATE INDEX idx_pypi_cache_tasks_status_retry ON pypi_cache_tasks(status, next_retry_at);
CREATE INDEX idx_pypi_cache_tasks_package_status ON pypi_cache_tasks(package_name, status);
CREATE INDEX idx_pypi_cache_tasks_parent ON pypi_cache_tasks(parent_task_id);
-- Constraints
ALTER TABLE pypi_cache_tasks ADD CONSTRAINT check_task_status
CHECK (status IN ('pending', 'in_progress', 'completed', 'failed'));
```
---
## Worker Architecture
### Thread Pool (5 workers default)
```python
_cache_worker_pool: ThreadPoolExecutor = None
_cache_worker_running: bool = False
def init_cache_worker_pool(max_workers: int = 5):
global _cache_worker_pool, _cache_worker_running
_cache_worker_pool = ThreadPoolExecutor(max_workers=max_workers, thread_name_prefix="pypi-cache-")
_cache_worker_running = True
threading.Thread(target=_cache_dispatcher_loop, daemon=True).start()
```
### Dispatcher Loop
- Polls DB every 2 seconds when idle
- Fetches batch of 10 ready tasks
- Marks tasks in_progress before submitting to pool
- Orders by depth (shallow first) then FIFO
### Task Processing
1. Dedup check - skip if package already cached
2. Dedup check - skip if pending/in_progress task exists for same package
3. Depth check - fail if >= 10 levels deep
4. Fetch package index page
5. Download best matching file (prefer wheels)
6. Store artifact, extract dependencies
7. Queue child tasks for each dependency
8. Mark completed or handle failure
---
## Retry Logic
Exponential backoff with 3 attempts:
| Attempt | Backoff |
|---------|---------|
| 1 fails | 30 seconds |
| 2 fails | 60 seconds |
| 3 fails | Permanent failure |
```python
backoff_seconds = 30 * (2 ** (attempts - 1))
task.next_retry_at = datetime.utcnow() + timedelta(seconds=backoff_seconds)
```
---
## API Endpoints
| Endpoint | Method | Purpose |
|----------|--------|---------|
| `/pypi/cache/status` | GET | Queue health summary |
| `/pypi/cache/failed` | GET | List failed tasks with errors |
| `/pypi/cache/retry/{package}` | POST | Retry single failed package |
| `/pypi/cache/retry-all` | POST | Retry all failed packages |
### Response Examples
**GET /pypi/cache/status**
```json
{
"pending": 12,
"in_progress": 3,
"completed": 847,
"failed": 5
}
```
**GET /pypi/cache/failed**
```json
[
{
"package": "some-obscure-pkg",
"error": "Timeout connecting to upstream",
"attempts": 3,
"failed_at": "2026-02-02T10:30:00Z"
}
]
```
---
## Integration Points
### Replace Thread Spawning (pypi_proxy.py)
```python
# OLD: _start_background_dependency_caching(base_url, unique_deps)
# NEW:
for dep_name, dep_version in unique_deps:
_enqueue_cache_task(
db,
package_name=dep_name,
version_constraint=dep_version,
parent_task_id=None,
depth=0,
triggered_by_artifact=sha256,
)
```
### App Startup (main.py)
```python
@app.on_event("startup")
async def startup():
init_cache_worker_pool(max_workers=settings.PYPI_CACHE_WORKERS)
@app.on_event("shutdown")
async def shutdown():
shutdown_cache_worker_pool()
```
### Configuration (config.py)
```python
PYPI_CACHE_WORKERS = int(os.getenv("ORCHARD_PYPI_CACHE_WORKERS", "5"))
PYPI_CACHE_MAX_DEPTH = int(os.getenv("ORCHARD_PYPI_CACHE_MAX_DEPTH", "10"))
PYPI_CACHE_MAX_ATTEMPTS = int(os.getenv("ORCHARD_PYPI_CACHE_MAX_ATTEMPTS", "3"))
```
---
## Files to Create/Modify
| File | Action |
|------|--------|
| `migrations/0XX_pypi_cache_tasks.sql` | Create - new table |
| `backend/app/models.py` | Modify - add PyPICacheTask model |
| `backend/app/pypi_cache_worker.py` | Create - worker pool + processing |
| `backend/app/pypi_proxy.py` | Modify - replace threads, add API |
| `backend/app/main.py` | Modify - init worker on startup |
| `backend/app/config.py` | Modify - add config variables |
| `backend/tests/test_pypi_cache_worker.py` | Create - unit tests |
| `backend/tests/integration/test_pypi_cache_api.py` | Create - API tests |
---
## Deduplication Strategy
### At Task Creation Time
```python
def _enqueue_cache_task(db, package_name, ...):
# Check for existing pending/in_progress task
existing_task = db.query(PyPICacheTask).filter(
PyPICacheTask.package_name == package_name,
PyPICacheTask.status.in_(["pending", "in_progress"])
).first()
if existing_task:
return existing_task
# Check if already cached
if _find_cached_package(db, package_name):
return None
# Create new task
...
```
### At Processing Time (safety check)
```python
def _process_cache_task(task_id):
# Double-check in case of race
if _find_cached_package(db, task.package_name):
_mark_task_completed(db, task, cached_artifact_id=existing.artifact_id)
return
```
---
## Success Criteria
- [ ] No unbounded thread creation
- [ ] All dependency caching attempts tracked in database
- [ ] Failed tasks automatically retry with backoff
- [ ] API provides visibility into queue status
- [ ] Manual retry capability for failed packages
- [ ] Existing pip install workflow unchanged (transparent)
- [ ] Tests cover worker, retry, and API functionality

View File

@@ -46,6 +46,8 @@ import {
UpstreamSourceCreate,
UpstreamSourceUpdate,
UpstreamSourceTestResult,
CacheSettings,
CacheSettingsUpdate,
} from './types';
const API_BASE = '/api/v1';
@@ -746,3 +748,21 @@ export async function testUpstreamSource(id: string): Promise<UpstreamSourceTest
});
return handleResponse<UpstreamSourceTestResult>(response);
}
// Cache Settings Admin API
export async function getCacheSettings(): Promise<CacheSettings> {
const response = await fetch(`${API_BASE}/admin/cache-settings`, {
credentials: 'include',
});
return handleResponse<CacheSettings>(response);
}
export async function updateCacheSettings(data: CacheSettingsUpdate): Promise<CacheSettings> {
const response = await fetch(`${API_BASE}/admin/cache-settings`, {
method: 'PUT',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(data),
credentials: 'include',
});
return handleResponse<CacheSettings>(response);
}

View File

@@ -272,7 +272,7 @@
.footer {
background: var(--bg-secondary);
border-top: 1px solid var(--border-primary);
padding: 12px 0;
padding: 24px 0;
}
.footer-content {

View File

@@ -34,6 +34,74 @@
margin-bottom: 1rem;
}
/* Settings Section */
.settings-section {
background: var(--bg-secondary);
border: 1px solid var(--border-color);
border-radius: 8px;
padding: 1.5rem;
margin-bottom: 2rem;
}
.settings-grid {
display: flex;
flex-direction: column;
gap: 1rem;
}
.setting-item {
display: flex;
justify-content: space-between;
align-items: center;
padding: 1rem;
background: var(--bg-primary);
border: 1px solid var(--border-color);
border-radius: 4px;
}
.toggle-label {
display: flex;
flex-direction: column;
gap: 0.25rem;
}
.setting-name {
font-weight: 500;
color: var(--text-primary);
display: flex;
align-items: center;
gap: 0.5rem;
}
.setting-description {
font-size: 0.85rem;
color: var(--text-secondary);
}
.toggle-button {
padding: 0.5rem 1rem;
border: none;
border-radius: 4px;
cursor: pointer;
font-weight: 500;
min-width: 100px;
}
.toggle-button.on {
background-color: #28a745;
color: white;
}
.toggle-button.off {
background-color: #dc3545;
color: white;
}
.toggle-button:disabled {
opacity: 0.6;
cursor: not-allowed;
}
/* Sources Section */
.sources-section {
background: var(--bg-secondary);
@@ -65,7 +133,7 @@
.sources-table th,
.sources-table td {
padding: 0.75rem 1rem;
text-align: center;
text-align: left;
border-bottom: 1px solid var(--border-color);
}
@@ -88,12 +156,6 @@
.source-name {
font-weight: 500;
color: var(--text-primary);
white-space: nowrap;
}
/* Name column should be left-aligned */
.sources-table td:first-child {
text-align: left;
}
.url-cell {
@@ -103,10 +165,10 @@
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
text-align: left;
}
/* Badges */
.public-badge,
.env-badge,
.status-badge {
display: inline-block;
@@ -117,6 +179,11 @@
margin-left: 0.5rem;
}
.public-badge {
background-color: #e3f2fd;
color: #1976d2;
}
.env-badge {
background-color: #fff3e0;
color: #e65100;
@@ -132,12 +199,6 @@
color: #c62828;
}
.coming-soon-badge {
color: #9e9e9e;
font-style: italic;
font-size: 0.85em;
}
/* Actions */
.actions-cell {
white-space: nowrap;
@@ -151,67 +212,18 @@
margin-right: 0;
}
.test-cell {
text-align: center;
width: 2rem;
.test-result {
display: inline-block;
margin-left: 0.5rem;
font-size: 0.85rem;
}
.test-dot {
font-size: 1rem;
cursor: default;
}
.test-dot.success {
.test-result.success {
color: #2e7d32;
}
.test-dot.failure {
.test-result.failure {
color: #c62828;
cursor: pointer;
}
.test-dot.failure:hover {
color: #b71c1c;
}
.test-dot.testing {
color: #1976d2;
animation: pulse 1s infinite;
}
@keyframes pulse {
0%, 100% { opacity: 1; }
50% { opacity: 0.4; }
}
/* Error Modal */
.error-modal-content {
background: var(--bg-primary);
border-radius: 8px;
padding: 2rem;
width: 100%;
max-width: 500px;
}
.error-modal-content h3 {
margin-top: 0;
color: #c62828;
}
.error-modal-content .error-details {
background: var(--bg-tertiary);
padding: 1rem;
border-radius: 4px;
font-family: monospace;
font-size: 0.9rem;
word-break: break-word;
white-space: pre-wrap;
}
.error-modal-content .modal-actions {
display: flex;
justify-content: flex-end;
margin-top: 1.5rem;
}
/* Buttons */
@@ -255,22 +267,10 @@
}
.btn-sm {
padding: 0.25rem 0.75rem;
padding: 0.25rem 0.5rem;
font-size: 0.8rem;
}
.btn-secondary {
background-color: var(--bg-tertiary);
border-color: var(--border-color);
color: var(--text-primary);
font-weight: 500;
}
.btn-secondary:hover {
background-color: var(--bg-secondary);
border-color: var(--text-secondary);
}
.empty-message {
color: var(--text-secondary);
font-style: italic;
@@ -364,14 +364,9 @@
.form-actions {
display: flex;
justify-content: space-between;
align-items: center;
justify-content: flex-end;
gap: 0.5rem;
margin-top: 1.5rem;
padding-top: 1rem;
border-top: 1px solid var(--border-color);
}
.form-actions-right {
display: flex;
gap: 0.5rem;
}

View File

@@ -7,12 +7,13 @@ import {
updateUpstreamSource,
deleteUpstreamSource,
testUpstreamSource,
getCacheSettings,
updateCacheSettings,
} from '../api';
import { UpstreamSource, SourceType, AuthType } from '../types';
import { UpstreamSource, CacheSettings, SourceType, AuthType } from '../types';
import './AdminCachePage.css';
const SOURCE_TYPES: SourceType[] = ['npm', 'pypi', 'maven', 'docker', 'helm', 'nuget', 'deb', 'rpm', 'generic'];
const SUPPORTED_SOURCE_TYPES: Set<SourceType> = new Set(['pypi', 'generic']);
const AUTH_TYPES: AuthType[] = ['none', 'basic', 'bearer', 'api_key'];
function AdminCachePage() {
@@ -24,6 +25,11 @@ function AdminCachePage() {
const [loadingSources, setLoadingSources] = useState(true);
const [sourcesError, setSourcesError] = useState<string | null>(null);
// Cache settings state
const [settings, setSettings] = useState<CacheSettings | null>(null);
const [loadingSettings, setLoadingSettings] = useState(true);
const [settingsError, setSettingsError] = useState<string | null>(null);
// Create/Edit form state
const [showForm, setShowForm] = useState(false);
const [editingSource, setEditingSource] = useState<UpstreamSource | null>(null);
@@ -32,6 +38,7 @@ function AdminCachePage() {
source_type: 'generic' as SourceType,
url: '',
enabled: true,
is_public: true,
auth_type: 'none' as AuthType,
username: '',
password: '',
@@ -47,13 +54,12 @@ function AdminCachePage() {
// Delete confirmation state
const [deletingId, setDeletingId] = useState<string | null>(null);
// Settings update state
const [updatingSettings, setUpdatingSettings] = useState(false);
// Success message
const [successMessage, setSuccessMessage] = useState<string | null>(null);
// Error modal state
const [showErrorModal, setShowErrorModal] = useState(false);
const [selectedError, setSelectedError] = useState<{ sourceName: string; error: string } | null>(null);
useEffect(() => {
if (!authLoading && !user) {
navigate('/login', { state: { from: '/admin/cache' } });
@@ -63,6 +69,7 @@ function AdminCachePage() {
useEffect(() => {
if (user && user.is_admin) {
loadSources();
loadSettings();
}
}, [user]);
@@ -86,6 +93,19 @@ function AdminCachePage() {
}
}
async function loadSettings() {
setLoadingSettings(true);
setSettingsError(null);
try {
const data = await getCacheSettings();
setSettings(data);
} catch (err) {
setSettingsError(err instanceof Error ? err.message : 'Failed to load settings');
} finally {
setLoadingSettings(false);
}
}
function openCreateForm() {
setEditingSource(null);
setFormData({
@@ -93,6 +113,7 @@ function AdminCachePage() {
source_type: 'generic',
url: '',
enabled: true,
is_public: true,
auth_type: 'none',
username: '',
password: '',
@@ -109,6 +130,7 @@ function AdminCachePage() {
source_type: source.source_type,
url: source.url,
enabled: source.enabled,
is_public: source.is_public,
auth_type: source.auth_type,
username: source.username || '',
password: '',
@@ -133,8 +155,6 @@ function AdminCachePage() {
setFormError(null);
try {
let savedSourceId: string | null = null;
if (editingSource) {
// Update existing source
await updateUpstreamSource(editingSource.id, {
@@ -142,35 +162,30 @@ function AdminCachePage() {
source_type: formData.source_type,
url: formData.url.trim(),
enabled: formData.enabled,
is_public: formData.is_public,
auth_type: formData.auth_type,
username: formData.username.trim() || undefined,
password: formData.password || undefined,
priority: formData.priority,
});
savedSourceId = editingSource.id;
setSuccessMessage('Source updated successfully');
} else {
// Create new source
const newSource = await createUpstreamSource({
await createUpstreamSource({
name: formData.name.trim(),
source_type: formData.source_type,
url: formData.url.trim(),
enabled: formData.enabled,
is_public: formData.is_public,
auth_type: formData.auth_type,
username: formData.username.trim() || undefined,
password: formData.password || undefined,
priority: formData.priority,
});
savedSourceId = newSource.id;
setSuccessMessage('Source created successfully');
}
setShowForm(false);
await loadSources();
// Auto-test the source after save
if (savedSourceId) {
testSourceById(savedSourceId);
}
} catch (err) {
setFormError(err instanceof Error ? err.message : 'Failed to save source');
} finally {
@@ -196,28 +211,24 @@ function AdminCachePage() {
}
async function handleTest(source: UpstreamSource) {
testSourceById(source.id);
}
async function testSourceById(sourceId: string) {
setTestingId(sourceId);
setTestResults((prev) => ({ ...prev, [sourceId]: { success: true, message: 'Testing...' } }));
setTestingId(source.id);
setTestResults((prev) => ({ ...prev, [source.id]: { success: true, message: 'Testing...' } }));
try {
const result = await testUpstreamSource(sourceId);
const result = await testUpstreamSource(source.id);
setTestResults((prev) => ({
...prev,
[sourceId]: {
[source.id]: {
success: result.success,
message: result.success
? `OK (${result.elapsed_ms}ms)`
? `Connected (${result.elapsed_ms}ms)`
: result.error || `HTTP ${result.status_code}`,
},
}));
} catch (err) {
setTestResults((prev) => ({
...prev,
[sourceId]: {
[source.id]: {
success: false,
message: err instanceof Error ? err.message : 'Test failed',
},
@@ -227,9 +238,30 @@ function AdminCachePage() {
}
}
function showError(sourceName: string, error: string) {
setSelectedError({ sourceName, error });
setShowErrorModal(true);
async function handleSettingsToggle(field: 'allow_public_internet' | 'auto_create_system_projects') {
if (!settings) return;
// Check if env override is active
const isOverridden =
(field === 'allow_public_internet' && settings.allow_public_internet_env_override !== null) ||
(field === 'auto_create_system_projects' && settings.auto_create_system_projects_env_override !== null);
if (isOverridden) {
alert('This setting is overridden by an environment variable and cannot be changed via UI.');
return;
}
setUpdatingSettings(true);
try {
const update = { [field]: !settings[field] };
const newSettings = await updateCacheSettings(update);
setSettings(newSettings);
setSuccessMessage(`Setting "${field}" updated`);
} catch (err) {
setSettingsError(err instanceof Error ? err.message : 'Failed to update settings');
} finally {
setUpdatingSettings(false);
}
}
if (authLoading) {
@@ -246,13 +278,71 @@ function AdminCachePage() {
return (
<div className="admin-cache-page">
<h1>Upstream Sources</h1>
<h1>Cache Management</h1>
{successMessage && <div className="success-message">{successMessage}</div>}
{/* Cache Settings Section */}
<section className="settings-section">
<h2>Global Settings</h2>
{loadingSettings ? (
<p>Loading settings...</p>
) : settingsError ? (
<div className="error-message">{settingsError}</div>
) : settings ? (
<div className="settings-grid">
<div className="setting-item">
<label className="toggle-label">
<span className="setting-name">
Allow Public Internet
{settings.allow_public_internet_env_override !== null && (
<span className="env-badge" title="Overridden by environment variable">
ENV
</span>
)}
</span>
<span className="setting-description">
When disabled (air-gap mode), requests to public sources are blocked.
</span>
</label>
<button
className={`toggle-button ${settings.allow_public_internet ? 'on' : 'off'}`}
onClick={() => handleSettingsToggle('allow_public_internet')}
disabled={updatingSettings || settings.allow_public_internet_env_override !== null}
>
{settings.allow_public_internet ? 'Enabled' : 'Disabled'}
</button>
</div>
<div className="setting-item">
<label className="toggle-label">
<span className="setting-name">
Auto-create System Projects
{settings.auto_create_system_projects_env_override !== null && (
<span className="env-badge" title="Overridden by environment variable">
ENV
</span>
)}
</span>
<span className="setting-description">
Automatically create system projects (_npm, _pypi, etc.) on first cache request.
</span>
</label>
<button
className={`toggle-button ${settings.auto_create_system_projects ? 'on' : 'off'}`}
onClick={() => handleSettingsToggle('auto_create_system_projects')}
disabled={updatingSettings || settings.auto_create_system_projects_env_override !== null}
>
{settings.auto_create_system_projects ? 'Enabled' : 'Disabled'}
</button>
</div>
</div>
) : null}
</section>
{/* Upstream Sources Section */}
<section className="sources-section">
<div className="section-header">
<h2>Upstream Sources</h2>
<button className="btn btn-primary" onClick={openCreateForm}>
Add Source
</button>
@@ -273,7 +363,7 @@ function AdminCachePage() {
<th>URL</th>
<th>Priority</th>
<th>Status</th>
<th>Test</th>
<th>Source</th>
<th>Actions</th>
</tr>
</thead>
@@ -282,50 +372,51 @@ function AdminCachePage() {
<tr key={source.id} className={source.enabled ? '' : 'disabled-row'}>
<td>
<span className="source-name">{source.name}</span>
{source.source === 'env' && (
<span className="env-badge" title="Defined via environment variable">ENV</span>
)}
{source.is_public && <span className="public-badge">Public</span>}
</td>
<td>
{source.source_type}
{!SUPPORTED_SOURCE_TYPES.has(source.source_type) && (
<span className="coming-soon-badge"> (coming soon)</span>
)}
</td>
<td className="url-cell" title={source.url}>{source.url}</td>
<td>{source.source_type}</td>
<td className="url-cell">{source.url}</td>
<td>{source.priority}</td>
<td>
<span className={`status-badge ${source.enabled ? 'enabled' : 'disabled'}`}>
{source.enabled ? 'Enabled' : 'Disabled'}
</span>
</td>
<td className="test-cell">
{testingId === source.id ? (
<span className="test-dot testing" title="Testing..."></span>
) : testResults[source.id] ? (
testResults[source.id].success ? (
<span className="test-dot success" title={testResults[source.id].message}></span>
<td>
{source.source === 'env' ? (
<span className="env-badge" title="Defined via environment variable">
ENV
</span>
) : (
<span
className="test-dot failure"
title="Click to see error"
onClick={() => showError(source.name, testResults[source.id].message)}
></span>
)
) : null}
'Database'
)}
</td>
<td className="actions-cell">
<button
className="btn btn-sm btn-secondary"
className="btn btn-sm"
onClick={() => handleTest(source)}
disabled={testingId === source.id}
>
Test
{testingId === source.id ? 'Testing...' : 'Test'}
</button>
{source.source !== 'env' && (
<button className="btn btn-sm btn-secondary" onClick={() => openEditForm(source)}>
<>
<button className="btn btn-sm" onClick={() => openEditForm(source)}>
Edit
</button>
<button
className="btn btn-sm btn-danger"
onClick={() => handleDelete(source)}
disabled={deletingId === source.id}
>
{deletingId === source.id ? 'Deleting...' : 'Delete'}
</button>
</>
)}
{testResults[source.id] && (
<span className={`test-result ${testResults[source.id].success ? 'success' : 'failure'}`}>
{testResults[source.id].message}
</span>
)}
</td>
</tr>
@@ -365,7 +456,7 @@ function AdminCachePage() {
>
{SOURCE_TYPES.map((type) => (
<option key={type} value={type}>
{type}{!SUPPORTED_SOURCE_TYPES.has(type) ? ' (coming soon)' : ''}
{type}
</option>
))}
</select>
@@ -407,6 +498,16 @@ function AdminCachePage() {
Enabled
</label>
</div>
<div className="form-group checkbox-group">
<label>
<input
type="checkbox"
checked={formData.is_public}
onChange={(e) => setFormData({ ...formData, is_public: e.target.checked })}
/>
Public Internet Source
</label>
</div>
</div>
<div className="form-group">
@@ -461,20 +562,6 @@ function AdminCachePage() {
)}
<div className="form-actions">
{editingSource && (
<button
type="button"
className="btn btn-danger"
onClick={() => {
handleDelete(editingSource);
setShowForm(false);
}}
disabled={deletingId === editingSource.id}
>
{deletingId === editingSource.id ? 'Deleting...' : 'Delete'}
</button>
)}
<div className="form-actions-right">
<button type="button" className="btn" onClick={() => setShowForm(false)}>
Cancel
</button>
@@ -482,26 +569,10 @@ function AdminCachePage() {
{isSaving ? 'Saving...' : editingSource ? 'Update' : 'Create'}
</button>
</div>
</div>
</form>
</div>
</div>
)}
{/* Error Details Modal */}
{showErrorModal && selectedError && (
<div className="modal-overlay" onClick={() => setShowErrorModal(false)}>
<div className="error-modal-content" onClick={(e) => e.stopPropagation()}>
<h3>Connection Error: {selectedError.sourceName}</h3>
<div className="error-details">{selectedError.error}</div>
<div className="modal-actions">
<button className="btn" onClick={() => setShowErrorModal(false)}>
Close
</button>
</div>
</div>
</div>
)}
</div>
);
}

View File

@@ -249,7 +249,7 @@ function Home() {
key: 'created_by',
header: 'Owner',
className: 'cell-owner',
render: (project) => project.team_name || project.created_by,
render: (project) => project.created_by,
},
...(user
? [

View File

@@ -642,11 +642,6 @@ tr:hover .copy-btn {
padding: 20px;
}
/* Ensure file modal needs higher z-index when opened from deps modal */
.modal-overlay:has(.ensure-file-modal) {
z-index: 1100;
}
.ensure-file-modal {
background: var(--bg-secondary);
border: 1px solid var(--border-primary);
@@ -798,194 +793,4 @@ tr:hover .copy-btn {
.ensure-file-modal {
max-height: 90vh;
}
.action-menu-dropdown {
right: 0;
left: auto;
}
}
/* Header upload button */
.header-upload-btn {
margin-left: auto;
}
/* Tag/Version cell */
.tag-version-cell {
display: flex;
flex-direction: column;
gap: 4px;
}
.tag-version-cell .version-badge {
font-size: 0.75rem;
color: var(--text-muted);
}
/* Icon buttons */
.btn-icon {
display: flex;
align-items: center;
justify-content: center;
width: 32px;
height: 32px;
padding: 0;
background: transparent;
border: 1px solid transparent;
border-radius: var(--radius-sm);
color: var(--text-secondary);
cursor: pointer;
transition: all var(--transition-fast);
}
.btn-icon:hover {
background: var(--bg-hover);
color: var(--text-primary);
}
/* Action menu */
.action-buttons {
display: flex;
align-items: center;
gap: 4px;
}
.action-menu {
position: relative;
}
/* Action menu backdrop for click-outside */
.action-menu-backdrop {
position: fixed;
top: 0;
left: 0;
right: 0;
bottom: 0;
z-index: 999;
}
.action-menu-dropdown {
position: fixed;
z-index: 1000;
min-width: 180px;
padding: 4px 0;
background: var(--bg-secondary);
border: 1px solid var(--border-primary);
border-radius: var(--radius-md);
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);
}
.action-menu-dropdown button {
display: block;
width: 100%;
padding: 8px 12px;
background: none;
border: none;
text-align: left;
font-size: 0.875rem;
color: var(--text-primary);
cursor: pointer;
transition: background var(--transition-fast);
}
.action-menu-dropdown button:hover {
background: var(--bg-hover);
}
/* Upload Modal */
.upload-modal,
.create-tag-modal {
background: var(--bg-secondary);
border-radius: var(--radius-lg);
width: 90%;
max-width: 500px;
max-height: 90vh;
overflow: hidden;
}
.modal-header {
display: flex;
align-items: center;
justify-content: space-between;
padding: 16px 20px;
border-bottom: 1px solid var(--border-primary);
}
.modal-header h3 {
margin: 0;
font-size: 1.125rem;
font-weight: 600;
}
.modal-body {
padding: 20px;
}
.modal-description {
margin-bottom: 16px;
color: var(--text-secondary);
font-size: 0.875rem;
}
.modal-actions {
display: flex;
justify-content: flex-end;
gap: 12px;
margin-top: 20px;
padding-top: 16px;
border-top: 1px solid var(--border-primary);
}
/* Dependencies Modal */
.deps-modal {
background: var(--bg-secondary);
border-radius: var(--radius-lg);
width: 90%;
max-width: 600px;
max-height: 80vh;
overflow: hidden;
display: flex;
flex-direction: column;
}
.deps-modal .modal-body {
overflow-y: auto;
flex: 1;
}
.deps-modal-controls {
display: flex;
gap: 8px;
margin-bottom: 16px;
}
/* Artifact ID Modal */
.artifact-id-modal {
background: var(--bg-secondary);
border-radius: var(--radius-lg);
width: 90%;
max-width: 500px;
}
.artifact-id-display {
display: flex;
align-items: center;
gap: 12px;
padding: 16px;
background: var(--bg-tertiary);
border-radius: var(--radius-md);
border: 1px solid var(--border-primary);
}
.artifact-id-display code {
font-family: 'JetBrains Mono', 'Fira Code', 'Consolas', monospace;
font-size: 0.8125rem;
color: var(--text-primary);
word-break: break-all;
flex: 1;
}
.artifact-id-display .copy-btn {
opacity: 1;
flex-shrink: 0;
}

View File

@@ -63,17 +63,12 @@ function PackagePage() {
const [accessDenied, setAccessDenied] = useState(false);
const [uploadTag, setUploadTag] = useState('');
const [uploadSuccess, setUploadSuccess] = useState<string | null>(null);
const [artifactIdInput, setArtifactIdInput] = useState('');
const [accessLevel, setAccessLevel] = useState<AccessLevel | null>(null);
const [createTagName, setCreateTagName] = useState('');
const [createTagArtifactId, setCreateTagArtifactId] = useState('');
const [createTagLoading, setCreateTagLoading] = useState(false);
// UI state
const [showUploadModal, setShowUploadModal] = useState(false);
const [showCreateTagModal, setShowCreateTagModal] = useState(false);
const [openMenuId, setOpenMenuId] = useState<string | null>(null);
const [menuPosition, setMenuPosition] = useState<{ top: number; left: number } | null>(null);
// Dependencies state
const [selectedTag, setSelectedTag] = useState<TagDetail | null>(null);
const [dependencies, setDependencies] = useState<Dependency[]>([]);
@@ -91,13 +86,6 @@ function PackagePage() {
// Dependency graph modal state
const [showGraph, setShowGraph] = useState(false);
// Dependencies modal state
const [showDepsModal, setShowDepsModal] = useState(false);
// Artifact ID modal state
const [showArtifactIdModal, setShowArtifactIdModal] = useState(false);
const [viewArtifactId, setViewArtifactId] = useState<string | null>(null);
// Ensure file modal state
const [showEnsureFile, setShowEnsureFile] = useState(false);
const [ensureFileContent, setEnsureFileContent] = useState<string | null>(null);
@@ -108,9 +96,6 @@ function PackagePage() {
// Derived permissions
const canWrite = accessLevel === 'write' || accessLevel === 'admin';
// Detect system projects (convention: name starts with "_")
const isSystemProject = projectName?.startsWith('_') ?? false;
// Get params from URL
const page = parseInt(searchParams.get('page') || '1', 10);
const search = searchParams.get('search') || '';
@@ -338,91 +323,7 @@ function PackagePage() {
setSelectedTag(tag);
};
const handleMenuOpen = (e: React.MouseEvent, tagId: string) => {
e.stopPropagation();
if (openMenuId === tagId) {
setOpenMenuId(null);
setMenuPosition(null);
} else {
const rect = e.currentTarget.getBoundingClientRect();
setMenuPosition({ top: rect.bottom + 4, left: rect.right - 180 });
setOpenMenuId(tagId);
}
};
// System projects show Version first, regular projects show Tag first
const columns = isSystemProject
? [
// System project columns: Version first, then Filename
{
key: 'version',
header: 'Version',
sortable: true,
render: (t: TagDetail) => (
<strong
className={`tag-name-link ${selectedTag?.id === t.id ? 'selected' : ''}`}
onClick={() => handleTagSelect(t)}
style={{ cursor: 'pointer' }}
>
<span className="version-badge">{t.version || t.name}</span>
</strong>
),
},
{
key: 'artifact_original_name',
header: 'Filename',
className: 'cell-truncate',
render: (t: TagDetail) => (
<span title={t.artifact_original_name || t.name}>{t.artifact_original_name || t.name}</span>
),
},
{
key: 'artifact_size',
header: 'Size',
render: (t: TagDetail) => <span>{formatBytes(t.artifact_size)}</span>,
},
{
key: 'created_at',
header: 'Cached',
sortable: true,
render: (t: TagDetail) => (
<span>{new Date(t.created_at).toLocaleDateString()}</span>
),
},
{
key: 'actions',
header: '',
render: (t: TagDetail) => (
<div className="action-buttons">
<a
href={getDownloadUrl(projectName!, packageName!, t.name)}
className="btn btn-icon"
download
title="Download"
>
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4" />
<polyline points="7 10 12 15 17 10" />
<line x1="12" y1="15" x2="12" y2="3" />
</svg>
</a>
<button
className="btn btn-icon"
onClick={(e) => handleMenuOpen(e, t.id)}
title="More actions"
>
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<circle cx="12" cy="12" r="1" />
<circle cx="12" cy="5" r="1" />
<circle cx="12" cy="19" r="1" />
</svg>
</button>
</div>
),
},
]
: [
// Regular project columns: Tag, Version, Filename
const columns = [
{
key: 'name',
header: 'Tag',
@@ -441,15 +342,17 @@ function PackagePage() {
key: 'version',
header: 'Version',
render: (t: TagDetail) => (
<span className="version-badge">{t.version || ''}</span>
<span className="version-badge">{t.version || '-'}</span>
),
},
{
key: 'artifact_original_name',
header: 'Filename',
className: 'cell-truncate',
key: 'artifact_id',
header: 'Artifact ID',
render: (t: TagDetail) => (
<span title={t.artifact_original_name || undefined}>{t.artifact_original_name || '—'}</span>
<div className="artifact-id-cell">
<code className="artifact-id">{t.artifact_id.substring(0, 12)}...</code>
<CopyButton text={t.artifact_id} />
</div>
),
},
{
@@ -457,94 +360,56 @@ function PackagePage() {
header: 'Size',
render: (t: TagDetail) => <span>{formatBytes(t.artifact_size)}</span>,
},
{
key: 'artifact_content_type',
header: 'Type',
render: (t: TagDetail) => (
<span className="content-type">{t.artifact_content_type || '-'}</span>
),
},
{
key: 'artifact_original_name',
header: 'Filename',
className: 'cell-truncate',
render: (t: TagDetail) => (
<span title={t.artifact_original_name || undefined}>{t.artifact_original_name || '-'}</span>
),
},
{
key: 'created_at',
header: 'Created',
sortable: true,
render: (t: TagDetail) => (
<span title={`by ${t.created_by}`}>{new Date(t.created_at).toLocaleDateString()}</span>
<div className="created-cell">
<span>{new Date(t.created_at).toLocaleString()}</span>
<span className="created-by">by {t.created_by}</span>
</div>
),
},
{
key: 'actions',
header: '',
header: 'Actions',
render: (t: TagDetail) => (
<div className="action-buttons">
<button
className="btn btn-secondary btn-small"
onClick={() => fetchEnsureFileForTag(t.name)}
title="View orchard.ensure file"
>
Ensure
</button>
<a
href={getDownloadUrl(projectName!, packageName!, t.name)}
className="btn btn-icon"
className="btn btn-secondary btn-small"
download
title="Download"
>
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4" />
<polyline points="7 10 12 15 17 10" />
<line x1="12" y1="15" x2="12" y2="3" />
</svg>
Download
</a>
<button
className="btn btn-icon"
onClick={(e) => handleMenuOpen(e, t.id)}
title="More actions"
>
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<circle cx="12" cy="12" r="1" />
<circle cx="12" cy="5" r="1" />
<circle cx="12" cy="19" r="1" />
</svg>
</button>
</div>
),
},
];
// Find the tag for the open menu
const openMenuTag = tags.find(t => t.id === openMenuId);
// Close menu when clicking outside
const handleClickOutside = () => {
if (openMenuId) {
setOpenMenuId(null);
setMenuPosition(null);
}
};
// Render dropdown menu as a portal-like element
const renderActionMenu = () => {
if (!openMenuId || !menuPosition || !openMenuTag) return null;
const t = openMenuTag;
return (
<div
className="action-menu-backdrop"
onClick={handleClickOutside}
>
<div
className="action-menu-dropdown"
style={{ top: menuPosition.top, left: menuPosition.left }}
onClick={(e) => e.stopPropagation()}
>
<button onClick={() => { setViewArtifactId(t.artifact_id); setShowArtifactIdModal(true); setOpenMenuId(null); setMenuPosition(null); }}>
View Artifact ID
</button>
<button onClick={() => { navigator.clipboard.writeText(t.artifact_id); setOpenMenuId(null); setMenuPosition(null); }}>
Copy Artifact ID
</button>
<button onClick={() => { fetchEnsureFileForTag(t.name); setOpenMenuId(null); setMenuPosition(null); }}>
View Ensure File
</button>
{canWrite && !isSystemProject && (
<button onClick={() => { setCreateTagArtifactId(t.artifact_id); setShowCreateTagModal(true); setOpenMenuId(null); setMenuPosition(null); }}>
Create/Update Tag
</button>
)}
<button onClick={() => { handleTagSelect(t); setShowDepsModal(true); setOpenMenuId(null); setMenuPosition(null); }}>
View Dependencies
</button>
</div>
</div>
);
};
if (loading && !tagsData) {
return <div className="loading">Loading...</div>;
}
@@ -586,19 +451,6 @@ function PackagePage() {
<div className="page-header__title-row">
<h1>{packageName}</h1>
{pkg && <Badge variant="default">{pkg.format}</Badge>}
{user && canWrite && !isSystemProject && (
<button
className="btn btn-primary btn-small header-upload-btn"
onClick={() => setShowUploadModal(true)}
>
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" style={{ marginRight: '6px' }}>
<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4" />
<polyline points="17 8 12 3 7 8" />
<line x1="12" y1="3" x2="12" y2="15" />
</svg>
Upload
</button>
)}
</div>
{pkg?.description && <p className="description">{pkg.description}</p>}
<div className="page-header__meta">
@@ -616,14 +468,14 @@ function PackagePage() {
</div>
{pkg && (pkg.tag_count !== undefined || pkg.artifact_count !== undefined) && (
<div className="package-header-stats">
{!isSystemProject && pkg.tag_count !== undefined && (
{pkg.tag_count !== undefined && (
<span className="stat-item">
<strong>{pkg.tag_count}</strong> tags
</span>
)}
{pkg.artifact_count !== undefined && (
<span className="stat-item">
<strong>{pkg.artifact_count}</strong> {isSystemProject ? 'versions' : 'artifacts'}
<strong>{pkg.artifact_count}</strong> artifacts
</span>
)}
{pkg.total_size !== undefined && pkg.total_size > 0 && (
@@ -631,7 +483,7 @@ function PackagePage() {
<strong>{formatBytes(pkg.total_size)}</strong> total
</span>
)}
{!isSystemProject && pkg.latest_tag && (
{pkg.latest_tag && (
<span className="stat-item">
Latest: <strong className="accent">{pkg.latest_tag}</strong>
</span>
@@ -644,9 +496,44 @@ function PackagePage() {
{error && <div className="error-message">{error}</div>}
{uploadSuccess && <div className="success-message">{uploadSuccess}</div>}
{user && (
<div className="upload-section card">
<h3>Upload Artifact</h3>
{canWrite ? (
<div className="upload-form">
<div className="form-group">
<label htmlFor="upload-tag">Tag (optional)</label>
<input
id="upload-tag"
type="text"
value={uploadTag}
onChange={(e) => setUploadTag(e.target.value)}
placeholder="v1.0.0, latest, stable..."
/>
</div>
<DragDropUpload
projectName={projectName!}
packageName={packageName!}
tag={uploadTag || undefined}
onUploadComplete={handleUploadComplete}
onUploadError={handleUploadError}
/>
</div>
) : (
<DragDropUpload
projectName={projectName!}
packageName={packageName!}
disabled={true}
disabledReason="You have read-only access to this project and cannot upload artifacts."
onUploadComplete={handleUploadComplete}
onUploadError={handleUploadError}
/>
)}
</div>
)}
<div className="section-header">
<h2>{isSystemProject ? 'Versions' : 'Tags / Versions'}</h2>
<h2>Tags / Versions</h2>
</div>
<div className="list-controls">
@@ -690,6 +577,110 @@ function PackagePage() {
/>
)}
{/* Dependencies Section */}
{tags.length > 0 && (
<div className="dependencies-section card">
<div className="dependencies-header">
<h3>Dependencies</h3>
<div className="dependencies-controls">
{selectedTag && (
<>
<button
className="btn btn-secondary btn-small"
onClick={fetchEnsureFile}
disabled={ensureFileLoading}
title="View orchard.ensure file"
>
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" style={{ marginRight: '6px' }}>
<path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z"></path>
<polyline points="14 2 14 8 20 8"></polyline>
<line x1="16" y1="13" x2="8" y2="13"></line>
<line x1="16" y1="17" x2="8" y2="17"></line>
<polyline points="10 9 9 9 8 9"></polyline>
</svg>
{ensureFileLoading ? 'Loading...' : 'View Ensure File'}
</button>
<button
className="btn btn-secondary btn-small"
onClick={() => setShowGraph(true)}
title="View full dependency tree"
>
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" style={{ marginRight: '6px' }}>
<circle cx="12" cy="12" r="3"></circle>
<circle cx="4" cy="4" r="2"></circle>
<circle cx="20" cy="4" r="2"></circle>
<circle cx="4" cy="20" r="2"></circle>
<circle cx="20" cy="20" r="2"></circle>
<line x1="9.5" y1="9.5" x2="5.5" y2="5.5"></line>
<line x1="14.5" y1="9.5" x2="18.5" y2="5.5"></line>
<line x1="9.5" y1="14.5" x2="5.5" y2="18.5"></line>
<line x1="14.5" y1="14.5" x2="18.5" y2="18.5"></line>
</svg>
View Graph
</button>
</>
)}
</div>
</div>
<div className="dependencies-tag-select">
{selectedTag && (
<select
className="tag-selector"
value={selectedTag.id}
onChange={(e) => {
const tag = tags.find(t => t.id === e.target.value);
if (tag) setSelectedTag(tag);
}}
>
{tags.map(t => (
<option key={t.id} value={t.id}>
{t.name}{t.version ? ` (${t.version})` : ''}
</option>
))}
</select>
)}
</div>
{depsLoading ? (
<div className="deps-loading">Loading dependencies...</div>
) : depsError ? (
<div className="deps-error">{depsError}</div>
) : dependencies.length === 0 ? (
<div className="deps-empty">
{selectedTag ? (
<span><strong>{selectedTag.name}</strong> has no dependencies</span>
) : (
<span>No dependencies</span>
)}
</div>
) : (
<div className="deps-list">
<div className="deps-summary">
<strong>{selectedTag?.name}</strong> has {dependencies.length} {dependencies.length === 1 ? 'dependency' : 'dependencies'}:
</div>
<ul className="deps-items">
{dependencies.map((dep) => (
<li key={dep.id} className="dep-item">
<Link
to={`/project/${dep.project}/${dep.package}`}
className="dep-link"
>
{dep.project}/{dep.package}
</Link>
<span className="dep-constraint">
@ {dep.version || dep.tag}
</span>
<span className="dep-status dep-status--ok" title="Package exists">
&#10003;
</span>
</li>
))}
</ul>
</div>
)}
</div>
)}
{/* Used By (Reverse Dependencies) Section */}
<div className="used-by-section card">
<h3>Used By</h3>
@@ -746,6 +737,78 @@ function PackagePage() {
)}
</div>
<div className="download-by-id-section card">
<h3>Download by Artifact ID</h3>
<div className="download-by-id-form">
<input
type="text"
value={artifactIdInput}
onChange={(e) => setArtifactIdInput(e.target.value.toLowerCase().replace(/[^a-f0-9]/g, '').slice(0, 64))}
placeholder="Enter SHA256 artifact ID (64 hex characters)"
className="artifact-id-input"
/>
<a
href={artifactIdInput.length === 64 ? getDownloadUrl(projectName!, packageName!, `artifact:${artifactIdInput}`) : '#'}
className={`btn btn-primary ${artifactIdInput.length !== 64 ? 'btn-disabled' : ''}`}
download
onClick={(e) => {
if (artifactIdInput.length !== 64) {
e.preventDefault();
}
}}
>
Download
</a>
</div>
{artifactIdInput.length > 0 && artifactIdInput.length !== 64 && (
<p className="validation-hint">Artifact ID must be exactly 64 hex characters ({artifactIdInput.length}/64)</p>
)}
</div>
{user && canWrite && (
<div className="create-tag-section card">
<h3>Create / Update Tag</h3>
<p className="section-description">Point a tag at any existing artifact by its ID</p>
<form onSubmit={handleCreateTag} className="create-tag-form">
<div className="form-row">
<div className="form-group">
<label htmlFor="create-tag-name">Tag Name</label>
<input
id="create-tag-name"
type="text"
value={createTagName}
onChange={(e) => setCreateTagName(e.target.value)}
placeholder="latest, stable, v1.0.0..."
disabled={createTagLoading}
/>
</div>
<div className="form-group form-group--wide">
<label htmlFor="create-tag-artifact">Artifact ID</label>
<input
id="create-tag-artifact"
type="text"
value={createTagArtifactId}
onChange={(e) => setCreateTagArtifactId(e.target.value.toLowerCase().replace(/[^a-f0-9]/g, '').slice(0, 64))}
placeholder="SHA256 hash (64 hex characters)"
className="artifact-id-input"
disabled={createTagLoading}
/>
</div>
<button
type="submit"
className="btn btn-primary"
disabled={createTagLoading || !createTagName.trim() || createTagArtifactId.length !== 64}
>
{createTagLoading ? 'Creating...' : 'Create Tag'}
</button>
</div>
{createTagArtifactId.length > 0 && createTagArtifactId.length !== 64 && (
<p className="validation-hint">Artifact ID must be exactly 64 hex characters ({createTagArtifactId.length}/64)</p>
)}
</form>
</div>
)}
<div className="usage-section card">
<h3>Usage</h3>
<p>Download artifacts using:</p>
@@ -768,118 +831,6 @@ function PackagePage() {
/>
)}
{/* Upload Modal */}
{showUploadModal && (
<div className="modal-overlay" onClick={() => setShowUploadModal(false)}>
<div className="upload-modal" onClick={(e) => e.stopPropagation()}>
<div className="modal-header">
<h3>Upload Artifact</h3>
<button
className="modal-close"
onClick={() => setShowUploadModal(false)}
title="Close"
>
<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<line x1="18" y1="6" x2="6" y2="18"></line>
<line x1="6" y1="6" x2="18" y2="18"></line>
</svg>
</button>
</div>
<div className="modal-body">
<div className="form-group">
<label htmlFor="upload-tag">Tag (optional)</label>
<input
id="upload-tag"
type="text"
value={uploadTag}
onChange={(e) => setUploadTag(e.target.value)}
placeholder="v1.0.0, latest, stable..."
/>
</div>
<DragDropUpload
projectName={projectName!}
packageName={packageName!}
tag={uploadTag || undefined}
onUploadComplete={(result) => {
handleUploadComplete(result);
setShowUploadModal(false);
setUploadTag('');
}}
onUploadError={handleUploadError}
/>
</div>
</div>
</div>
)}
{/* Create/Update Tag Modal */}
{showCreateTagModal && (
<div className="modal-overlay" onClick={() => setShowCreateTagModal(false)}>
<div className="create-tag-modal" onClick={(e) => e.stopPropagation()}>
<div className="modal-header">
<h3>Create / Update Tag</h3>
<button
className="modal-close"
onClick={() => { setShowCreateTagModal(false); setCreateTagName(''); setCreateTagArtifactId(''); }}
title="Close"
>
<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<line x1="18" y1="6" x2="6" y2="18"></line>
<line x1="6" y1="6" x2="18" y2="18"></line>
</svg>
</button>
</div>
<div className="modal-body">
<p className="modal-description">Point a tag at an artifact by its ID</p>
<form onSubmit={(e) => { handleCreateTag(e); setShowCreateTagModal(false); }}>
<div className="form-group">
<label htmlFor="modal-tag-name">Tag Name</label>
<input
id="modal-tag-name"
type="text"
value={createTagName}
onChange={(e) => setCreateTagName(e.target.value)}
placeholder="latest, stable, v1.0.0..."
disabled={createTagLoading}
/>
</div>
<div className="form-group">
<label htmlFor="modal-artifact-id">Artifact ID</label>
<input
id="modal-artifact-id"
type="text"
value={createTagArtifactId}
onChange={(e) => setCreateTagArtifactId(e.target.value.toLowerCase().replace(/[^a-f0-9]/g, '').slice(0, 64))}
placeholder="SHA256 hash (64 hex characters)"
className="artifact-id-input"
disabled={createTagLoading}
/>
{createTagArtifactId.length > 0 && createTagArtifactId.length !== 64 && (
<p className="validation-hint">{createTagArtifactId.length}/64 characters</p>
)}
</div>
<div className="modal-actions">
<button
type="button"
className="btn btn-secondary"
onClick={() => { setShowCreateTagModal(false); setCreateTagName(''); setCreateTagArtifactId(''); }}
>
Cancel
</button>
<button
type="submit"
className="btn btn-primary"
disabled={createTagLoading || !createTagName.trim() || createTagArtifactId.length !== 64}
>
{createTagLoading ? 'Creating...' : 'Create Tag'}
</button>
</div>
</form>
</div>
</div>
</div>
)}
{/* Ensure File Modal */}
{showEnsureFile && (
<div className="modal-overlay" onClick={() => setShowEnsureFile(false)}>
@@ -921,107 +872,6 @@ function PackagePage() {
</div>
</div>
)}
{/* Dependencies Modal */}
{showDepsModal && selectedTag && (
<div className="modal-overlay" onClick={() => setShowDepsModal(false)}>
<div className="deps-modal" onClick={(e) => e.stopPropagation()}>
<div className="modal-header">
<h3>Dependencies for {selectedTag.version || selectedTag.name}</h3>
<button
className="modal-close"
onClick={() => setShowDepsModal(false)}
title="Close"
>
<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<line x1="18" y1="6" x2="6" y2="18"></line>
<line x1="6" y1="6" x2="18" y2="18"></line>
</svg>
</button>
</div>
<div className="modal-body">
<div className="deps-modal-controls">
<button
className="btn btn-secondary btn-small"
onClick={fetchEnsureFile}
disabled={ensureFileLoading}
>
View Ensure File
</button>
<button
className="btn btn-secondary btn-small"
onClick={() => { setShowDepsModal(false); setShowGraph(true); }}
>
View Graph
</button>
</div>
{depsLoading ? (
<div className="deps-loading">Loading dependencies...</div>
) : depsError ? (
<div className="deps-error">{depsError}</div>
) : dependencies.length === 0 ? (
<div className="deps-empty">No dependencies</div>
) : (
<div className="deps-list">
<div className="deps-summary">
{dependencies.length} {dependencies.length === 1 ? 'dependency' : 'dependencies'}:
</div>
<ul className="deps-items">
{dependencies.map((dep) => (
<li key={dep.id} className="dep-item">
<Link
to={`/project/${dep.project}/${dep.package}`}
className="dep-link"
onClick={() => setShowDepsModal(false)}
>
{dep.project}/{dep.package}
</Link>
<span className="dep-constraint">
@ {dep.version || dep.tag}
</span>
<span className="dep-status dep-status--ok" title="Package exists">
&#10003;
</span>
</li>
))}
</ul>
</div>
)}
</div>
</div>
</div>
)}
{/* Artifact ID Modal */}
{showArtifactIdModal && viewArtifactId && (
<div className="modal-overlay" onClick={() => setShowArtifactIdModal(false)}>
<div className="artifact-id-modal" onClick={(e) => e.stopPropagation()}>
<div className="modal-header">
<h3>Artifact ID</h3>
<button
className="modal-close"
onClick={() => setShowArtifactIdModal(false)}
title="Close"
>
<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<line x1="18" y1="6" x2="6" y2="18"></line>
<line x1="6" y1="6" x2="18" y2="18"></line>
</svg>
</button>
</div>
<div className="modal-body">
<p className="modal-description">SHA256 hash identifying this artifact:</p>
<div className="artifact-id-display">
<code>{viewArtifactId}</code>
<CopyButton text={viewArtifactId} />
</div>
</div>
</div>
</div>
)}
{/* Action Menu Dropdown */}
{renderActionMenu()}
</div>
);
}

View File

@@ -214,7 +214,7 @@ function ProjectPage() {
</div>
</div>
<div className="page-header__actions">
{canAdmin && !project.team_id && !project.is_system && (
{canAdmin && !project.team_id && (
<button
className="btn btn-secondary"
onClick={() => navigate(`/project/${projectName}/settings`)}
@@ -227,11 +227,11 @@ function ProjectPage() {
Settings
</button>
)}
{canWrite && !project.is_system ? (
{canWrite ? (
<button className="btn btn-primary" onClick={() => setShowForm(!showForm)}>
{showForm ? 'Cancel' : '+ New Package'}
</button>
) : user && !project.is_system ? (
) : user ? (
<span className="text-muted" title="You have read-only access to this project">
Read-only access
</span>
@@ -294,7 +294,6 @@ function ProjectPage() {
placeholder="Filter packages..."
className="list-controls__search"
/>
{!project?.is_system && (
<select
className="list-controls__select"
value={format}
@@ -307,7 +306,6 @@ function ProjectPage() {
</option>
))}
</select>
)}
</div>
{hasActiveFilters && (
@@ -343,19 +341,19 @@ function ProjectPage() {
className: 'cell-description',
render: (pkg) => pkg.description || '—',
},
...(!project?.is_system ? [{
{
key: 'format',
header: 'Format',
render: (pkg: Package) => <Badge variant="default">{pkg.format}</Badge>,
}] : []),
...(!project?.is_system ? [{
render: (pkg) => <Badge variant="default">{pkg.format}</Badge>,
},
{
key: 'tag_count',
header: 'Tags',
render: (pkg: Package) => pkg.tag_count ?? '—',
}] : []),
render: (pkg) => pkg.tag_count ?? '—',
},
{
key: 'artifact_count',
header: project?.is_system ? 'Versions' : 'Artifacts',
header: 'Artifacts',
render: (pkg) => pkg.artifact_count ?? '—',
},
{
@@ -364,12 +362,12 @@ function ProjectPage() {
render: (pkg) =>
pkg.total_size !== undefined && pkg.total_size > 0 ? formatBytes(pkg.total_size) : '—',
},
...(!project?.is_system ? [{
{
key: 'latest_tag',
header: 'Latest',
render: (pkg: Package) =>
render: (pkg) =>
pkg.latest_tag ? <strong style={{ color: 'var(--accent-primary)' }}>{pkg.latest_tag}</strong> : '—',
}] : []),
},
{
key: 'created_at',
header: 'Created',

View File

@@ -515,6 +515,7 @@ export interface UpstreamSource {
source_type: SourceType;
url: string;
enabled: boolean;
is_public: boolean;
auth_type: AuthType;
username: string | null;
has_password: boolean;
@@ -530,6 +531,7 @@ export interface UpstreamSourceCreate {
source_type: SourceType;
url: string;
enabled?: boolean;
is_public?: boolean;
auth_type?: AuthType;
username?: string;
password?: string;
@@ -542,6 +544,7 @@ export interface UpstreamSourceUpdate {
source_type?: SourceType;
url?: string;
enabled?: boolean;
is_public?: boolean;
auth_type?: AuthType;
username?: string;
password?: string;
@@ -557,3 +560,18 @@ export interface UpstreamSourceTestResult {
source_id: string;
source_name: string;
}
// Cache Settings types
export interface CacheSettings {
allow_public_internet: boolean;
auto_create_system_projects: boolean;
allow_public_internet_env_override: boolean | null;
auto_create_system_projects_env_override: boolean | null;
created_at: string | null;
updated_at: string | null;
}
export interface CacheSettingsUpdate {
allow_public_internet?: boolean;
auto_create_system_projects?: boolean;
}

View File

@@ -128,10 +128,6 @@ spec:
value: {{ .Values.orchard.rateLimit.login | quote }}
{{- end }}
{{- end }}
{{- if .Values.orchard.purgeSeedData }}
- name: ORCHARD_PURGE_SEED_DATA
value: "true"
{{- end }}
{{- if .Values.orchard.database.poolSize }}
- name: ORCHARD_DATABASE_POOL_SIZE
value: {{ .Values.orchard.database.poolSize | quote }}

View File

@@ -91,7 +91,6 @@ affinity: {}
# Orchard server configuration
orchard:
env: "development" # Allows seed data for testing
purgeSeedData: true # Remove public seed data (npm-public, pypi-public, etc.)
server:
host: "0.0.0.0"
port: 8080

View File

@@ -1,55 +0,0 @@
-- Migration: 011_pypi_cache_tasks
-- Description: Add table for tracking PyPI dependency caching tasks
-- Date: 2026-02-02
-- Table for tracking PyPI cache tasks with retry support
CREATE TABLE pypi_cache_tasks (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-- What to cache
package_name VARCHAR(255) NOT NULL,
version_constraint VARCHAR(255),
-- Origin tracking
parent_task_id UUID REFERENCES pypi_cache_tasks(id) ON DELETE SET NULL,
depth INTEGER NOT NULL DEFAULT 0,
triggered_by_artifact VARCHAR(64) REFERENCES artifacts(id) ON DELETE SET NULL,
-- Status
status VARCHAR(20) NOT NULL DEFAULT 'pending',
attempts INTEGER NOT NULL DEFAULT 0,
max_attempts INTEGER NOT NULL DEFAULT 3,
-- Results
cached_artifact_id VARCHAR(64) REFERENCES artifacts(id) ON DELETE SET NULL,
error_message TEXT,
-- Timing
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
started_at TIMESTAMP WITH TIME ZONE,
completed_at TIMESTAMP WITH TIME ZONE,
next_retry_at TIMESTAMP WITH TIME ZONE,
-- Constraints
CONSTRAINT check_task_status CHECK (status IN ('pending', 'in_progress', 'completed', 'failed')),
CONSTRAINT check_depth_non_negative CHECK (depth >= 0),
CONSTRAINT check_attempts_non_negative CHECK (attempts >= 0)
);
-- Index for finding tasks ready to process (pending with retry time passed)
CREATE INDEX idx_pypi_cache_tasks_status_retry ON pypi_cache_tasks(status, next_retry_at);
-- Index for deduplication check (is this package already queued?)
CREATE INDEX idx_pypi_cache_tasks_package_status ON pypi_cache_tasks(package_name, status);
-- Index for tracing dependency chains
CREATE INDEX idx_pypi_cache_tasks_parent ON pypi_cache_tasks(parent_task_id);
-- Index for finding tasks by artifact that triggered them
CREATE INDEX idx_pypi_cache_tasks_triggered_by ON pypi_cache_tasks(triggered_by_artifact);
-- Index for finding tasks by cached artifact
CREATE INDEX idx_pypi_cache_tasks_cached_artifact ON pypi_cache_tasks(cached_artifact_id);
-- Index for sorting by depth and creation time (processing order)
CREATE INDEX idx_pypi_cache_tasks_depth_created ON pypi_cache_tasks(depth, created_at);