2 Commits

Author SHA1 Message Date
Mondo Diaz
9df50d0963 Add migration tracking with smart error detection
- Add _schema_migrations table to track applied migrations
- Each migration has a unique name and checksum
- Migrations are only run once (tracked by name)
- Checksum changes are detected and logged as warnings
- Smart error detection distinguishes "already applied" errors from real failures
- Real errors now fail hard with clear error messages
- Safe PostgreSQL error codes (42P07, 42701, 42710, 42P16) are recognized
- Fix semver migration to generate UUID for id column
2026-01-28 21:05:45 +00:00
Mondo Diaz
c60a7ba1ab Add rollback after failed migration to allow subsequent migrations to run
When a migration fails, the transaction is left in a failed state. Without
rollback, all subsequent migrations fail with "current transaction is aborted".
This was preventing the team_id column from being added to projects.
2026-01-28 20:27:46 +00:00
33 changed files with 457 additions and 8896 deletions

View File

@@ -11,6 +11,13 @@ variables:
# Environment URLs (used by deploy and test jobs)
STAGE_URL: https://orchard-stage.common.global.bsf.tools
PROD_URL: https://orchard.common.global.bsf.tools
# Stage environment AWS resources (used by reset job)
STAGE_RDS_HOST: orchard-stage.cluster-cvw3jzjkozoc.us-gov-west-1.rds.amazonaws.com
STAGE_RDS_DBNAME: postgres
STAGE_SECRET_ARN: "arn:aws-us-gov:secretsmanager:us-gov-west-1:052673043337:secret:rds!cluster-a573672b-1a38-4665-a654-1b7df37b5297-IaeFQL"
STAGE_AUTH_SECRET_ARN: "arn:aws-us-gov:secretsmanager:us-gov-west-1:052673043337:secret:orchard-stage-creds-SMqvQx"
STAGE_S3_BUCKET: orchard-artifacts-stage
AWS_REGION: us-gov-west-1
# Shared pip cache directory
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.pip-cache"
@@ -88,18 +95,10 @@ cve_sbom_analysis:
when: never
- when: on_success
# Disable prosper_setup for tag pipelines since no build/analysis jobs run
# (image is already built when commit was on main, and deploy uses helm directly)
prosper_setup:
rules:
- if: '$CI_COMMIT_TAG'
when: never
- when: on_success
# Override release job to wait for stage deployment and smoke tests before creating tag
# Override release job to wait for stage integration tests before creating tag
# This ensures the tag (which triggers prod deploy) is only created after stage passes
release:
needs: [smoke_test_stage, changelog]
needs: [integration_test_stage, changelog]
# Full integration test suite template (for feature/stage deployments)
# Runs the complete pytest integration test suite against the deployed environment
@@ -201,6 +200,108 @@ release:
sys.exit(0)
PYTEST_SCRIPT
# Reset stage template - runs from CI runner, uses CI variable for auth
# Calls the /api/v1/admin/factory-reset endpoint which handles DB and S3 cleanup
.reset_stage_template: &reset_stage_template
stage: deploy
image: deps.global.bsf.tools/docker/python:3.12-slim
timeout: 5m
retry: 1
before_script:
- pip install --index-url "$PIP_INDEX_URL" httpx
script:
- |
python - <<'RESET_SCRIPT'
import httpx
import sys
import os
import time
BASE_URL = os.environ.get("STAGE_URL", "")
ADMIN_USER = "admin"
ADMIN_PASS = os.environ.get("STAGE_ADMIN_PASSWORD", "")
MAX_RETRIES = 3
RETRY_DELAY = 5
if not BASE_URL:
print("ERROR: STAGE_URL not set")
sys.exit(1)
if not ADMIN_PASS:
print("ERROR: STAGE_ADMIN_PASSWORD not set")
sys.exit(1)
print(f"=== Resetting stage environment at {BASE_URL} ===")
def do_reset():
with httpx.Client(base_url=BASE_URL, timeout=120.0) as client:
print("Logging in as admin...")
login_response = client.post(
"/api/v1/auth/login",
json={"username": ADMIN_USER, "password": ADMIN_PASS},
)
if login_response.status_code != 200:
raise Exception(f"Login failed: {login_response.status_code} - {login_response.text}")
print("Login successful")
print("Calling factory reset endpoint...")
reset_response = client.post(
"/api/v1/admin/factory-reset",
headers={"X-Confirm-Reset": "yes-delete-all-data"},
)
if reset_response.status_code == 200:
result = reset_response.json()
print("Factory reset successful!")
print(f" Database tables dropped: {result['results']['database_tables_dropped']}")
print(f" S3 objects deleted: {result['results']['s3_objects_deleted']}")
print(f" Database reinitialized: {result['results']['database_reinitialized']}")
print(f" Seeded: {result['results']['seeded']}")
return True
else:
raise Exception(f"Factory reset failed: {reset_response.status_code} - {reset_response.text}")
for attempt in range(1, MAX_RETRIES + 1):
try:
print(f"Attempt {attempt}/{MAX_RETRIES}")
if do_reset():
sys.exit(0)
except Exception as e:
print(f"Attempt {attempt} failed: {e}")
if attempt < MAX_RETRIES:
print(f"Retrying in {RETRY_DELAY} seconds...")
time.sleep(RETRY_DELAY)
else:
print("All retry attempts failed")
sys.exit(1)
RESET_SCRIPT
rules:
- if: '$CI_COMMIT_BRANCH == "main"'
when: on_success
# Reset stage BEFORE integration tests (ensure known state)
reset_stage_pre:
<<: *reset_stage_template
needs: [deploy_stage]
# Integration tests for stage deployment
# Uses CI variable STAGE_ADMIN_PASSWORD (set in GitLab CI/CD settings)
integration_test_stage:
<<: *integration_test_template
needs: [reset_stage_pre]
variables:
ORCHARD_TEST_URL: $STAGE_URL
ORCHARD_TEST_PASSWORD: $STAGE_ADMIN_PASSWORD
rules:
- if: '$CI_COMMIT_BRANCH == "main"'
when: on_success
# Reset stage AFTER integration tests (clean slate for next run)
reset_stage:
<<: *reset_stage_template
needs: [integration_test_stage]
allow_failure: true # Don't fail pipeline if reset has issues
# Integration tests for feature deployment (full suite)
# Uses DEV_ADMIN_PASSWORD CI variable (same as deploy_feature)
integration_test_feature:
@@ -311,88 +412,9 @@ frontend_tests:
echo "Health check failed after 30 attempts"
exit 1
# Ephemeral test deployment in stage namespace (main branch only)
# Runs integration tests before promoting to long-running stage
deploy_test:
<<: *deploy_template
variables:
NAMESPACE: orch-stage-namespace
VALUES_FILE: helm/orchard/values-dev.yaml
BASE_URL: https://orchard-test.common.global.bsf.tools
before_script:
- kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
- *helm_setup
script:
- echo "Deploying ephemeral test environment"
- cd $CI_PROJECT_DIR
- |
helm upgrade --install orchard-test ./helm/orchard \
--namespace $NAMESPACE \
-f $VALUES_FILE \
--set image.tag=git.linux-amd64-$CI_COMMIT_SHA \
--set orchard.auth.adminPassword=$STAGE_ADMIN_PASSWORD \
--set ingress.hosts[0].host=orchard-test.common.global.bsf.tools \
--set ingress.tls[0].hosts[0]=orchard-test.common.global.bsf.tools \
--set ingress.tls[0].secretName=orchard-test-tls \
--set minioIngress.host=minio-test.common.global.bsf.tools \
--set minioIngress.tls.secretName=minio-test-tls \
--wait \
--atomic \
--timeout 10m
- kubectl rollout status deployment/orchard-test-server -n $NAMESPACE --timeout=10m
- *verify_deployment
environment:
name: test
url: https://orchard-test.common.global.bsf.tools
on_stop: cleanup_test
kubernetes:
agent: esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
rules:
- if: '$CI_COMMIT_BRANCH == "main"'
when: on_success
# Integration tests for ephemeral test deployment (main branch)
# Runs against orchard-test before promoting to long-running stage
integration_test_main:
<<: *integration_test_template
needs: [deploy_test]
variables:
ORCHARD_TEST_URL: https://orchard-test.common.global.bsf.tools
ORCHARD_TEST_PASSWORD: $STAGE_ADMIN_PASSWORD
rules:
- if: '$CI_COMMIT_BRANCH == "main"'
when: on_success
# Cleanup ephemeral test deployment after integration tests
cleanup_test:
stage: deploy
needs: [integration_test_main]
image: deps.global.bsf.tools/registry-1.docker.io/alpine/k8s:1.29.12
timeout: 5m
variables:
NAMESPACE: orch-stage-namespace
GIT_STRATEGY: none
before_script:
- kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
script:
- echo "Cleaning up ephemeral test deployment orchard-test"
- helm uninstall orchard-test --namespace $NAMESPACE || true
environment:
name: test
action: stop
kubernetes:
agent: esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
rules:
- if: '$CI_COMMIT_BRANCH == "main"'
when: on_success
allow_failure: true
# Deploy to long-running stage (main branch, after ephemeral tests pass)
# Deploy to stage (main branch)
deploy_stage:
stage: deploy
# Wait for ephemeral test to pass before promoting to long-running stage
needs: [cleanup_test]
image: deps.global.bsf.tools/registry-1.docker.io/alpine/k8s:1.29.12
<<: *deploy_template
variables:
NAMESPACE: orch-stage-namespace
VALUES_FILE: helm/orchard/values-stage.yaml
@@ -401,7 +423,7 @@ deploy_stage:
- kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
- *helm_setup
script:
- echo "Deploying to long-running stage environment"
- echo "Deploying to stage environment"
- cd $CI_PROJECT_DIR
- |
helm upgrade --install orchard-stage ./helm/orchard \
@@ -423,16 +445,6 @@ deploy_stage:
- if: '$CI_COMMIT_BRANCH == "main"'
when: on_success
# Smoke test for long-running stage (after promotion)
smoke_test_stage:
<<: *smoke_test_template
needs: [deploy_stage]
variables:
ORCHARD_TEST_URL: $STAGE_URL
rules:
- if: '$CI_COMMIT_BRANCH == "main"'
when: on_success
# Deploy feature branch to dev namespace
deploy_feature:
<<: *deploy_template

View File

@@ -7,113 +7,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Added
- Added transparent PyPI proxy implementing PEP 503 Simple API (#108)
- `GET /pypi/simple/` - package index (proxied from upstream)
- `GET /pypi/simple/{package}/` - version list with rewritten download links
- `GET /pypi/simple/{package}/{filename}` - download with automatic caching
- Allows `pip install --index-url https://orchard.../pypi/simple/ <package>`
- Artifacts cached on first access through configured upstream sources
- Added `POST /api/v1/cache/resolve` endpoint to cache packages by coordinates instead of URL (#108)
### Changed
- Upstream sources table text is now centered under column headers (#108)
- ENV badge now appears inline with source name instead of separate column (#108)
- Test and Edit buttons now have more prominent button styling (#108)
- Reduced footer padding for cleaner layout (#108)
### Fixed
- Fixed purge_seed_data crash when deleting access permissions - was comparing UUID to VARCHAR column (#107)
### Changed
- Upstream source connectivity test no longer follows redirects, fixing "Exceeded maximum allowed redirects" error with Artifactory proxies (#107)
- Test runs automatically after saving a new or updated upstream source (#107)
- Test status now shows as colored dots (green=success, red=error) instead of text badges (#107)
- Clicking red dot shows error details in a modal (#107)
- Source name column no longer wraps text for better table layout (#107)
- Renamed "Cache Management" page to "Upstream Sources" (#107)
- Moved Delete button from table row to edit modal for cleaner table layout (#107)
### Removed
- Removed `is_public` field from upstream sources - all sources are now treated as internal/private (#107)
- Removed `allow_public_internet` (air-gap mode) setting from cache settings - not needed for enterprise proxy use case (#107)
- Removed seeding of public registry URLs (npm-public, pypi-public, maven-central, docker-hub) (#107)
- Removed "Public" badge and checkbox from upstream sources UI (#107)
- Removed "Allow Public Internet" toggle from cache settings UI (#107)
- Removed "Global Settings" section from cache management UI - auto-create system projects is always enabled (#107)
- Removed unused CacheSettings frontend types and API functions (#107)
### Added
- Added `ORCHARD_PURGE_SEED_DATA` environment variable support to stage helm values to remove seed data from long-running deployments (#107)
- Added frontend system projects visual distinction (#105)
- "Cache" badge for system projects in project list
- "System Cache" badge on project detail page
- Added `is_system` field to Project type
- Added frontend admin page for upstream sources and cache settings (#75)
- New `/admin/cache` page accessible from user menu (admin only)
- Upstream sources table with create/edit/delete/test connectivity
- Cache settings section with air-gap mode and auto-create system projects toggles
- Visual indicators for env-defined sources (locked, cannot be modified)
- Environment variable override badges when settings are overridden
- API client functions for all cache admin operations
- Added environment variable overrides for cache configuration (#74)
- `ORCHARD_CACHE_ALLOW_PUBLIC_INTERNET` - Override allow_public_internet (air-gap mode)
- `ORCHARD_CACHE_AUTO_CREATE_SYSTEM_PROJECTS` - Override auto_create_system_projects
- `ORCHARD_UPSTREAM__{NAME}__*` - Define upstream sources via env vars
- Env-defined sources appear in API with `source: "env"` marker
- Env-defined sources cannot be modified/deleted via API (400 error)
- Cache settings response includes `*_env_override` fields when overridden
- 7 unit tests for env var parsing and configuration
- Added Global Cache Settings Admin API (#73)
- `GET /api/v1/admin/cache-settings` - Retrieve current cache settings
- `PUT /api/v1/admin/cache-settings` - Update cache settings (partial updates)
- Admin-only access with audit logging
- Controls `allow_public_internet` (air-gap mode) and `auto_create_system_projects`
- 7 integration tests for settings management
- Added Upstream Sources Admin API for managing cache sources (#72)
- `GET /api/v1/admin/upstream-sources` - List sources with filtering
- `POST /api/v1/admin/upstream-sources` - Create source with auth configuration
- `GET /api/v1/admin/upstream-sources/{id}` - Get source details
- `PUT /api/v1/admin/upstream-sources/{id}` - Update source (partial updates)
- `DELETE /api/v1/admin/upstream-sources/{id}` - Delete source
- `POST /api/v1/admin/upstream-sources/{id}/test` - Test connectivity
- Admin-only access with audit logging
- Credentials never exposed (only has_password/has_headers flags)
- 13 integration tests for all CRUD operations
- Added system project restrictions and management (#71)
- System projects (`_npm`, `_pypi`, etc.) cannot be deleted (returns 403)
- System projects cannot be made private (must remain public)
- `GET /api/v1/system-projects` endpoint to list all system cache projects
- 5 integration tests for system project restrictions
- Added Cache API endpoint for fetching and storing artifacts from upstream URLs (#70)
- `POST /api/v1/cache` endpoint to cache artifacts from upstream registries
- URL parsing helpers to extract package name/version from npm, PyPI, Maven URLs
- Automatic system project creation (`_npm`, `_pypi`, `_maven`, etc.)
- URL-to-artifact provenance tracking via `cached_urls` table
- Optional user project cross-referencing for custom organization
- Cache hit returns existing artifact without re-fetching
- Air-gap mode enforcement (blocks public URLs when disabled)
- Hash verification for downloaded artifacts
- 21 unit tests for URL parsing and cache endpoint
- Added HTTP client for fetching artifacts from upstream sources (#69)
- `UpstreamClient` class in `backend/app/upstream.py` with streaming downloads
- SHA256 hash computation while streaming (doesn't load large files into memory)
- Auth support: none, basic auth, bearer token, API key (custom headers)
- URL-to-source matching by URL prefix with priority ordering
- Configuration options: timeouts, retries with exponential backoff, redirect limits, max file size
- Air-gap mode enforcement via `allow_public_internet` setting
- Response header capture for provenance tracking
- Proper error handling with custom exception types
- Connection test method for upstream source validation
- 33 unit tests for client functionality
- Added upstream artifact caching schema for hermetic builds (#68)
- `upstream_sources` table for configuring upstream registries (npm, PyPI, Maven, etc.)
- `cache_settings` table for global settings including air-gap mode
- `cached_urls` table for URL-to-artifact provenance tracking
- `is_system` column on projects for system cache projects (_npm, _pypi, etc.)
- Support for multiple auth types: none, basic auth, bearer token, API key
- Fernet encryption for credentials using `ORCHARD_CACHE_ENCRYPTION_KEY`
- Default upstream sources seeded (npm-public, pypi-public, maven-central, docker-hub) - disabled by default
- Migration `010_upstream_caching.sql`
- Added team-based multi-tenancy for organizing projects and collaboration (#88-#104)
- Teams serve as organizational containers for projects
- Users can belong to multiple teams with different roles (owner, admin, member)

View File

@@ -1,316 +0,0 @@
"""
Cache service for upstream artifact caching.
Provides URL parsing, system project management, and caching logic
for the upstream caching feature.
"""
import logging
import re
from dataclasses import dataclass
from typing import Optional
from urllib.parse import urlparse, unquote
logger = logging.getLogger(__name__)
# System project names for each source type
SYSTEM_PROJECT_NAMES = {
"npm": "_npm",
"pypi": "_pypi",
"maven": "_maven",
"docker": "_docker",
"helm": "_helm",
"nuget": "_nuget",
"deb": "_deb",
"rpm": "_rpm",
"generic": "_generic",
}
# System project descriptions
SYSTEM_PROJECT_DESCRIPTIONS = {
"npm": "System cache for npm packages",
"pypi": "System cache for PyPI packages",
"maven": "System cache for Maven packages",
"docker": "System cache for Docker images",
"helm": "System cache for Helm charts",
"nuget": "System cache for NuGet packages",
"deb": "System cache for Debian packages",
"rpm": "System cache for RPM packages",
"generic": "System cache for generic artifacts",
}
@dataclass
class ParsedUrl:
"""Parsed URL information for caching."""
package_name: str
version: Optional[str] = None
filename: Optional[str] = None
def parse_npm_url(url: str) -> Optional[ParsedUrl]:
"""
Parse npm registry URL to extract package name and version.
Formats:
- https://registry.npmjs.org/{package}/-/{package}-{version}.tgz
- https://registry.npmjs.org/@{scope}/{package}/-/{package}-{version}.tgz
Examples:
- https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz
- https://registry.npmjs.org/@types/node/-/node-18.0.0.tgz
"""
parsed = urlparse(url)
path = unquote(parsed.path)
# Pattern for scoped packages: /@scope/package/-/package-version.tgz
scoped_pattern = r"^/@([^/]+)/([^/]+)/-/\2-(.+)\.tgz$"
match = re.match(scoped_pattern, path)
if match:
scope, name, version = match.groups()
return ParsedUrl(
package_name=f"@{scope}/{name}",
version=version,
filename=f"{name}-{version}.tgz",
)
# Pattern for unscoped packages: /package/-/package-version.tgz
unscoped_pattern = r"^/([^/@]+)/-/\1-(.+)\.tgz$"
match = re.match(unscoped_pattern, path)
if match:
name, version = match.groups()
return ParsedUrl(
package_name=name,
version=version,
filename=f"{name}-{version}.tgz",
)
return None
def parse_pypi_url(url: str) -> Optional[ParsedUrl]:
"""
Parse PyPI URL to extract package name and version.
Formats:
- https://files.pythonhosted.org/packages/.../package-version.tar.gz
- https://files.pythonhosted.org/packages/.../package-version-py3-none-any.whl
- https://pypi.org/packages/.../package-version.tar.gz
Examples:
- https://files.pythonhosted.org/packages/ab/cd/requests-2.28.0.tar.gz
- https://files.pythonhosted.org/packages/ab/cd/requests-2.28.0-py3-none-any.whl
"""
parsed = urlparse(url)
path = unquote(parsed.path)
# Get the filename from the path
filename = path.split("/")[-1]
if not filename:
return None
# Handle wheel files: package-version-py3-none-any.whl
wheel_pattern = r"^([a-zA-Z0-9_-]+)-(\d+[^-]*)-.*\.whl$"
match = re.match(wheel_pattern, filename)
if match:
name, version = match.groups()
# Normalize package name (PyPI uses underscores internally)
name = name.replace("_", "-").lower()
return ParsedUrl(
package_name=name,
version=version,
filename=filename,
)
# Handle source distributions: package-version.tar.gz or package-version.zip
sdist_pattern = r"^([a-zA-Z0-9_-]+)-(\d+(?:\.\d+)*(?:[a-zA-Z0-9_.+-]*)?)(?:\.tar\.gz|\.zip|\.tar\.bz2)$"
match = re.match(sdist_pattern, filename)
if match:
name, version = match.groups()
name = name.replace("_", "-").lower()
return ParsedUrl(
package_name=name,
version=version,
filename=filename,
)
return None
def parse_maven_url(url: str) -> Optional[ParsedUrl]:
"""
Parse Maven repository URL to extract artifact info.
Format:
- https://repo1.maven.org/maven2/{group}/{artifact}/{version}/{artifact}-{version}.jar
Examples:
- https://repo1.maven.org/maven2/org/apache/commons/commons-lang3/3.12.0/commons-lang3-3.12.0.jar
- https://repo1.maven.org/maven2/com/google/guava/guava/31.1-jre/guava-31.1-jre.jar
"""
parsed = urlparse(url)
path = unquote(parsed.path)
# Find /maven2/ or similar repository path
maven2_idx = path.find("/maven2/")
if maven2_idx >= 0:
path = path[maven2_idx + 8:] # Remove /maven2/
elif path.startswith("/"):
path = path[1:]
parts = path.split("/")
if len(parts) < 4:
return None
# Last part is filename, before that is version, before that is artifact
filename = parts[-1]
version = parts[-2]
artifact = parts[-3]
group = ".".join(parts[:-3])
# Verify filename matches expected pattern
if not filename.startswith(f"{artifact}-{version}"):
return None
return ParsedUrl(
package_name=f"{group}:{artifact}",
version=version,
filename=filename,
)
def parse_docker_url(url: str) -> Optional[ParsedUrl]:
"""
Parse Docker registry URL to extract image info.
Note: Docker registries are more complex (manifests, blobs, etc.)
This handles basic blob/manifest URLs.
Examples:
- https://registry-1.docker.io/v2/library/nginx/blobs/sha256:abc123
- https://registry-1.docker.io/v2/myuser/myimage/manifests/latest
"""
parsed = urlparse(url)
path = unquote(parsed.path)
# Pattern: /v2/{namespace}/{image}/blobs/{digest} or /manifests/{tag}
pattern = r"^/v2/([^/]+(?:/[^/]+)?)/([^/]+)/(blobs|manifests)/(.+)$"
match = re.match(pattern, path)
if match:
namespace, image, artifact_type, reference = match.groups()
if namespace == "library":
package_name = image
else:
package_name = f"{namespace}/{image}"
# For manifests, the reference is the tag
version = reference if artifact_type == "manifests" else None
return ParsedUrl(
package_name=package_name,
version=version,
filename=f"{image}-{reference}" if version else reference,
)
return None
def parse_generic_url(url: str) -> ParsedUrl:
"""
Parse a generic URL to extract filename.
Attempts to extract meaningful package name and version from filename.
Examples:
- https://example.com/downloads/myapp-1.2.3.tar.gz
- https://github.com/user/repo/releases/download/v1.0/release.zip
"""
parsed = urlparse(url)
path = unquote(parsed.path)
filename = path.split("/")[-1] or "artifact"
# List of known compound and simple extensions
known_extensions = [
".tar.gz", ".tar.bz2", ".tar.xz",
".zip", ".tgz", ".gz", ".jar", ".war", ".deb", ".rpm"
]
# Strip extension from filename first
base_name = filename
matched_ext = None
for ext in known_extensions:
if filename.endswith(ext):
base_name = filename[:-len(ext)]
matched_ext = ext
break
if matched_ext is None:
# Unknown extension, return filename as package name
return ParsedUrl(
package_name=filename,
version=None,
filename=filename,
)
# Try to extract version from base_name
# Pattern: name-version or name_version
# Version starts with digit(s) and can include dots, dashes, and alphanumeric suffixes
version_pattern = r"^(.+?)[-_](v?\d+(?:\.\d+)*(?:[-_][a-zA-Z0-9]+)?)$"
match = re.match(version_pattern, base_name)
if match:
name, version = match.groups()
return ParsedUrl(
package_name=name,
version=version,
filename=filename,
)
# No version found, use base_name as package name
return ParsedUrl(
package_name=base_name,
version=None,
filename=filename,
)
def parse_url(url: str, source_type: str) -> ParsedUrl:
"""
Parse URL to extract package name and version based on source type.
Args:
url: The URL to parse.
source_type: The source type (npm, pypi, maven, docker, etc.)
Returns:
ParsedUrl with extracted information.
"""
parsed = None
if source_type == "npm":
parsed = parse_npm_url(url)
elif source_type == "pypi":
parsed = parse_pypi_url(url)
elif source_type == "maven":
parsed = parse_maven_url(url)
elif source_type == "docker":
parsed = parse_docker_url(url)
# Fall back to generic parsing if type-specific parsing fails
if parsed is None:
parsed = parse_generic_url(url)
return parsed
def get_system_project_name(source_type: str) -> str:
"""Get the system project name for a source type."""
return SYSTEM_PROJECT_NAMES.get(source_type, "_generic")
def get_system_project_description(source_type: str) -> str:
"""Get the system project description for a source type."""
return SYSTEM_PROJECT_DESCRIPTIONS.get(
source_type, "System cache for artifacts"
)

View File

@@ -1,8 +1,5 @@
from pydantic_settings import BaseSettings
from functools import lru_cache
from typing import Optional
import os
import re
class Settings(BaseSettings):
@@ -59,11 +56,6 @@ class Settings(BaseSettings):
# Initial admin user settings
admin_password: str = "" # Initial admin password (if empty, uses 'changeme123')
# Cache settings
cache_encryption_key: str = "" # Fernet key for encrypting upstream credentials (auto-generated if empty)
# Global cache settings override (None = use DB value, True/False = override DB)
cache_auto_create_system_projects: Optional[bool] = None # Override auto_create_system_projects
# JWT Authentication settings (optional, for external identity providers)
jwt_enabled: bool = False # Enable JWT token validation
jwt_secret: str = "" # Secret key for HS256, or leave empty for RS256 with JWKS
@@ -96,110 +88,3 @@ class Settings(BaseSettings):
@lru_cache()
def get_settings() -> Settings:
return Settings()
class EnvUpstreamSource:
"""Represents an upstream source defined via environment variables."""
def __init__(
self,
name: str,
url: str,
source_type: str = "generic",
enabled: bool = True,
auth_type: str = "none",
username: Optional[str] = None,
password: Optional[str] = None,
priority: int = 100,
):
self.name = name
self.url = url
self.source_type = source_type
self.enabled = enabled
self.auth_type = auth_type
self.username = username
self.password = password
self.priority = priority
self.source = "env" # Mark as env-defined
def parse_upstream_sources_from_env() -> list[EnvUpstreamSource]:
"""
Parse upstream sources from environment variables.
Uses double underscore (__) as separator to allow source names with single underscores.
Pattern: ORCHARD_UPSTREAM__{NAME}__FIELD
Example:
ORCHARD_UPSTREAM__NPM_PRIVATE__URL=https://npm.corp.com
ORCHARD_UPSTREAM__NPM_PRIVATE__TYPE=npm
ORCHARD_UPSTREAM__NPM_PRIVATE__ENABLED=true
ORCHARD_UPSTREAM__NPM_PRIVATE__AUTH_TYPE=basic
ORCHARD_UPSTREAM__NPM_PRIVATE__USERNAME=reader
ORCHARD_UPSTREAM__NPM_PRIVATE__PASSWORD=secret
Returns:
List of EnvUpstreamSource objects parsed from environment variables.
"""
# Pattern: ORCHARD_UPSTREAM__{NAME}__{FIELD}
pattern = re.compile(r"^ORCHARD_UPSTREAM__([A-Z0-9_]+)__([A-Z_]+)$", re.IGNORECASE)
# Collect all env vars matching the pattern, grouped by source name
sources_data: dict[str, dict[str, str]] = {}
for key, value in os.environ.items():
match = pattern.match(key)
if match:
source_name = match.group(1).lower() # Normalize to lowercase
field = match.group(2).upper()
if source_name not in sources_data:
sources_data[source_name] = {}
sources_data[source_name][field] = value
# Build source objects from collected data
sources: list[EnvUpstreamSource] = []
for name, data in sources_data.items():
# URL is required
url = data.get("URL")
if not url:
continue # Skip sources without URL
# Parse boolean fields
def parse_bool(val: Optional[str], default: bool) -> bool:
if val is None:
return default
return val.lower() in ("true", "1", "yes", "on")
# Parse integer fields
def parse_int(val: Optional[str], default: int) -> int:
if val is None:
return default
try:
return int(val)
except ValueError:
return default
source = EnvUpstreamSource(
name=name.replace("_", "-"), # Convert underscores to hyphens for readability
url=url,
source_type=data.get("TYPE", "generic").lower(),
enabled=parse_bool(data.get("ENABLED"), True),
auth_type=data.get("AUTH_TYPE", "none").lower(),
username=data.get("USERNAME"),
password=data.get("PASSWORD"),
priority=parse_int(data.get("PRIORITY"), 100),
)
sources.append(source)
return sources
@lru_cache()
def get_env_upstream_sources() -> tuple[EnvUpstreamSource, ...]:
"""
Get cached list of upstream sources from environment variables.
Returns a tuple for hashability (required by lru_cache).
"""
return tuple(parse_upstream_sources_from_env())

View File

@@ -9,7 +9,6 @@ import hashlib
from .config import get_settings
from .models import Base
from .purge_seed_data import should_purge_seed_data, purge_seed_data
settings = get_settings()
logger = logging.getLogger(__name__)
@@ -81,14 +80,6 @@ def init_db():
# Run migrations for schema updates
_run_migrations()
# Purge seed data if requested (for transitioning to production-like environment)
if should_purge_seed_data():
db = SessionLocal()
try:
purge_seed_data(db)
finally:
db.close()
def _ensure_migrations_table(conn) -> None:
"""Create the migrations tracking table if it doesn't exist."""
@@ -438,133 +429,6 @@ def _run_migrations():
END $$;
""",
),
Migration(
name="016_add_is_system_to_projects",
sql="""
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM information_schema.columns
WHERE table_name = 'projects' AND column_name = 'is_system'
) THEN
ALTER TABLE projects ADD COLUMN is_system BOOLEAN NOT NULL DEFAULT FALSE;
CREATE INDEX IF NOT EXISTS idx_projects_is_system ON projects(is_system);
END IF;
END $$;
""",
),
Migration(
name="017_create_upstream_sources",
sql="""
CREATE TABLE IF NOT EXISTS upstream_sources (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name VARCHAR(255) NOT NULL UNIQUE,
source_type VARCHAR(50) NOT NULL DEFAULT 'generic',
url VARCHAR(2048) NOT NULL,
enabled BOOLEAN NOT NULL DEFAULT FALSE,
auth_type VARCHAR(20) NOT NULL DEFAULT 'none',
username VARCHAR(255),
password_encrypted BYTEA,
headers_encrypted BYTEA,
priority INTEGER NOT NULL DEFAULT 100,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
CONSTRAINT check_source_type CHECK (
source_type IN ('npm', 'pypi', 'maven', 'docker', 'helm', 'nuget', 'deb', 'rpm', 'generic')
),
CONSTRAINT check_auth_type CHECK (
auth_type IN ('none', 'basic', 'bearer', 'api_key')
),
CONSTRAINT check_priority_positive CHECK (priority > 0)
);
CREATE INDEX IF NOT EXISTS idx_upstream_sources_enabled ON upstream_sources(enabled);
CREATE INDEX IF NOT EXISTS idx_upstream_sources_source_type ON upstream_sources(source_type);
CREATE INDEX IF NOT EXISTS idx_upstream_sources_priority ON upstream_sources(priority);
""",
),
Migration(
name="018_create_cache_settings",
sql="""
CREATE TABLE IF NOT EXISTS cache_settings (
id INTEGER PRIMARY KEY DEFAULT 1,
auto_create_system_projects BOOLEAN NOT NULL DEFAULT TRUE,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
CONSTRAINT check_cache_settings_singleton CHECK (id = 1)
);
INSERT INTO cache_settings (id, auto_create_system_projects)
VALUES (1, TRUE)
ON CONFLICT (id) DO NOTHING;
""",
),
Migration(
name="019_create_cached_urls",
sql="""
CREATE TABLE IF NOT EXISTS cached_urls (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
url VARCHAR(4096) NOT NULL,
url_hash VARCHAR(64) NOT NULL UNIQUE,
artifact_id VARCHAR(64) NOT NULL REFERENCES artifacts(id),
source_id UUID REFERENCES upstream_sources(id) ON DELETE SET NULL,
fetched_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
response_headers JSONB DEFAULT '{}',
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_cached_urls_url_hash ON cached_urls(url_hash);
CREATE INDEX IF NOT EXISTS idx_cached_urls_artifact_id ON cached_urls(artifact_id);
CREATE INDEX IF NOT EXISTS idx_cached_urls_source_id ON cached_urls(source_id);
CREATE INDEX IF NOT EXISTS idx_cached_urls_fetched_at ON cached_urls(fetched_at);
""",
),
Migration(
name="020_seed_default_upstream_sources",
sql="""
-- Originally seeded public sources, but these are no longer used.
-- Migration 023 deletes any previously seeded sources.
-- This migration is now a no-op for fresh installs.
SELECT 1;
""",
),
Migration(
name="021_remove_is_public_from_upstream_sources",
sql="""
DO $$
BEGIN
-- Drop the index if it exists
DROP INDEX IF EXISTS idx_upstream_sources_is_public;
-- Drop the column if it exists
IF EXISTS (
SELECT 1 FROM information_schema.columns
WHERE table_name = 'upstream_sources' AND column_name = 'is_public'
) THEN
ALTER TABLE upstream_sources DROP COLUMN is_public;
END IF;
END $$;
""",
),
Migration(
name="022_remove_allow_public_internet_from_cache_settings",
sql="""
DO $$
BEGIN
IF EXISTS (
SELECT 1 FROM information_schema.columns
WHERE table_name = 'cache_settings' AND column_name = 'allow_public_internet'
) THEN
ALTER TABLE cache_settings DROP COLUMN allow_public_internet;
END IF;
END $$;
""",
),
Migration(
name="023_delete_seeded_public_sources",
sql="""
-- Delete the seeded public sources that were added by migration 020
DELETE FROM upstream_sources
WHERE name IN ('npm-public', 'pypi-public', 'maven-central', 'docker-hub');
""",
),
]
with engine.connect() as conn:

View File

@@ -1,160 +0,0 @@
"""
Encryption utilities for sensitive data storage.
Uses Fernet symmetric encryption for credentials like upstream passwords.
The encryption key is sourced from ORCHARD_CACHE_ENCRYPTION_KEY environment variable.
If not set, a random key is generated on startup (with a warning).
"""
import base64
import logging
import os
import secrets
from functools import lru_cache
from typing import Optional
from cryptography.fernet import Fernet, InvalidToken
logger = logging.getLogger(__name__)
# Module-level storage for auto-generated key (only used if env var not set)
_generated_key: Optional[bytes] = None
def _get_key_from_env() -> Optional[bytes]:
"""Get encryption key from environment variable."""
key_str = os.environ.get("ORCHARD_CACHE_ENCRYPTION_KEY", "")
if not key_str:
return None
# Support both raw base64 and url-safe base64 formats
try:
# Try to decode as-is (Fernet keys are url-safe base64)
key_bytes = key_str.encode("utf-8")
# Validate it's a valid Fernet key by trying to create a Fernet instance
Fernet(key_bytes)
return key_bytes
except Exception:
pass
# Try base64 decoding if it's a raw 32-byte key encoded as base64
try:
decoded = base64.urlsafe_b64decode(key_str)
if len(decoded) == 32:
# Re-encode as url-safe base64 for Fernet
key_bytes = base64.urlsafe_b64encode(decoded)
Fernet(key_bytes)
return key_bytes
except Exception:
pass
logger.error(
"ORCHARD_CACHE_ENCRYPTION_KEY is set but invalid. "
"Must be a valid Fernet key (32 bytes, url-safe base64 encoded). "
"Generate one with: python -c \"from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())\""
)
return None
def get_encryption_key() -> bytes:
"""
Get the Fernet encryption key.
Returns the key from ORCHARD_CACHE_ENCRYPTION_KEY if set and valid,
otherwise generates a random key (with a warning logged).
The generated key is cached for the lifetime of the process.
"""
global _generated_key
# Try to get from environment
env_key = _get_key_from_env()
if env_key:
return env_key
# Generate a new key if needed
if _generated_key is None:
_generated_key = Fernet.generate_key()
logger.warning(
"ORCHARD_CACHE_ENCRYPTION_KEY not set - using auto-generated key. "
"Encrypted credentials will be lost on restart! "
"Set ORCHARD_CACHE_ENCRYPTION_KEY for persistent encryption. "
"Generate a key with: python -c \"from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())\""
)
return _generated_key
@lru_cache(maxsize=1)
def _get_fernet() -> Fernet:
"""Get a cached Fernet instance."""
return Fernet(get_encryption_key())
def encrypt_value(plaintext: str) -> bytes:
"""
Encrypt a string value using Fernet.
Args:
plaintext: The string to encrypt
Returns:
Encrypted bytes (includes Fernet token with timestamp)
"""
if not plaintext:
raise ValueError("Cannot encrypt empty value")
fernet = _get_fernet()
return fernet.encrypt(plaintext.encode("utf-8"))
def decrypt_value(ciphertext: bytes) -> str:
"""
Decrypt a Fernet-encrypted value.
Args:
ciphertext: The encrypted bytes
Returns:
Decrypted string
Raises:
InvalidToken: If decryption fails (wrong key or corrupted data)
"""
if not ciphertext:
raise ValueError("Cannot decrypt empty value")
fernet = _get_fernet()
return fernet.decrypt(ciphertext).decode("utf-8")
def can_decrypt(ciphertext: bytes) -> bool:
"""
Check if a value can be decrypted with the current key.
Useful for checking if credentials are still valid after key rotation.
Args:
ciphertext: The encrypted bytes
Returns:
True if decryption succeeds, False otherwise
"""
if not ciphertext:
return False
try:
decrypt_value(ciphertext)
return True
except (InvalidToken, ValueError):
return False
def generate_key() -> str:
"""
Generate a new Fernet encryption key.
Returns:
A valid Fernet key as a string (url-safe base64 encoded)
"""
return Fernet.generate_key().decode("utf-8")

View File

@@ -11,7 +11,6 @@ from slowapi.errors import RateLimitExceeded
from .config import get_settings
from .database import init_db, SessionLocal
from .routes import router
from .pypi_proxy import router as pypi_router
from .seed import seed_database
from .auth import create_default_admin
from .rate_limit import limiter
@@ -66,7 +65,6 @@ app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
# Include API routes
app.include_router(router)
app.include_router(pypi_router)
# Serve static files (React build) if the directory exists
static_dir = os.path.join(os.path.dirname(__file__), "..", "..", "frontend", "dist")

View File

@@ -12,7 +12,6 @@ from sqlalchemy import (
Index,
JSON,
ARRAY,
LargeBinary,
)
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import relationship, declarative_base
@@ -28,7 +27,6 @@ class Project(Base):
name = Column(String(255), unique=True, nullable=False)
description = Column(Text)
is_public = Column(Boolean, default=True)
is_system = Column(Boolean, default=False, nullable=False)
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
updated_at = Column(
DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow
@@ -48,7 +46,6 @@ class Project(Base):
Index("idx_projects_name", "name"),
Index("idx_projects_created_by", "created_by"),
Index("idx_projects_team_id", "team_id"),
Index("idx_projects_is_system", "is_system"),
)
@@ -640,166 +637,3 @@ class TeamMembership(Base):
name="check_team_role",
),
)
# =============================================================================
# Upstream Caching Models
# =============================================================================
# Valid source types for upstream registries
SOURCE_TYPES = ["npm", "pypi", "maven", "docker", "helm", "nuget", "deb", "rpm", "generic"]
# Valid authentication types
AUTH_TYPES = ["none", "basic", "bearer", "api_key"]
class UpstreamSource(Base):
"""Configuration for an upstream artifact registry.
Stores connection details and authentication for upstream registries
like npm, PyPI, Maven Central, or private Artifactory instances.
"""
__tablename__ = "upstream_sources"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
name = Column(String(255), unique=True, nullable=False)
source_type = Column(String(50), default="generic", nullable=False)
url = Column(String(2048), nullable=False)
enabled = Column(Boolean, default=False, nullable=False)
auth_type = Column(String(20), default="none", nullable=False)
username = Column(String(255))
password_encrypted = Column(LargeBinary)
headers_encrypted = Column(LargeBinary)
priority = Column(Integer, default=100, nullable=False)
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
updated_at = Column(
DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow
)
# Relationships
cached_urls = relationship("CachedUrl", back_populates="source")
__table_args__ = (
Index("idx_upstream_sources_enabled", "enabled"),
Index("idx_upstream_sources_source_type", "source_type"),
Index("idx_upstream_sources_priority", "priority"),
CheckConstraint(
"source_type IN ('npm', 'pypi', 'maven', 'docker', 'helm', 'nuget', 'deb', 'rpm', 'generic')",
name="check_source_type",
),
CheckConstraint(
"auth_type IN ('none', 'basic', 'bearer', 'api_key')",
name="check_auth_type",
),
CheckConstraint("priority > 0", name="check_priority_positive"),
)
def set_password(self, password: str) -> None:
"""Encrypt and store a password/token."""
from .encryption import encrypt_value
if password:
self.password_encrypted = encrypt_value(password)
else:
self.password_encrypted = None
def get_password(self) -> str | None:
"""Decrypt and return the stored password/token."""
from .encryption import decrypt_value
if self.password_encrypted:
try:
return decrypt_value(self.password_encrypted)
except Exception:
return None
return None
def has_password(self) -> bool:
"""Check if a password/token is stored."""
return self.password_encrypted is not None
def set_headers(self, headers: dict) -> None:
"""Encrypt and store custom headers as JSON."""
from .encryption import encrypt_value
import json
if headers:
self.headers_encrypted = encrypt_value(json.dumps(headers))
else:
self.headers_encrypted = None
def get_headers(self) -> dict | None:
"""Decrypt and return custom headers."""
from .encryption import decrypt_value
import json
if self.headers_encrypted:
try:
return json.loads(decrypt_value(self.headers_encrypted))
except Exception:
return None
return None
class CacheSettings(Base):
"""Global cache settings (singleton table).
Controls behavior of the upstream caching system.
"""
__tablename__ = "cache_settings"
id = Column(Integer, primary_key=True, default=1)
auto_create_system_projects = Column(Boolean, default=True, nullable=False)
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
updated_at = Column(
DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow
)
__table_args__ = (
CheckConstraint("id = 1", name="check_cache_settings_singleton"),
)
class CachedUrl(Base):
"""Tracks URL to artifact mappings for provenance.
Records which URLs have been cached and maps them to their stored artifacts.
Enables "is this URL already cached?" lookups and audit trails.
"""
__tablename__ = "cached_urls"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
url = Column(String(4096), nullable=False)
url_hash = Column(String(64), unique=True, nullable=False)
artifact_id = Column(
String(64), ForeignKey("artifacts.id"), nullable=False
)
source_id = Column(
UUID(as_uuid=True),
ForeignKey("upstream_sources.id", ondelete="SET NULL"),
)
fetched_at = Column(DateTime(timezone=True), default=datetime.utcnow, nullable=False)
response_headers = Column(JSON, default=dict)
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
# Relationships
artifact = relationship("Artifact")
source = relationship("UpstreamSource", back_populates="cached_urls")
__table_args__ = (
Index("idx_cached_urls_url_hash", "url_hash"),
Index("idx_cached_urls_artifact_id", "artifact_id"),
Index("idx_cached_urls_source_id", "source_id"),
Index("idx_cached_urls_fetched_at", "fetched_at"),
)
@staticmethod
def compute_url_hash(url: str) -> str:
"""Compute SHA256 hash of a URL for fast lookups."""
import hashlib
return hashlib.sha256(url.encode("utf-8")).hexdigest()

View File

@@ -1,212 +0,0 @@
"""
Purge seed/demo data from the database.
This is used when transitioning an environment from dev/test to production-like.
Triggered by setting ORCHARD_PURGE_SEED_DATA=true environment variable.
"""
import logging
import os
from sqlalchemy.orm import Session
from .models import (
Project,
Package,
Artifact,
Tag,
Upload,
PackageVersion,
ArtifactDependency,
Team,
TeamMembership,
User,
AccessPermission,
)
from .storage import get_storage
logger = logging.getLogger(__name__)
# Seed data identifiers (from seed.py)
SEED_PROJECT_NAMES = [
"frontend-libs",
"backend-services",
"mobile-apps",
"internal-tools",
]
SEED_TEAM_SLUG = "demo-team"
SEED_USERNAMES = [
"alice",
"bob",
"charlie",
"diana",
"eve",
"frank",
]
def should_purge_seed_data() -> bool:
"""Check if seed data should be purged based on environment variable."""
return os.environ.get("ORCHARD_PURGE_SEED_DATA", "").lower() == "true"
def purge_seed_data(db: Session) -> dict:
"""
Purge all seed/demo data from the database.
Returns a dict with counts of deleted items.
"""
logger.warning("PURGING SEED DATA - This will delete demo projects, users, and teams")
results = {
"dependencies_deleted": 0,
"tags_deleted": 0,
"versions_deleted": 0,
"uploads_deleted": 0,
"artifacts_deleted": 0,
"packages_deleted": 0,
"projects_deleted": 0,
"permissions_deleted": 0,
"team_memberships_deleted": 0,
"users_deleted": 0,
"teams_deleted": 0,
"s3_objects_deleted": 0,
}
storage = get_storage()
# Find seed projects
seed_projects = db.query(Project).filter(Project.name.in_(SEED_PROJECT_NAMES)).all()
seed_project_ids = [p.id for p in seed_projects]
if not seed_projects:
logger.info("No seed projects found, nothing to purge")
return results
logger.info(f"Found {len(seed_projects)} seed projects to purge")
# Find packages in seed projects
seed_packages = db.query(Package).filter(Package.project_id.in_(seed_project_ids)).all()
seed_package_ids = [p.id for p in seed_packages]
# Find artifacts in seed packages (via uploads)
seed_uploads = db.query(Upload).filter(Upload.package_id.in_(seed_package_ids)).all()
seed_artifact_ids = list(set(u.artifact_id for u in seed_uploads))
# Delete in order (respecting foreign keys)
# 1. Delete artifact dependencies
if seed_artifact_ids:
count = db.query(ArtifactDependency).filter(
ArtifactDependency.artifact_id.in_(seed_artifact_ids)
).delete(synchronize_session=False)
results["dependencies_deleted"] = count
logger.info(f"Deleted {count} artifact dependencies")
# 2. Delete tags
if seed_package_ids:
count = db.query(Tag).filter(Tag.package_id.in_(seed_package_ids)).delete(
synchronize_session=False
)
results["tags_deleted"] = count
logger.info(f"Deleted {count} tags")
# 3. Delete package versions
if seed_package_ids:
count = db.query(PackageVersion).filter(
PackageVersion.package_id.in_(seed_package_ids)
).delete(synchronize_session=False)
results["versions_deleted"] = count
logger.info(f"Deleted {count} package versions")
# 4. Delete uploads
if seed_package_ids:
count = db.query(Upload).filter(Upload.package_id.in_(seed_package_ids)).delete(
synchronize_session=False
)
results["uploads_deleted"] = count
logger.info(f"Deleted {count} uploads")
# 5. Delete S3 objects for seed artifacts
if seed_artifact_ids:
seed_artifacts = db.query(Artifact).filter(Artifact.id.in_(seed_artifact_ids)).all()
for artifact in seed_artifacts:
if artifact.s3_key:
try:
storage.client.delete_object(Bucket=storage.bucket, Key=artifact.s3_key)
results["s3_objects_deleted"] += 1
except Exception as e:
logger.warning(f"Failed to delete S3 object {artifact.s3_key}: {e}")
logger.info(f"Deleted {results['s3_objects_deleted']} S3 objects")
# 6. Delete artifacts (only those with ref_count that would be 0 after our deletions)
# Since we deleted all tags/versions pointing to these artifacts, we can delete them
if seed_artifact_ids:
count = db.query(Artifact).filter(Artifact.id.in_(seed_artifact_ids)).delete(
synchronize_session=False
)
results["artifacts_deleted"] = count
logger.info(f"Deleted {count} artifacts")
# 7. Delete packages
if seed_package_ids:
count = db.query(Package).filter(Package.id.in_(seed_package_ids)).delete(
synchronize_session=False
)
results["packages_deleted"] = count
logger.info(f"Deleted {count} packages")
# 8. Delete access permissions for seed projects
if seed_project_ids:
count = db.query(AccessPermission).filter(
AccessPermission.project_id.in_(seed_project_ids)
).delete(synchronize_session=False)
results["permissions_deleted"] = count
logger.info(f"Deleted {count} access permissions")
# 9. Delete seed projects
count = db.query(Project).filter(Project.name.in_(SEED_PROJECT_NAMES)).delete(
synchronize_session=False
)
results["projects_deleted"] = count
logger.info(f"Deleted {count} projects")
# 10. Find and delete seed team
seed_team = db.query(Team).filter(Team.slug == SEED_TEAM_SLUG).first()
if seed_team:
# Delete team memberships first
count = db.query(TeamMembership).filter(
TeamMembership.team_id == seed_team.id
).delete(synchronize_session=False)
results["team_memberships_deleted"] = count
logger.info(f"Deleted {count} team memberships")
# Delete the team
db.delete(seed_team)
results["teams_deleted"] = 1
logger.info(f"Deleted team: {SEED_TEAM_SLUG}")
# 11. Delete seed users (but NOT admin)
seed_users = db.query(User).filter(User.username.in_(SEED_USERNAMES)).all()
for user in seed_users:
# Delete any remaining team memberships for this user
db.query(TeamMembership).filter(TeamMembership.user_id == user.id).delete(
synchronize_session=False
)
# Delete any access permissions for this user
# Note: AccessPermission.user_id is VARCHAR (username), not UUID
db.query(AccessPermission).filter(AccessPermission.user_id == user.username).delete(
synchronize_session=False
)
db.delete(user)
results["users_deleted"] += 1
if results["users_deleted"] > 0:
logger.info(f"Deleted {results['users_deleted']} seed users")
db.commit()
logger.warning("SEED DATA PURGE COMPLETE")
logger.info(f"Purge results: {results}")
return results

View File

@@ -1,780 +0,0 @@
"""
Transparent PyPI proxy implementing PEP 503 (Simple API).
Provides endpoints that allow pip to use Orchard as a PyPI index URL.
Artifacts are cached on first access through configured upstream sources.
"""
import hashlib
import logging
import re
import tarfile
import zipfile
from io import BytesIO
from typing import Optional, List, Tuple
from urllib.parse import urljoin, urlparse, quote, unquote
import httpx
from fastapi import APIRouter, Depends, HTTPException, Request, Response
from fastapi.responses import StreamingResponse, HTMLResponse
from sqlalchemy.orm import Session
from .database import get_db
from .models import UpstreamSource, CachedUrl, Artifact, Project, Package, Tag, PackageVersion, ArtifactDependency
from .storage import S3Storage, get_storage
from .config import get_env_upstream_sources
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/pypi", tags=["pypi-proxy"])
def _parse_requires_dist(requires_dist: str) -> Tuple[str, Optional[str]]:
"""Parse a Requires-Dist line into (package_name, version_constraint).
Examples:
"requests (>=2.25.0)" -> ("requests", ">=2.25.0")
"typing-extensions; python_version < '3.8'" -> ("typing-extensions", None)
"numpy>=1.21.0" -> ("numpy", ">=1.21.0")
"certifi" -> ("certifi", None)
Returns:
Tuple of (normalized_package_name, version_constraint or None)
"""
# Remove any environment markers (after semicolon)
if ';' in requires_dist:
requires_dist = requires_dist.split(';')[0].strip()
# Match patterns like "package (>=1.0)" or "package>=1.0" or "package"
# Pattern breakdown: package name, optional whitespace, optional version in parens or directly
match = re.match(
r'^([a-zA-Z0-9][-a-zA-Z0-9._]*)\s*(?:\(([^)]+)\)|([<>=!~][^\s;]+))?',
requires_dist.strip()
)
if not match:
return None, None
package_name = match.group(1)
# Version can be in parentheses (group 2) or directly after name (group 3)
version_constraint = match.group(2) or match.group(3)
# Normalize package name (PEP 503)
normalized_name = re.sub(r'[-_.]+', '-', package_name).lower()
# Clean up version constraint
if version_constraint:
version_constraint = version_constraint.strip()
return normalized_name, version_constraint
def _extract_requires_from_metadata(metadata_content: str) -> List[Tuple[str, Optional[str]]]:
"""Extract all Requires-Dist entries from METADATA/PKG-INFO content.
Args:
metadata_content: The content of a METADATA or PKG-INFO file
Returns:
List of (package_name, version_constraint) tuples
"""
dependencies = []
for line in metadata_content.split('\n'):
if line.startswith('Requires-Dist:'):
# Extract the value after "Requires-Dist:"
value = line[len('Requires-Dist:'):].strip()
pkg_name, version = _parse_requires_dist(value)
if pkg_name:
dependencies.append((pkg_name, version))
return dependencies
def _extract_metadata_from_wheel(content: bytes) -> Optional[str]:
"""Extract METADATA file content from a wheel (zip) file.
Wheel files have structure: {package}-{version}.dist-info/METADATA
Args:
content: The wheel file content as bytes
Returns:
METADATA file content as string, or None if not found
"""
try:
with zipfile.ZipFile(BytesIO(content)) as zf:
# Find the .dist-info directory
for name in zf.namelist():
if name.endswith('.dist-info/METADATA'):
return zf.read(name).decode('utf-8', errors='replace')
except Exception as e:
logger.warning(f"Failed to extract metadata from wheel: {e}")
return None
def _extract_metadata_from_sdist(content: bytes, filename: str) -> Optional[str]:
"""Extract PKG-INFO file content from a source distribution (.tar.gz).
Source distributions have structure: {package}-{version}/PKG-INFO
Args:
content: The tarball content as bytes
filename: The original filename (used to determine package name)
Returns:
PKG-INFO file content as string, or None if not found
"""
try:
with tarfile.open(fileobj=BytesIO(content), mode='r:gz') as tf:
# Find PKG-INFO in the root directory of the archive
for member in tf.getmembers():
if member.name.endswith('/PKG-INFO') and member.name.count('/') == 1:
f = tf.extractfile(member)
if f:
return f.read().decode('utf-8', errors='replace')
except Exception as e:
logger.warning(f"Failed to extract metadata from sdist {filename}: {e}")
return None
def _extract_dependencies(content: bytes, filename: str) -> List[Tuple[str, Optional[str]]]:
"""Extract dependencies from a PyPI package file.
Supports wheel (.whl) and source distribution (.tar.gz) formats.
Args:
content: The package file content as bytes
filename: The original filename
Returns:
List of (package_name, version_constraint) tuples
"""
metadata = None
if filename.endswith('.whl'):
metadata = _extract_metadata_from_wheel(content)
elif filename.endswith('.tar.gz'):
metadata = _extract_metadata_from_sdist(content, filename)
if metadata:
return _extract_requires_from_metadata(metadata)
return []
# Timeout configuration for proxy requests
PROXY_CONNECT_TIMEOUT = 30.0
PROXY_READ_TIMEOUT = 60.0
def _extract_pypi_version(filename: str) -> Optional[str]:
"""Extract version from PyPI filename.
Handles formats like:
- cowsay-6.1-py3-none-any.whl
- cowsay-1.0.tar.gz
- some_package-1.2.3.post1-cp39-cp39-linux_x86_64.whl
"""
# Remove extension
if filename.endswith('.whl'):
# Wheel: name-version-pytag-abitag-platform.whl
parts = filename[:-4].split('-')
if len(parts) >= 2:
return parts[1]
elif filename.endswith('.tar.gz'):
# Source: name-version.tar.gz
base = filename[:-7]
# Find the last hyphen that precedes a version-like string
match = re.match(r'^(.+)-(\d+.*)$', base)
if match:
return match.group(2)
elif filename.endswith('.zip'):
# Egg/zip: name-version.zip
base = filename[:-4]
match = re.match(r'^(.+)-(\d+.*)$', base)
if match:
return match.group(2)
return None
def _get_pypi_upstream_sources(db: Session) -> list[UpstreamSource]:
"""Get all enabled upstream sources configured for PyPI."""
# Get database sources
db_sources = (
db.query(UpstreamSource)
.filter(
UpstreamSource.source_type == "pypi",
UpstreamSource.enabled == True,
)
.order_by(UpstreamSource.priority)
.all()
)
# Get env sources
env_sources = [
s for s in get_env_upstream_sources()
if s.source_type == "pypi" and s.enabled
]
# Combine and sort by priority
all_sources = list(db_sources) + list(env_sources)
return sorted(all_sources, key=lambda s: s.priority)
def _build_auth_headers(source) -> dict:
"""Build authentication headers for an upstream source."""
headers = {}
if hasattr(source, 'auth_type'):
if source.auth_type == "bearer":
password = source.get_password() if hasattr(source, 'get_password') else getattr(source, 'password', None)
if password:
headers["Authorization"] = f"Bearer {password}"
elif source.auth_type == "api_key":
custom_headers = source.get_headers() if hasattr(source, 'get_headers') else {}
if custom_headers:
headers.update(custom_headers)
return headers
def _get_basic_auth(source) -> Optional[tuple[str, str]]:
"""Get basic auth credentials if applicable."""
if hasattr(source, 'auth_type') and source.auth_type == "basic":
username = getattr(source, 'username', None)
if username:
password = source.get_password() if hasattr(source, 'get_password') else getattr(source, 'password', '')
return (username, password or '')
return None
def _get_base_url(request: Request) -> str:
"""
Get the external base URL, respecting X-Forwarded-Proto header.
When behind a reverse proxy that terminates SSL, the request.base_url
will show http:// even though the external URL is https://. This function
checks the X-Forwarded-Proto header to determine the correct scheme.
"""
base_url = str(request.base_url).rstrip('/')
# Check for X-Forwarded-Proto header (set by reverse proxies)
forwarded_proto = request.headers.get('x-forwarded-proto')
if forwarded_proto:
# Replace the scheme with the forwarded protocol
parsed = urlparse(base_url)
base_url = f"{forwarded_proto}://{parsed.netloc}{parsed.path}"
return base_url
def _rewrite_package_links(html: str, base_url: str, package_name: str, upstream_base_url: str) -> str:
"""
Rewrite download links in a PyPI simple page to go through our proxy.
Args:
html: The HTML content from upstream
base_url: Our server's base URL
package_name: The package name for the URL path
upstream_base_url: The upstream URL used to fetch this page (for resolving relative URLs)
Returns:
HTML with rewritten download links
"""
# Pattern to match href attributes in anchor tags
# PyPI simple pages have links like:
# <a href="https://files.pythonhosted.org/packages/.../file.tar.gz#sha256=...">file.tar.gz</a>
# Or relative URLs from Artifactory like:
# <a href="../../packages/packages/62/35/.../requests-0.10.0.tar.gz#sha256=...">
def replace_href(match):
original_url = match.group(1)
# Resolve relative URLs to absolute using the upstream base URL
if not original_url.startswith(('http://', 'https://')):
# Split off fragment before resolving
url_without_fragment = original_url.split('#')[0]
fragment_part = original_url[len(url_without_fragment):]
absolute_url = urljoin(upstream_base_url, url_without_fragment) + fragment_part
else:
absolute_url = original_url
# Extract the filename from the URL
parsed = urlparse(absolute_url)
path_parts = parsed.path.split('/')
filename = path_parts[-1] if path_parts else ''
# Keep the hash fragment if present
fragment = f"#{parsed.fragment}" if parsed.fragment else ""
# Encode the absolute URL (without fragment) for safe transmission
encoded_url = quote(absolute_url.split('#')[0], safe='')
# Build new URL pointing to our proxy
new_url = f"{base_url}/pypi/simple/{package_name}/{filename}?upstream={encoded_url}{fragment}"
return f'href="{new_url}"'
# Match href="..." patterns
rewritten = re.sub(r'href="([^"]+)"', replace_href, html)
return rewritten
@router.get("/simple/")
async def pypi_simple_index(
request: Request,
db: Session = Depends(get_db),
):
"""
PyPI Simple API index - lists all packages.
Proxies to the first available upstream PyPI source.
"""
sources = _get_pypi_upstream_sources(db)
if not sources:
raise HTTPException(
status_code=503,
detail="No PyPI upstream sources configured"
)
# Try each source in priority order
last_error = None
for source in sources:
try:
headers = {"User-Agent": "Orchard-PyPI-Proxy/1.0"}
headers.update(_build_auth_headers(source))
auth = _get_basic_auth(source)
# Use URL as-is - users should provide full path including /simple
simple_url = source.url.rstrip('/') + '/'
timeout = httpx.Timeout(PROXY_READ_TIMEOUT, connect=PROXY_CONNECT_TIMEOUT)
with httpx.Client(timeout=timeout, follow_redirects=False) as client:
response = client.get(
simple_url,
headers=headers,
auth=auth,
)
# Handle redirects manually to avoid loops
if response.status_code in (301, 302, 303, 307, 308):
redirect_url = response.headers.get('location')
if redirect_url:
# Follow the redirect once
response = client.get(
redirect_url,
headers=headers,
auth=auth,
follow_redirects=False,
)
if response.status_code == 200:
# Return the index as-is (links are to package pages, not files)
# We could rewrite these too, but for now just proxy
content = response.text
# Rewrite package links to go through our proxy
base_url = _get_base_url(request)
content = re.sub(
r'href="([^"]+)/"',
lambda m: f'href="{base_url}/pypi/simple/{m.group(1)}/"',
content
)
return HTMLResponse(content=content)
last_error = f"HTTP {response.status_code}"
except httpx.ConnectError as e:
last_error = f"Connection failed: {e}"
logger.warning(f"PyPI proxy: failed to connect to {source.url}: {e}")
except httpx.TimeoutException as e:
last_error = f"Timeout: {e}"
logger.warning(f"PyPI proxy: timeout connecting to {source.url}: {e}")
except Exception as e:
last_error = str(e)
logger.warning(f"PyPI proxy: error fetching from {source.url}: {e}")
raise HTTPException(
status_code=502,
detail=f"Failed to fetch package index from upstream: {last_error}"
)
@router.get("/simple/{package_name}/")
async def pypi_package_versions(
request: Request,
package_name: str,
db: Session = Depends(get_db),
):
"""
PyPI Simple API package page - lists all versions/files for a package.
Proxies to upstream and rewrites download links to go through our cache.
"""
sources = _get_pypi_upstream_sources(db)
if not sources:
raise HTTPException(
status_code=503,
detail="No PyPI upstream sources configured"
)
base_url = _get_base_url(request)
# Normalize package name (PEP 503)
normalized_name = re.sub(r'[-_.]+', '-', package_name).lower()
# Try each source in priority order
last_error = None
for source in sources:
try:
headers = {"User-Agent": "Orchard-PyPI-Proxy/1.0"}
headers.update(_build_auth_headers(source))
auth = _get_basic_auth(source)
# Use URL as-is - users should provide full path including /simple
package_url = source.url.rstrip('/') + f'/{normalized_name}/'
final_url = package_url # Track final URL after redirects
timeout = httpx.Timeout(PROXY_READ_TIMEOUT, connect=PROXY_CONNECT_TIMEOUT)
with httpx.Client(timeout=timeout, follow_redirects=False) as client:
response = client.get(
package_url,
headers=headers,
auth=auth,
)
# Handle redirects manually
redirect_count = 0
while response.status_code in (301, 302, 303, 307, 308) and redirect_count < 5:
redirect_url = response.headers.get('location')
if not redirect_url:
break
# Make redirect URL absolute if needed
if not redirect_url.startswith('http'):
redirect_url = urljoin(final_url, redirect_url)
final_url = redirect_url # Update final URL
response = client.get(
redirect_url,
headers=headers,
auth=auth,
follow_redirects=False,
)
redirect_count += 1
if response.status_code == 200:
content = response.text
# Rewrite download links to go through our proxy
# Pass final_url so relative URLs can be resolved correctly
content = _rewrite_package_links(content, base_url, normalized_name, final_url)
return HTMLResponse(content=content)
if response.status_code == 404:
# Package not found in this source, try next
last_error = f"Package not found in {source.name}"
continue
last_error = f"HTTP {response.status_code}"
except httpx.ConnectError as e:
last_error = f"Connection failed: {e}"
logger.warning(f"PyPI proxy: failed to connect to {source.url}: {e}")
except httpx.TimeoutException as e:
last_error = f"Timeout: {e}"
logger.warning(f"PyPI proxy: timeout connecting to {source.url}: {e}")
except Exception as e:
last_error = str(e)
logger.warning(f"PyPI proxy: error fetching {package_name} from {source.url}: {e}")
raise HTTPException(
status_code=404,
detail=f"Package '{package_name}' not found: {last_error}"
)
@router.get("/simple/{package_name}/{filename}")
async def pypi_download_file(
request: Request,
package_name: str,
filename: str,
upstream: Optional[str] = None,
db: Session = Depends(get_db),
storage: S3Storage = Depends(get_storage),
):
"""
Download a package file, caching it in Orchard.
Args:
package_name: The package name
filename: The filename to download
upstream: URL-encoded upstream URL to fetch from
"""
if not upstream:
raise HTTPException(
status_code=400,
detail="Missing 'upstream' query parameter with source URL"
)
# Decode the upstream URL
upstream_url = unquote(upstream)
# Check if we already have this URL cached
url_hash = hashlib.sha256(upstream_url.encode()).hexdigest()
cached_url = db.query(CachedUrl).filter(CachedUrl.url_hash == url_hash).first()
if cached_url:
# Serve from cache
artifact = db.query(Artifact).filter(Artifact.id == cached_url.artifact_id).first()
if artifact:
logger.info(f"PyPI proxy: serving cached {filename} (artifact {artifact.id[:12]})")
# Stream from S3
try:
stream, content_length, _ = storage.get_stream(artifact.s3_key)
def stream_content():
"""Generator that yields chunks from the S3 stream."""
try:
for chunk in stream.iter_chunks():
yield chunk
finally:
stream.close()
return StreamingResponse(
stream_content(),
media_type=artifact.content_type or "application/octet-stream",
headers={
"Content-Disposition": f'attachment; filename="{filename}"',
"Content-Length": str(content_length),
"X-Checksum-SHA256": artifact.id,
"X-Cache": "HIT",
}
)
except Exception as e:
logger.error(f"PyPI proxy: error streaming cached artifact: {e}")
# Fall through to fetch from upstream
# Not cached - fetch from upstream
sources = _get_pypi_upstream_sources(db)
# Use the first available source for authentication headers
# Note: The upstream URL may point to files.pythonhosted.org or other CDNs,
# not the configured source URL directly, so we can't strictly validate the host
matched_source = sources[0] if sources else None
try:
headers = {"User-Agent": "Orchard-PyPI-Proxy/1.0"}
if matched_source:
headers.update(_build_auth_headers(matched_source))
auth = _get_basic_auth(matched_source) if matched_source else None
timeout = httpx.Timeout(300.0, connect=PROXY_CONNECT_TIMEOUT) # 5 minutes for large files
# Fetch the file
logger.info(f"PyPI proxy: fetching {filename} from {upstream_url}")
with httpx.Client(timeout=timeout, follow_redirects=False) as client:
response = client.get(
upstream_url,
headers=headers,
auth=auth,
)
# Handle redirects manually
redirect_count = 0
while response.status_code in (301, 302, 303, 307, 308) and redirect_count < 5:
redirect_url = response.headers.get('location')
if not redirect_url:
break
if not redirect_url.startswith('http'):
redirect_url = urljoin(upstream_url, redirect_url)
logger.info(f"PyPI proxy: following redirect to {redirect_url}")
# Don't send auth to different hosts
redirect_headers = {"User-Agent": "Orchard-PyPI-Proxy/1.0"}
redirect_auth = None
if urlparse(redirect_url).netloc == urlparse(upstream_url).netloc:
redirect_headers.update(headers)
redirect_auth = auth
response = client.get(
redirect_url,
headers=redirect_headers,
auth=redirect_auth,
follow_redirects=False,
)
redirect_count += 1
if response.status_code != 200:
raise HTTPException(
status_code=response.status_code,
detail=f"Upstream returned {response.status_code}"
)
content = response.content
content_type = response.headers.get('content-type', 'application/octet-stream')
# Store in S3 (computes hash and deduplicates automatically)
from io import BytesIO
result = storage.store(BytesIO(content))
sha256 = result.sha256
size = result.size
logger.info(f"PyPI proxy: downloaded {filename}, {size} bytes, sha256={sha256[:12]}")
# Check if artifact already exists
existing = db.query(Artifact).filter(Artifact.id == sha256).first()
if existing:
# Increment ref count
existing.ref_count += 1
db.flush()
else:
# Create artifact record
new_artifact = Artifact(
id=sha256,
original_name=filename,
content_type=content_type,
size=size,
ref_count=1,
created_by="pypi-proxy",
s3_key=result.s3_key,
checksum_md5=result.md5,
checksum_sha1=result.sha1,
s3_etag=result.s3_etag,
)
db.add(new_artifact)
db.flush()
# Create/get system project and package
system_project = db.query(Project).filter(Project.name == "_pypi").first()
if not system_project:
system_project = Project(
name="_pypi",
description="System project for cached PyPI packages",
is_public=True,
is_system=True,
created_by="pypi-proxy",
)
db.add(system_project)
db.flush()
elif not system_project.is_system:
# Ensure existing project is marked as system
system_project.is_system = True
db.flush()
# Normalize package name
normalized_name = re.sub(r'[-_.]+', '-', package_name).lower()
package = db.query(Package).filter(
Package.project_id == system_project.id,
Package.name == normalized_name,
).first()
if not package:
package = Package(
project_id=system_project.id,
name=normalized_name,
description=f"PyPI package: {normalized_name}",
format="pypi",
)
db.add(package)
db.flush()
# Create tag with filename
existing_tag = db.query(Tag).filter(
Tag.package_id == package.id,
Tag.name == filename,
).first()
if not existing_tag:
tag = Tag(
package_id=package.id,
name=filename,
artifact_id=sha256,
created_by="pypi-proxy",
)
db.add(tag)
# Extract and create version
# Only create version for actual package files, not .metadata files
version = _extract_pypi_version(filename)
if version and not filename.endswith('.metadata'):
# Check by version string (the unique constraint is on package_id + version)
existing_version = db.query(PackageVersion).filter(
PackageVersion.package_id == package.id,
PackageVersion.version == version,
).first()
if not existing_version:
pkg_version = PackageVersion(
package_id=package.id,
artifact_id=sha256,
version=version,
version_source="filename",
created_by="pypi-proxy",
)
db.add(pkg_version)
# Cache the URL mapping
existing_cached = db.query(CachedUrl).filter(CachedUrl.url_hash == url_hash).first()
if not existing_cached:
cached_url_record = CachedUrl(
url_hash=url_hash,
url=upstream_url,
artifact_id=sha256,
)
db.add(cached_url_record)
# Extract and store dependencies
dependencies = _extract_dependencies(content, filename)
if dependencies:
logger.info(f"PyPI proxy: extracted {len(dependencies)} dependencies from {filename}")
for dep_name, dep_version in dependencies:
# Check if this dependency already exists for this artifact
existing_dep = db.query(ArtifactDependency).filter(
ArtifactDependency.artifact_id == sha256,
ArtifactDependency.dependency_project == "_pypi",
ArtifactDependency.dependency_package == dep_name,
).first()
if not existing_dep:
dep = ArtifactDependency(
artifact_id=sha256,
dependency_project="_pypi",
dependency_package=dep_name,
version_constraint=dep_version if dep_version else "*",
)
db.add(dep)
db.commit()
# Return the file
return Response(
content=content,
media_type=content_type,
headers={
"Content-Disposition": f'attachment; filename="{filename}"',
"Content-Length": str(size),
"X-Checksum-SHA256": sha256,
"X-Cache": "MISS",
}
)
except httpx.ConnectError as e:
raise HTTPException(status_code=502, detail=f"Connection failed: {e}")
except httpx.TimeoutException as e:
raise HTTPException(status_code=504, detail=f"Timeout: {e}")
except HTTPException:
raise
except Exception as e:
logger.exception(f"PyPI proxy: error downloading {filename}")
raise HTTPException(status_code=500, detail=str(e))

File diff suppressed because it is too large Load Diff

View File

@@ -33,7 +33,6 @@ class ProjectResponse(BaseModel):
name: str
description: Optional[str]
is_public: bool
is_system: bool = False
created_at: datetime
updated_at: datetime
created_by: str
@@ -1197,277 +1196,3 @@ class TeamMemberResponse(BaseModel):
class Config:
from_attributes = True
# =============================================================================
# Upstream Caching Schemas
# =============================================================================
# Valid source types
SOURCE_TYPES = ["npm", "pypi", "maven", "docker", "helm", "nuget", "deb", "rpm", "generic"]
# Valid auth types
AUTH_TYPES = ["none", "basic", "bearer", "api_key"]
class UpstreamSourceCreate(BaseModel):
"""Create a new upstream source"""
name: str
source_type: str = "generic"
url: str
enabled: bool = False
auth_type: str = "none"
username: Optional[str] = None
password: Optional[str] = None # Write-only
headers: Optional[dict] = None # Write-only, custom headers
priority: int = 100
@field_validator('name')
@classmethod
def validate_name(cls, v: str) -> str:
v = v.strip()
if not v:
raise ValueError("name cannot be empty")
if len(v) > 255:
raise ValueError("name must be 255 characters or less")
return v
@field_validator('source_type')
@classmethod
def validate_source_type(cls, v: str) -> str:
if v not in SOURCE_TYPES:
raise ValueError(f"source_type must be one of: {', '.join(SOURCE_TYPES)}")
return v
@field_validator('url')
@classmethod
def validate_url(cls, v: str) -> str:
v = v.strip()
if not v:
raise ValueError("url cannot be empty")
if not (v.startswith('http://') or v.startswith('https://')):
raise ValueError("url must start with http:// or https://")
if len(v) > 2048:
raise ValueError("url must be 2048 characters or less")
return v
@field_validator('auth_type')
@classmethod
def validate_auth_type(cls, v: str) -> str:
if v not in AUTH_TYPES:
raise ValueError(f"auth_type must be one of: {', '.join(AUTH_TYPES)}")
return v
@field_validator('priority')
@classmethod
def validate_priority(cls, v: int) -> int:
if v <= 0:
raise ValueError("priority must be greater than 0")
return v
class UpstreamSourceUpdate(BaseModel):
"""Update an upstream source (partial)"""
name: Optional[str] = None
source_type: Optional[str] = None
url: Optional[str] = None
enabled: Optional[bool] = None
auth_type: Optional[str] = None
username: Optional[str] = None
password: Optional[str] = None # Write-only, None = keep existing, empty string = clear
headers: Optional[dict] = None # Write-only
priority: Optional[int] = None
@field_validator('name')
@classmethod
def validate_name(cls, v: Optional[str]) -> Optional[str]:
if v is not None:
v = v.strip()
if not v:
raise ValueError("name cannot be empty")
if len(v) > 255:
raise ValueError("name must be 255 characters or less")
return v
@field_validator('source_type')
@classmethod
def validate_source_type(cls, v: Optional[str]) -> Optional[str]:
if v is not None and v not in SOURCE_TYPES:
raise ValueError(f"source_type must be one of: {', '.join(SOURCE_TYPES)}")
return v
@field_validator('url')
@classmethod
def validate_url(cls, v: Optional[str]) -> Optional[str]:
if v is not None:
v = v.strip()
if not v:
raise ValueError("url cannot be empty")
if not (v.startswith('http://') or v.startswith('https://')):
raise ValueError("url must start with http:// or https://")
if len(v) > 2048:
raise ValueError("url must be 2048 characters or less")
return v
@field_validator('auth_type')
@classmethod
def validate_auth_type(cls, v: Optional[str]) -> Optional[str]:
if v is not None and v not in AUTH_TYPES:
raise ValueError(f"auth_type must be one of: {', '.join(AUTH_TYPES)}")
return v
@field_validator('priority')
@classmethod
def validate_priority(cls, v: Optional[int]) -> Optional[int]:
if v is not None and v <= 0:
raise ValueError("priority must be greater than 0")
return v
class UpstreamSourceResponse(BaseModel):
"""Upstream source response (credentials never included)"""
id: UUID
name: str
source_type: str
url: str
enabled: bool
auth_type: str
username: Optional[str]
has_password: bool # True if password is set
has_headers: bool # True if custom headers are set
priority: int
source: str = "database" # "database" or "env" (env = defined via environment variables)
created_at: Optional[datetime] = None # May be None for legacy/env data
updated_at: Optional[datetime] = None # May be None for legacy/env data
class Config:
from_attributes = True
class CacheSettingsResponse(BaseModel):
"""Global cache settings response"""
auto_create_system_projects: bool
auto_create_system_projects_env_override: Optional[bool] = None # Set if overridden by env var
created_at: Optional[datetime] = None # May be None for legacy data
updated_at: Optional[datetime] = None # May be None for legacy data
class Config:
from_attributes = True
class CacheSettingsUpdate(BaseModel):
"""Update cache settings (partial)"""
auto_create_system_projects: Optional[bool] = None
class CachedUrlResponse(BaseModel):
"""Cached URL response"""
id: UUID
url: str
url_hash: str
artifact_id: str
source_id: Optional[UUID]
source_name: Optional[str] = None # Populated from join
fetched_at: datetime
created_at: datetime
class Config:
from_attributes = True
class CacheRequest(BaseModel):
"""Request to cache an artifact from an upstream URL"""
url: str
source_type: str
package_name: Optional[str] = None # Auto-derived from URL if not provided
tag: Optional[str] = None # Auto-derived from URL if not provided
user_project: Optional[str] = None # Cross-reference to user project
user_package: Optional[str] = None
user_tag: Optional[str] = None
expected_hash: Optional[str] = None # Verify downloaded content
@field_validator('url')
@classmethod
def validate_url(cls, v: str) -> str:
v = v.strip()
if not v:
raise ValueError("url cannot be empty")
if not (v.startswith('http://') or v.startswith('https://')):
raise ValueError("url must start with http:// or https://")
if len(v) > 4096:
raise ValueError("url must be 4096 characters or less")
return v
@field_validator('source_type')
@classmethod
def validate_source_type(cls, v: str) -> str:
if v not in SOURCE_TYPES:
raise ValueError(f"source_type must be one of: {', '.join(SOURCE_TYPES)}")
return v
@field_validator('expected_hash')
@classmethod
def validate_expected_hash(cls, v: Optional[str]) -> Optional[str]:
if v is not None:
v = v.strip().lower()
# Remove sha256: prefix if present
if v.startswith('sha256:'):
v = v[7:]
# Validate hex format
if len(v) != 64 or not all(c in '0123456789abcdef' for c in v):
raise ValueError("expected_hash must be a 64-character hex string (SHA256)")
return v
class CacheResponse(BaseModel):
"""Response from caching an artifact"""
artifact_id: str
sha256: str
size: int
content_type: Optional[str]
already_cached: bool
source_url: str
source_name: Optional[str]
system_project: str
system_package: str
system_tag: Optional[str]
user_reference: Optional[str] = None # e.g., "my-app/npm-deps:lodash-4.17.21"
class CacheResolveRequest(BaseModel):
"""Request to cache an artifact by package coordinates (no URL required).
The server will construct the appropriate URL based on source_type and
configured upstream sources.
"""
source_type: str
package: str
version: str
user_project: Optional[str] = None
user_package: Optional[str] = None
user_tag: Optional[str] = None
@field_validator('source_type')
@classmethod
def validate_source_type(cls, v: str) -> str:
if v not in SOURCE_TYPES:
raise ValueError(f"source_type must be one of: {', '.join(SOURCE_TYPES)}")
return v
@field_validator('package')
@classmethod
def validate_package(cls, v: str) -> str:
v = v.strip()
if not v:
raise ValueError("package cannot be empty")
return v
@field_validator('version')
@classmethod
def validate_version(cls, v: str) -> str:
v = v.strip()
if not v:
raise ValueError("version cannot be empty")
return v

View File

@@ -1,565 +0,0 @@
"""
HTTP client for fetching artifacts from upstream sources.
Provides streaming downloads with SHA256 computation, authentication support,
and automatic source matching based on URL prefixes.
"""
from __future__ import annotations
import hashlib
import logging
import tempfile
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import BinaryIO, Optional, TYPE_CHECKING
from urllib.parse import urlparse
import httpx
if TYPE_CHECKING:
from .models import CacheSettings, UpstreamSource
logger = logging.getLogger(__name__)
class UpstreamError(Exception):
"""Base exception for upstream client errors."""
pass
class UpstreamConnectionError(UpstreamError):
"""Connection to upstream failed (network error, DNS, etc.)."""
pass
class UpstreamTimeoutError(UpstreamError):
"""Request to upstream timed out."""
pass
class UpstreamHTTPError(UpstreamError):
"""Upstream returned an HTTP error response."""
def __init__(self, message: str, status_code: int, response_headers: dict = None):
super().__init__(message)
self.status_code = status_code
self.response_headers = response_headers or {}
class UpstreamSSLError(UpstreamError):
"""SSL/TLS error when connecting to upstream."""
pass
class FileSizeExceededError(UpstreamError):
"""File size exceeds the maximum allowed."""
def __init__(self, message: str, content_length: int, max_size: int):
super().__init__(message)
self.content_length = content_length
self.max_size = max_size
class SourceNotFoundError(UpstreamError):
"""No matching upstream source found for URL."""
pass
class SourceDisabledError(UpstreamError):
"""The matching upstream source is disabled."""
pass
@dataclass
class FetchResult:
"""Result of fetching an artifact from upstream."""
content: BinaryIO # File-like object with content
sha256: str # SHA256 hash of content
size: int # Size in bytes
content_type: Optional[str] # Content-Type header
response_headers: dict # All response headers for provenance
source_name: Optional[str] = None # Name of matched upstream source
temp_path: Optional[Path] = None # Path to temp file (for cleanup)
def close(self):
"""Close and clean up resources."""
if self.content:
try:
self.content.close()
except Exception:
pass
if self.temp_path and self.temp_path.exists():
try:
self.temp_path.unlink()
except Exception:
pass
@dataclass
class UpstreamClientConfig:
"""Configuration for the upstream client."""
connect_timeout: float = 30.0 # Connection timeout in seconds
read_timeout: float = 300.0 # Read timeout in seconds (5 minutes for large files)
max_retries: int = 3 # Maximum number of retry attempts
retry_backoff_base: float = 1.0 # Base delay for exponential backoff
retry_backoff_max: float = 30.0 # Maximum delay between retries
follow_redirects: bool = True # Whether to follow redirects
max_redirects: int = 5 # Maximum number of redirects to follow
max_file_size: Optional[int] = None # Maximum file size (None = unlimited)
verify_ssl: bool = True # Verify SSL certificates
user_agent: str = "Orchard-UpstreamClient/1.0"
class UpstreamClient:
"""
HTTP client for fetching artifacts from upstream sources.
Supports streaming downloads, multiple authentication methods,
automatic source matching, and air-gap mode enforcement.
"""
def __init__(
self,
sources: list[UpstreamSource] = None,
cache_settings: CacheSettings = None,
config: UpstreamClientConfig = None,
):
"""
Initialize the upstream client.
Args:
sources: List of upstream sources for URL matching and auth.
Should be sorted by priority (lowest first).
cache_settings: Global cache settings including air-gap mode.
config: Client configuration options.
"""
self.sources = sources or []
self.cache_settings = cache_settings
self.config = config or UpstreamClientConfig()
# Sort sources by priority (lower = higher priority)
self.sources = sorted(self.sources, key=lambda s: s.priority)
def _match_source(self, url: str) -> Optional[UpstreamSource]:
"""
Find the upstream source that matches the given URL.
Matches by URL prefix, returns the highest priority match.
Args:
url: The URL to match.
Returns:
The matching UpstreamSource or None if no match.
"""
for source in self.sources:
# Check if URL starts with source URL (prefix match)
if url.startswith(source.url.rstrip("/")):
return source
return None
def _build_auth_headers(self, source: UpstreamSource) -> dict:
"""
Build authentication headers for the given source.
Args:
source: The upstream source with auth configuration.
Returns:
Dictionary of headers to add to the request.
"""
headers = {}
if source.auth_type == "none":
pass
elif source.auth_type == "basic":
# httpx handles basic auth via auth parameter, but we can also
# do it manually if needed. We'll use the auth parameter instead.
pass
elif source.auth_type == "bearer":
password = source.get_password()
if password:
headers["Authorization"] = f"Bearer {password}"
elif source.auth_type == "api_key":
# API key auth uses custom headers
custom_headers = source.get_headers()
if custom_headers:
headers.update(custom_headers)
return headers
def _get_basic_auth(self, source: UpstreamSource) -> Optional[tuple[str, str]]:
"""
Get basic auth credentials if applicable.
Args:
source: The upstream source.
Returns:
Tuple of (username, password) or None.
"""
if source.auth_type == "basic" and source.username:
password = source.get_password() or ""
return (source.username, password)
return None
def _should_retry(self, error: Exception, attempt: int) -> bool:
"""
Determine if a request should be retried.
Args:
error: The exception that occurred.
attempt: Current attempt number (0-indexed).
Returns:
True if the request should be retried.
"""
if attempt >= self.config.max_retries - 1:
return False
# Retry on connection errors and timeouts
if isinstance(error, (httpx.ConnectError, httpx.ConnectTimeout)):
return True
# Retry on read timeouts
if isinstance(error, httpx.ReadTimeout):
return True
# Retry on certain HTTP errors (502, 503, 504)
if isinstance(error, httpx.HTTPStatusError):
return error.response.status_code in (502, 503, 504)
return False
def _calculate_backoff(self, attempt: int) -> float:
"""
Calculate backoff delay for retry.
Uses exponential backoff with jitter.
Args:
attempt: Current attempt number (0-indexed).
Returns:
Delay in seconds.
"""
import random
delay = self.config.retry_backoff_base * (2**attempt)
# Add jitter (±25%)
delay *= 0.75 + random.random() * 0.5
return min(delay, self.config.retry_backoff_max)
def fetch(self, url: str, expected_hash: Optional[str] = None) -> FetchResult:
"""
Fetch an artifact from the given URL.
Streams the response to a temp file while computing the SHA256 hash.
Handles authentication, retries, and error cases.
Args:
url: The URL to fetch.
expected_hash: Optional expected SHA256 hash for verification.
Returns:
FetchResult with content, hash, size, and headers.
Raises:
SourceDisabledError: If the matching source is disabled.
UpstreamConnectionError: On connection failures.
UpstreamTimeoutError: On timeout.
UpstreamHTTPError: On HTTP error responses.
UpstreamSSLError: On SSL/TLS errors.
FileSizeExceededError: If Content-Length exceeds max_file_size.
"""
start_time = time.time()
# Match URL to source
source = self._match_source(url)
# Check if source is enabled (if we have a match)
if source is not None and not source.enabled:
raise SourceDisabledError(
f"Upstream source '{source.name}' is disabled"
)
source_name = source.name if source else None
logger.info(
f"Fetching URL: {url} (source: {source_name or 'none'})"
)
# Build request parameters
headers = {"User-Agent": self.config.user_agent}
auth = None
if source:
headers.update(self._build_auth_headers(source))
auth = self._get_basic_auth(source)
timeout = httpx.Timeout(
connect=self.config.connect_timeout,
read=self.config.read_timeout,
write=30.0,
pool=10.0,
)
# Attempt fetch with retries
last_error = None
for attempt in range(self.config.max_retries):
try:
return self._do_fetch(
url=url,
headers=headers,
auth=auth,
timeout=timeout,
source_name=source_name,
start_time=start_time,
expected_hash=expected_hash,
)
except (
httpx.ConnectError,
httpx.ConnectTimeout,
httpx.ReadTimeout,
httpx.HTTPStatusError,
) as e:
last_error = e
if self._should_retry(e, attempt):
delay = self._calculate_backoff(attempt)
logger.warning(
f"Fetch failed (attempt {attempt + 1}/{self.config.max_retries}), "
f"retrying in {delay:.1f}s: {e}"
)
time.sleep(delay)
else:
break
# Convert final error to our exception types
self._raise_upstream_error(last_error, url)
def _do_fetch(
self,
url: str,
headers: dict,
auth: Optional[tuple[str, str]],
timeout: httpx.Timeout,
source_name: Optional[str],
start_time: float,
expected_hash: Optional[str] = None,
) -> FetchResult:
"""
Perform the actual fetch operation.
Args:
url: URL to fetch.
headers: Request headers.
auth: Basic auth credentials or None.
timeout: Request timeout configuration.
source_name: Name of matched source for logging.
start_time: Request start time for timing.
expected_hash: Optional expected hash for verification.
Returns:
FetchResult with content and metadata.
"""
with httpx.Client(
timeout=timeout,
follow_redirects=self.config.follow_redirects,
max_redirects=self.config.max_redirects,
verify=self.config.verify_ssl,
) as client:
with client.stream("GET", url, headers=headers, auth=auth) as response:
# Check for HTTP errors
response.raise_for_status()
# Check Content-Length against max size
content_length = response.headers.get("content-length")
if content_length:
content_length = int(content_length)
if (
self.config.max_file_size
and content_length > self.config.max_file_size
):
raise FileSizeExceededError(
f"File size {content_length} exceeds maximum {self.config.max_file_size}",
content_length,
self.config.max_file_size,
)
# Stream to temp file while computing hash
hasher = hashlib.sha256()
size = 0
# Create temp file
temp_file = tempfile.NamedTemporaryFile(
delete=False, prefix="orchard_upstream_"
)
temp_path = Path(temp_file.name)
try:
for chunk in response.iter_bytes(chunk_size=65536):
temp_file.write(chunk)
hasher.update(chunk)
size += len(chunk)
# Check size while streaming if max_file_size is set
if self.config.max_file_size and size > self.config.max_file_size:
temp_file.close()
temp_path.unlink()
raise FileSizeExceededError(
f"Downloaded size {size} exceeds maximum {self.config.max_file_size}",
size,
self.config.max_file_size,
)
temp_file.close()
sha256 = hasher.hexdigest()
# Verify hash if expected
if expected_hash and sha256 != expected_hash.lower():
temp_path.unlink()
raise UpstreamError(
f"Hash mismatch: expected {expected_hash}, got {sha256}"
)
# Capture response headers
response_headers = dict(response.headers)
# Get content type
content_type = response.headers.get("content-type")
elapsed = time.time() - start_time
logger.info(
f"Fetched {url}: {size} bytes, sha256={sha256[:12]}..., "
f"source={source_name}, time={elapsed:.2f}s"
)
# Return file handle positioned at start
content = open(temp_path, "rb")
return FetchResult(
content=content,
sha256=sha256,
size=size,
content_type=content_type,
response_headers=response_headers,
source_name=source_name,
temp_path=temp_path,
)
except Exception:
# Clean up on error
try:
temp_file.close()
except Exception:
pass
if temp_path.exists():
temp_path.unlink()
raise
def _raise_upstream_error(self, error: Exception, url: str):
"""
Convert httpx exception to appropriate UpstreamError.
Args:
error: The httpx exception.
url: The URL that was being fetched.
Raises:
Appropriate UpstreamError subclass.
"""
if error is None:
raise UpstreamError(f"Unknown error fetching {url}")
if isinstance(error, httpx.ConnectError):
raise UpstreamConnectionError(
f"Failed to connect to upstream: {error}"
) from error
if isinstance(error, (httpx.ConnectTimeout, httpx.ReadTimeout)):
raise UpstreamTimeoutError(
f"Request timed out: {error}"
) from error
if isinstance(error, httpx.HTTPStatusError):
raise UpstreamHTTPError(
f"HTTP {error.response.status_code}: {error}",
error.response.status_code,
dict(error.response.headers),
) from error
# Check for SSL errors in the error chain
if "ssl" in str(error).lower() or "certificate" in str(error).lower():
raise UpstreamSSLError(f"SSL/TLS error: {error}") from error
raise UpstreamError(f"Error fetching {url}: {error}") from error
def test_connection(self, source: UpstreamSource) -> tuple[bool, Optional[str], Optional[int]]:
"""
Test connectivity to an upstream source.
Performs a HEAD request to the source URL to verify connectivity
and authentication. Does not follow redirects - a 3xx response
is considered successful since it proves the server is reachable.
Args:
source: The upstream source to test.
Returns:
Tuple of (success, error_message, status_code).
"""
headers = {"User-Agent": self.config.user_agent}
headers.update(self._build_auth_headers(source))
auth = self._get_basic_auth(source)
timeout = httpx.Timeout(
connect=self.config.connect_timeout,
read=30.0,
write=30.0,
pool=10.0,
)
try:
with httpx.Client(
timeout=timeout,
verify=self.config.verify_ssl,
) as client:
response = client.head(
source.url,
headers=headers,
auth=auth,
follow_redirects=False,
)
# Consider 2xx and 3xx as success, also 405 (Method Not Allowed)
# since some servers don't support HEAD
if response.status_code < 400 or response.status_code == 405:
return (True, None, response.status_code)
else:
return (
False,
f"HTTP {response.status_code}",
response.status_code,
)
except httpx.ConnectError as e:
return (False, f"Connection failed: {e}", None)
except httpx.ConnectTimeout as e:
return (False, f"Connection timed out: {e}", None)
except httpx.ReadTimeout as e:
return (False, f"Read timed out: {e}", None)
except httpx.TooManyRedirects as e:
return (False, f"Too many redirects: {e}", None)
except Exception as e:
return (False, f"Error: {e}", None)

View File

@@ -11,10 +11,10 @@ python-jose[cryptography]==3.3.0
passlib[bcrypt]==1.7.4
bcrypt==4.0.1
slowapi==0.1.9
httpx>=0.25.0
# Test dependencies
pytest>=7.4.0
pytest-asyncio>=0.21.0
pytest-cov>=4.1.0
httpx>=0.25.0
moto[s3]>=4.2.0

View File

@@ -128,9 +128,7 @@ class TestProjectListingFilters:
assert response.status_code == 200
data = response.json()
# Filter out system projects (names starting with "_") as they may have
# collation-specific sort behavior and aren't part of the test data
names = [p["name"] for p in data["items"] if not p["name"].startswith("_")]
names = [p["name"] for p in data["items"]]
assert names == sorted(names)

View File

@@ -1,137 +0,0 @@
"""Integration tests for PyPI transparent proxy."""
import os
import pytest
import httpx
def get_base_url():
"""Get the base URL for the Orchard server from environment."""
return os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
class TestPyPIProxyEndpoints:
"""Tests for PyPI proxy endpoints.
These endpoints are public (no auth required) since pip needs to use them.
"""
@pytest.mark.integration
def test_pypi_simple_index(self):
"""Test that /pypi/simple/ returns HTML response."""
with httpx.Client(base_url=get_base_url(), timeout=30.0) as client:
response = client.get("/pypi/simple/")
# Returns 200 if sources configured, 503 if not
assert response.status_code in (200, 503)
if response.status_code == 200:
assert "text/html" in response.headers.get("content-type", "")
else:
assert "No PyPI upstream sources configured" in response.json()["detail"]
@pytest.mark.integration
def test_pypi_package_endpoint(self):
"""Test that /pypi/simple/{package}/ returns appropriate response."""
with httpx.Client(base_url=get_base_url(), timeout=30.0) as client:
response = client.get("/pypi/simple/requests/")
# Returns 200 if sources configured and package found,
# 404 if package not found, 503 if no sources
assert response.status_code in (200, 404, 503)
if response.status_code == 200:
assert "text/html" in response.headers.get("content-type", "")
elif response.status_code == 404:
assert "not found" in response.json()["detail"].lower()
else: # 503
assert "No PyPI upstream sources configured" in response.json()["detail"]
@pytest.mark.integration
def test_pypi_download_missing_upstream_param(self):
"""Test that /pypi/simple/{package}/{filename} requires upstream param."""
with httpx.Client(base_url=get_base_url(), timeout=30.0) as client:
response = client.get("/pypi/simple/requests/requests-2.31.0.tar.gz")
assert response.status_code == 400
assert "upstream" in response.json()["detail"].lower()
class TestPyPILinkRewriting:
"""Tests for URL rewriting in PyPI proxy responses."""
def test_rewrite_package_links(self):
"""Test that download links are rewritten to go through proxy."""
from app.pypi_proxy import _rewrite_package_links
html = '''
<html>
<body>
<a href="https://files.pythonhosted.org/packages/ab/cd/requests-2.31.0.tar.gz#sha256=abc123">requests-2.31.0.tar.gz</a>
<a href="https://files.pythonhosted.org/packages/ef/gh/requests-2.31.0-py3-none-any.whl#sha256=def456">requests-2.31.0-py3-none-any.whl</a>
</body>
</html>
'''
# upstream_base_url is used to resolve relative URLs (not needed here since URLs are absolute)
result = _rewrite_package_links(
html,
"http://localhost:8080",
"requests",
"https://pypi.org/simple/requests/"
)
# Links should be rewritten to go through our proxy
assert "/pypi/simple/requests/requests-2.31.0.tar.gz?upstream=" in result
assert "/pypi/simple/requests/requests-2.31.0-py3-none-any.whl?upstream=" in result
# Original URLs should be encoded in upstream param
assert "files.pythonhosted.org" in result
# Hash fragments should be preserved
assert "#sha256=abc123" in result
assert "#sha256=def456" in result
def test_rewrite_relative_links(self):
"""Test that relative URLs are resolved to absolute URLs."""
from app.pypi_proxy import _rewrite_package_links
# Artifactory-style relative URLs
html = '''
<html>
<body>
<a href="../../packages/ab/cd/requests-2.31.0.tar.gz#sha256=abc123">requests-2.31.0.tar.gz</a>
</body>
</html>
'''
result = _rewrite_package_links(
html,
"https://orchard.example.com",
"requests",
"https://artifactory.example.com/api/pypi/pypi-remote/simple/requests/"
)
# The relative URL should be resolved to absolute
# ../../packages/ab/cd/... from /api/pypi/pypi-remote/simple/requests/ resolves to /api/pypi/pypi-remote/packages/ab/cd/...
assert "upstream=https%3A%2F%2Fartifactory.example.com%2Fapi%2Fpypi%2Fpypi-remote%2Fpackages" in result
# Hash fragment should be preserved
assert "#sha256=abc123" in result
class TestPyPIPackageNormalization:
"""Tests for PyPI package name normalization."""
@pytest.mark.integration
def test_package_name_normalized(self):
"""Test that package names are normalized per PEP 503.
Different capitalizations/separators should all be valid paths.
The endpoint normalizes to lowercase with hyphens before lookup.
"""
with httpx.Client(base_url=get_base_url(), timeout=30.0) as client:
# Test various name formats - all should be valid endpoint paths
for package_name in ["Requests", "some_package", "some-package"]:
response = client.get(f"/pypi/simple/{package_name}/")
# 200 = found, 404 = not found, 503 = no sources configured
assert response.status_code in (200, 404, 503), \
f"Unexpected status {response.status_code} for {package_name}"
# Verify response is appropriate for the status code
if response.status_code == 200:
assert "text/html" in response.headers.get("content-type", "")
elif response.status_code == 503:
assert "No PyPI upstream sources configured" in response.json()["detail"]

File diff suppressed because it is too large Load Diff

View File

@@ -1,672 +0,0 @@
# Epic: Upstream Artifact Caching for Hermetic Builds
## Overview
Orchard will act as a permanent, content-addressable cache for upstream artifacts (npm, PyPI, Maven, Docker, etc.). Once an artifact is cached, it is stored forever by SHA256 hash - enabling reproducible builds years later regardless of whether the upstream source still exists.
## Problem Statement
Build reproducibility is critical for enterprise environments:
- Packages get deleted, yanked, or modified upstream
- Registries go down or change URLs
- Version constraints resolve differently over time
- Air-gapped environments cannot access public internet
Teams need to guarantee that a build from 5 years ago produces the exact same output today.
## Solution
Orchard becomes "the cache that never forgets":
1. **Fetch once, store forever** - When a build needs `lodash@4.17.21`, Orchard fetches it from npm, stores it by SHA256 hash, and never deletes it
2. **Content-addressable** - Same hash = same bytes, guaranteed
3. **Format-agnostic** - Orchard doesn't need to understand npm/PyPI/Maven protocols; the client provides the URL, Orchard fetches and stores
4. **Air-gap support** - Disable public internet entirely, only allow configured private upstreams
## User Workflow
```
1. Build tool resolves dependencies npm install / pip install / mvn resolve
2. Generate lockfile with URLs package-lock.json / requirements.txt
3. Cache all URLs in Orchard orchard cache --file urls.txt
4. Pin by SHA256 hash lodash = "sha256:abc123..."
5. Future builds fetch by hash Always get exact same bytes
```
## Key Features
- **Multiple upstream sources** - Configure npm, PyPI, Maven Central, private Artifactory, etc.
- **Per-source authentication** - Basic auth, bearer tokens, API keys
- **System cache projects** - `_npm`, `_pypi`, `_maven` organize cached packages by format
- **Cross-referencing** - Link cached artifacts to user projects for visibility
- **URL tracking** - Know which URLs map to which hashes, audit provenance
- **Air-gap mode** - Global kill switch for all public internet access
- **Environment variable config** - 12-factor friendly for containerized deployments
## Architecture
```
┌─────────────────────────────────────────────────────────────────┐
│ Orchard Server │
├─────────────────────────────────────────────────────────────────┤
│ POST /api/v1/cache │
│ ├── Check if URL already cached (url_hash lookup) │
│ ├── Match URL to upstream source (get auth) │
│ ├── Fetch via UpstreamClient (stream + compute SHA256) │
│ ├── Store artifact in S3 (content-addressable) │
│ ├── Create tag in system project (_npm/lodash:4.17.21) │
│ ├── Optionally create tag in user project │
│ └── Record in cached_urls table (provenance) │
├─────────────────────────────────────────────────────────────────┤
│ Tables │
│ ├── upstream_sources (npm-public, pypi-public, artifactory) │
│ ├── cache_settings (allow_public_internet, etc.) │
│ ├── cached_urls (url → artifact_id mapping) │
│ └── projects.is_system (for _npm, _pypi, etc.) │
└─────────────────────────────────────────────────────────────────┘
```
## Issues Summary
| Issue | Title | Status | Dependencies |
|-------|-------|--------|--------------|
| #68 | Schema: Upstream Sources & Cache Tracking | ✅ Complete | None |
| #69 | HTTP Client: Generic URL Fetcher | Pending | None |
| #70 | Cache API Endpoint | Pending | #68, #69 |
| #71 | System Projects (Cache Namespaces) | Pending | #68, #70 |
| #72 | Upstream Sources Admin API | Pending | #68 |
| #73 | Global Cache Settings API | Pending | #68 |
| #74 | Environment Variable Overrides | Pending | #68, #72, #73 |
| #75 | Frontend: Upstream Sources Management | Pending | #72, #73 |
| #105 | Frontend: System Projects Integration | Pending | #71 |
| #77 | CLI: Cache Command | Pending | #70 |
## Implementation Phases
**Phase 1 - Core (MVP):**
- #68 Schema ✅
- #69 HTTP Client
- #70 Cache API
- #71 System Projects
**Phase 2 - Admin:**
- #72 Upstream Sources API
- #73 Cache Settings API
- #74 Environment Variables
**Phase 3 - Frontend:**
- #75 Upstream Sources UI
- #105 System Projects UI
**Phase 4 - CLI:**
- #77 Cache Command
---
# Issue #68: Schema - Upstream Sources & Cache Tracking
**Status: ✅ Complete**
## Description
Create database schema for flexible multi-source upstream configuration and URL-to-artifact tracking. This replaces the previous singleton proxy_config design with a more flexible model supporting multiple upstream sources, air-gap mode, and provenance tracking.
## Acceptance Criteria
- [x] `upstream_sources` table:
- id (UUID, primary key)
- name (VARCHAR(255), unique, e.g., "npm-public", "artifactory-private")
- source_type (VARCHAR(50), enum: npm, pypi, maven, docker, helm, nuget, deb, rpm, generic)
- url (VARCHAR(2048), base URL of upstream)
- enabled (BOOLEAN, default false)
- is_public (BOOLEAN, true if this is a public internet source)
- auth_type (VARCHAR(20), enum: none, basic, bearer, api_key)
- username (VARCHAR(255), nullable)
- password_encrypted (BYTEA, nullable, Fernet encrypted)
- headers_encrypted (BYTEA, nullable, for custom headers like API keys)
- priority (INTEGER, default 100, lower = checked first)
- created_at, updated_at timestamps
- [x] `cache_settings` table (singleton, id always 1):
- id (INTEGER, primary key, check id = 1)
- allow_public_internet (BOOLEAN, default true, air-gap kill switch)
- auto_create_system_projects (BOOLEAN, default true)
- created_at, updated_at timestamps
- [x] `cached_urls` table:
- id (UUID, primary key)
- url (VARCHAR(4096), original URL fetched)
- url_hash (VARCHAR(64), SHA256 of URL for fast lookup, indexed)
- artifact_id (VARCHAR(64), FK to artifacts)
- source_id (UUID, FK to upstream_sources, nullable for manual imports)
- fetched_at (TIMESTAMP WITH TIME ZONE)
- response_headers (JSONB, original upstream headers for provenance)
- created_at timestamp
- [x] Add `is_system` BOOLEAN column to projects table (default false)
- [x] Migration SQL file in migrations/
- [x] Runtime migration in database.py
- [x] SQLAlchemy models for all new tables
- [x] Pydantic schemas for API input/output (passwords write-only)
- [x] Encryption helpers for password/headers fields
- [x] Seed default upstream sources (disabled by default):
- npm-public: https://registry.npmjs.org
- pypi-public: https://pypi.org/simple
- maven-central: https://repo1.maven.org/maven2
- docker-hub: https://registry-1.docker.io
- [x] Unit tests for models and schemas
## Files Modified
- `migrations/010_upstream_caching.sql`
- `backend/app/database.py` (migrations 016-020)
- `backend/app/models.py` (UpstreamSource, CacheSettings, CachedUrl, Project.is_system)
- `backend/app/schemas.py` (all caching schemas)
- `backend/app/encryption.py` (renamed env var)
- `backend/app/config.py` (renamed setting)
- `backend/tests/test_upstream_caching.py` (37 tests)
- `frontend/src/components/Layout.tsx` (footer tagline)
- `CHANGELOG.md`
---
# Issue #69: HTTP Client - Generic URL Fetcher
**Status: Pending**
## Description
Create a reusable HTTP client for fetching artifacts from upstream sources. Supports multiple auth methods, streaming for large files, and computes SHA256 while downloading.
## Acceptance Criteria
- [ ] `UpstreamClient` class in `backend/app/upstream.py`
- [ ] `fetch(url)` method that:
- Streams response body (doesn't load large files into memory)
- Computes SHA256 hash while streaming
- Returns file content, hash, size, and response headers
- [ ] Auth support based on upstream source configuration:
- None (anonymous)
- Basic auth (username/password)
- Bearer token (Authorization: Bearer {token})
- API key (custom header name/value)
- [ ] URL-to-source matching:
- Match URL to configured upstream source by URL prefix
- Apply auth from matched source
- Respect source priority for multiple matches
- [ ] Configuration options:
- Timeout (connect and read, default 30s/300s)
- Max retries (default 3)
- Follow redirects (default true, max 5)
- Max file size (reject if Content-Length exceeds limit)
- [ ] Respect `allow_public_internet` setting:
- If false, reject URLs matching `is_public=true` sources
- If false, reject URLs not matching any configured source
- [ ] Capture response headers for provenance tracking
- [ ] Proper error handling:
- Connection errors (retry with backoff)
- HTTP errors (4xx, 5xx)
- Timeout errors
- SSL/TLS errors
- [ ] Logging for debugging (URL, source matched, status, timing)
- [ ] Unit tests with mocked HTTP responses
- [ ] Integration tests against httpbin.org or similar (optional, marked)
## Technical Notes
- Use `httpx` for async HTTP support (already in requirements)
- Stream to temp file to avoid memory issues with large artifacts
- Consider checksum verification if upstream provides it (e.g., npm provides shasum)
---
# Issue #70: Cache API Endpoint
**Status: Pending**
## Description
API endpoint to cache an artifact from an upstream URL. This is the core endpoint that fetches from upstream, stores in Orchard, and creates appropriate tags.
## Acceptance Criteria
- [ ] `POST /api/v1/cache` endpoint
- [ ] Request body:
```json
{
"url": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
"source_type": "npm",
"package_name": "lodash",
"tag": "4.17.21",
"user_project": "my-app",
"user_package": "npm-deps",
"user_tag": "lodash-4.17.21",
"expected_hash": "sha256:abc123..."
}
```
- `url` (required): URL to fetch
- `source_type` (required): Determines system project (_npm, _pypi, etc.)
- `package_name` (optional): Package name in system project, derived from URL if not provided
- `tag` (optional): Tag name in system project, derived from URL if not provided
- `user_project`, `user_package`, `user_tag` (optional): Cross-reference in user's project
- `expected_hash` (optional): Verify downloaded content matches
- [ ] Response:
```json
{
"artifact_id": "abc123...",
"sha256": "abc123...",
"size": 12345,
"content_type": "application/gzip",
"already_cached": false,
"source_url": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
"source_name": "npm-public",
"system_project": "_npm",
"system_package": "lodash",
"system_tag": "4.17.21",
"user_reference": "my-app/npm-deps:lodash-4.17.21"
}
```
- [ ] Behavior:
- Check if URL already cached (by url_hash in cached_urls)
- If cached: return existing artifact, optionally create user tag
- If not cached: fetch via UpstreamClient, store artifact, create tags
- Create/get system project if needed (e.g., `_npm`)
- Create package in system project (e.g., `_npm/lodash`)
- Create tag in system project (e.g., `_npm/lodash:4.17.21`)
- If user reference provided, create tag in user's project
- Record in cached_urls table with provenance
- [ ] Error handling:
- 400: Invalid request (bad URL format, missing required fields)
- 403: Air-gap mode enabled and URL is from public source
- 404: Upstream returned 404
- 409: Hash mismatch (if expected_hash provided)
- 502: Upstream fetch failed (connection error, timeout)
- 503: Upstream source disabled
- [ ] Authentication required (any authenticated user can cache)
- [ ] Audit logging for cache operations
- [ ] Integration tests covering success and error cases
## Technical Notes
- URL parsing for package_name/tag derivation is format-specific:
- npm: `/{package}/-/{package}-{version}.tgz` → package=lodash, tag=4.17.21
- pypi: `/packages/.../requests-2.28.0.tar.gz` → package=requests, tag=2.28.0
- maven: `/{group}/{artifact}/{version}/{artifact}-{version}.jar`
- Deduplication: if same SHA256 already exists, just create new tag pointing to it
---
# Issue #71: System Projects (Cache Namespaces)
**Status: Pending**
## Description
Implement auto-created system projects for organizing cached artifacts by format type. These are special projects that provide a browsable namespace for all cached upstream packages.
## Acceptance Criteria
- [ ] System project names: `_npm`, `_pypi`, `_maven`, `_docker`, `_helm`, `_nuget`, `_deb`, `_rpm`, `_generic`
- [ ] Auto-creation:
- Created automatically on first cache request for that format
- Created by cache endpoint, not at startup
- Uses system user as creator (`created_by = "system"`)
- [ ] System project properties:
- `is_system = true`
- `is_public = true` (readable by all authenticated users)
- `description` = "System cache for {format} packages"
- [ ] Restrictions:
- Cannot be deleted (return 403 with message)
- Cannot be renamed
- Cannot change `is_public` to false
- Only admins can modify description
- [ ] Helper function: `get_or_create_system_project(source_type)` in routes.py or new cache.py module
- [ ] Update project deletion endpoint to check `is_system` flag
- [ ] Update project update endpoint to enforce restrictions
- [ ] Query helper: list all system projects for UI dropdown
- [ ] Unit tests for restrictions
- [ ] Integration tests for auto-creation and restrictions
## Technical Notes
- System projects are identified by `is_system=true`, not just naming convention
- The `_` prefix is a convention for display purposes
- Packages within system projects follow upstream naming (e.g., `_npm/lodash`, `_npm/@types/node`)
---
# Issue #72: Upstream Sources Admin API
**Status: Pending**
## Description
CRUD API endpoints for managing upstream sources configuration. Admin-only access.
## Acceptance Criteria
- [ ] `GET /api/v1/admin/upstream-sources` - List all upstream sources
- Returns array of sources with id, name, source_type, url, enabled, is_public, auth_type, priority, has_credentials, created_at, updated_at
- Supports `?enabled=true/false` filter
- Supports `?source_type=npm,pypi` filter
- Passwords/tokens never returned
- [ ] `POST /api/v1/admin/upstream-sources` - Create upstream source
- Request: name, source_type, url, enabled, is_public, auth_type, username, password, headers, priority
- Validates unique name
- Validates URL format
- Encrypts password/headers before storage
- Returns created source (without secrets)
- [ ] `GET /api/v1/admin/upstream-sources/{id}` - Get source details
- Returns source with `has_credentials` boolean, not actual credentials
- [ ] `PUT /api/v1/admin/upstream-sources/{id}` - Update source
- Partial update supported
- If password provided, re-encrypt; if omitted, keep existing
- Special value `password: null` clears credentials
- [ ] `DELETE /api/v1/admin/upstream-sources/{id}` - Delete source
- Returns 400 if source has cached_urls referencing it (optional: cascade or reassign)
- [ ] `POST /api/v1/admin/upstream-sources/{id}/test` - Test connectivity
- Attempts HEAD request to source URL
- Returns success/failure with status code and timing
- Does not cache anything
- [ ] All endpoints require admin role
- [ ] Audit logging for all mutations
- [ ] Pydantic schemas: UpstreamSourceCreate, UpstreamSourceUpdate, UpstreamSourceResponse
- [ ] Integration tests for all endpoints
## Technical Notes
- Test endpoint should respect auth configuration to verify credentials work
- Consider adding `last_used_at` and `last_error` fields for observability (future enhancement)
---
# Issue #73: Global Cache Settings API
**Status: Pending**
## Description
API endpoints for managing global cache settings including air-gap mode.
## Acceptance Criteria
- [ ] `GET /api/v1/admin/cache-settings` - Get current settings
- Returns: allow_public_internet, auto_create_system_projects, created_at, updated_at
- [ ] `PUT /api/v1/admin/cache-settings` - Update settings
- Partial update supported
- Returns updated settings
- [ ] Settings fields:
- `allow_public_internet` (boolean): When false, blocks all requests to sources marked `is_public=true`
- `auto_create_system_projects` (boolean): When false, system projects must be created manually
- [ ] Admin-only access
- [ ] Audit logging for changes (especially air-gap mode changes)
- [ ] Pydantic schemas: CacheSettingsResponse, CacheSettingsUpdate
- [ ] Initialize singleton row on first access if not exists
- [ ] Integration tests
## Technical Notes
- Air-gap mode change should be logged prominently (security-relevant)
- Consider requiring confirmation header for disabling air-gap mode (similar to factory reset)
---
# Issue #74: Environment Variable Overrides
**Status: Pending**
## Description
Allow cache and upstream configuration via environment variables for containerized deployments. Environment variables override database settings following 12-factor app principles.
## Acceptance Criteria
- [ ] Global settings overrides:
- `ORCHARD_CACHE_ALLOW_PUBLIC_INTERNET=true/false`
- `ORCHARD_CACHE_AUTO_CREATE_SYSTEM_PROJECTS=true/false`
- `ORCHARD_CACHE_ENCRYPTION_KEY` (Fernet key for credential encryption)
- [ ] Upstream source definition via env vars:
- `ORCHARD_UPSTREAM__{NAME}__URL` (double underscore as separator)
- `ORCHARD_UPSTREAM__{NAME}__TYPE` (npm, pypi, maven, etc.)
- `ORCHARD_UPSTREAM__{NAME}__ENABLED` (true/false)
- `ORCHARD_UPSTREAM__{NAME}__IS_PUBLIC` (true/false)
- `ORCHARD_UPSTREAM__{NAME}__AUTH_TYPE` (none, basic, bearer, api_key)
- `ORCHARD_UPSTREAM__{NAME}__USERNAME`
- `ORCHARD_UPSTREAM__{NAME}__PASSWORD`
- `ORCHARD_UPSTREAM__{NAME}__PRIORITY`
- Example: `ORCHARD_UPSTREAM__NPM_PRIVATE__URL=https://npm.corp.com`
- [ ] Env var sources:
- Loaded at startup
- Merged with database sources
- Env var sources have `source = "env"` marker
- Cannot be modified via API (return 400)
- Cannot be deleted via API (return 400)
- [ ] Update Settings class in config.py
- [ ] Update get/list endpoints to include env-defined sources
- [ ] Document all env vars in CLAUDE.md
- [ ] Unit tests for env var parsing
- [ ] Integration tests with env vars set
## Technical Notes
- Double underscore (`__`) separator allows source names with single underscores
- Env-defined sources should appear in API responses but marked as read-only
- Consider startup validation that warns about invalid env var combinations
---
# Issue #75: Frontend - Upstream Sources Management
**Status: Pending**
## Description
Admin UI for managing upstream sources and cache settings.
## Acceptance Criteria
- [ ] New admin page: `/admin/cache` or `/admin/upstream-sources`
- [ ] Upstream sources section:
- Table listing all sources with: name, type, URL, enabled toggle, public badge, priority, actions
- Visual distinction for env-defined sources (locked icon, no edit/delete)
- Create button opens modal/form
- Edit button for DB-defined sources
- Delete with confirmation modal
- Test connection button with status indicator
- [ ] Create/edit form fields:
- Name (text, required)
- Source type (dropdown)
- URL (text, required)
- Priority (number)
- Is public (checkbox)
- Enabled (checkbox)
- Auth type (dropdown: none, basic, bearer, api_key)
- Conditional auth fields based on type:
- Basic: username, password
- Bearer: token
- API key: header name, header value
- Password fields masked, "unchanged" placeholder on edit
- [ ] Cache settings section:
- Air-gap mode toggle with warning
- Auto-create system projects toggle
- "Air-gap mode" shows prominent warning banner when enabled
- [ ] Link from main admin navigation
- [ ] Loading and error states
- [ ] Success/error toast notifications
## Technical Notes
- Use existing admin page patterns from user management
- Air-gap toggle should require confirmation (modal with warning text)
---
# Issue #105: Frontend - System Projects Integration
**Status: Pending**
## Description
Integrate system projects into the frontend UI with appropriate visual treatment and navigation.
## Acceptance Criteria
- [ ] Home page project dropdown:
- System projects shown in separate "Cached Packages" section
- Visual distinction (icon, different background, or badge)
- Format icon for each type (npm, pypi, maven, etc.)
- [ ] Project list/grid:
- System projects can be filtered: "Show system projects" toggle
- Or separate tab: "Projects" | "Package Cache"
- [ ] System project page:
- "System Cache" badge in header
- Description explains this is auto-managed cache
- Settings/delete buttons hidden or disabled
- Shows format type prominently
- [ ] Package page within system project:
- Shows "Cached from" with source URL (linked)
- Shows "First cached" timestamp
- Shows which upstream source provided it
- [ ] Artifact page:
- If artifact came from cache, show provenance:
- Original URL
- Upstream source name
- Fetch timestamp
- [ ] Search includes system projects (with filter option)
## Technical Notes
- Use React context or query params for system project filtering
- Consider dedicated route: `/cache/npm/lodash` as alias for `/_npm/lodash`
---
# Issue #77: CLI - Cache Command
**Status: Pending**
## Description
Add a new `orchard cache` command to the existing CLI for caching artifacts from upstream URLs. This integrates with the new cache API endpoint and can optionally update `orchard.ensure` with cached artifacts.
## Acceptance Criteria
- [ ] New command: `orchard cache <url>` in `orchard/commands/cache.py`
- [ ] Basic usage:
```bash
# Cache a URL, print artifact info
orchard cache https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz
# Output:
# Caching https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz...
# Source type: npm
# Package: lodash
# Version: 4.17.21
#
# Successfully cached artifact
# Artifact ID: abc123...
# Size: 1.2 MB
# System project: _npm
# System package: lodash
# System tag: 4.17.21
```
- [ ] Options:
| Option | Description |
|--------|-------------|
| `--type, -t TYPE` | Source type: npm, pypi, maven, docker, helm, generic (auto-detected from URL if not provided) |
| `--package, -p NAME` | Package name in system project (auto-derived from URL if not provided) |
| `--tag TAG` | Tag name in system project (auto-derived from URL if not provided) |
| `--project PROJECT` | Also create tag in this user project |
| `--user-package PKG` | Package name in user project (required if --project specified) |
| `--user-tag TAG` | Tag name in user project (default: same as system tag) |
| `--expected-hash HASH` | Verify downloaded content matches this SHA256 |
| `--add` | Add to orchard.ensure after caching |
| `--add-path PATH` | Extraction path for --add (default: `<package>/`) |
| `--file, -f FILE` | Path to orchard.ensure file |
| `--verbose, -v` | Show detailed output |
- [ ] URL type auto-detection:
- `registry.npmjs.org` → npm
- `pypi.org` or `files.pythonhosted.org` → pypi
- `repo1.maven.org` or contains `/maven2/` → maven
- `registry-1.docker.io` or `docker.io` → docker
- Otherwise → generic
- [ ] Package/version extraction from URL patterns:
- npm: `/{package}/-/{package}-{version}.tgz`
- pypi: `/packages/.../requests-{version}.tar.gz`
- maven: `/{group}/{artifact}/{version}/{artifact}-{version}.jar`
- [ ] Add `cache_artifact()` function to `orchard/api.py`
- [ ] Integration with `--add` flag:
- Parse existing orchard.ensure
- Add new dependency entry pointing to cached artifact
- Use artifact_id (SHA256) for hermetic pinning
- [ ] Batch mode: `orchard cache --file urls.txt`
- One URL per line
- Lines starting with `#` are comments
- Report success/failure for each
- [ ] Exit codes:
- 0: Success (or already cached)
- 1: Fetch failed
- 2: Hash mismatch
- 3: Air-gap mode blocked request
- [ ] Error handling consistent with existing CLI patterns
- [ ] Unit tests in `test/test_cache.py`
- [ ] Update README.md with cache command documentation
## Technical Notes
- Follow existing Click patterns from other commands
- Use `get_auth_headers()` from `orchard/auth.py`
- URL parsing can use `urllib.parse`
- Consider adding URL pattern registry for extensibility
- The `--add` flag should integrate with existing ensure file parsing in `orchard/ensure.py`
## Example Workflows
```bash
# Simple: cache a single URL
orchard cache https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz
# Cache and add to orchard.ensure for current project
orchard cache https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz \
--add --add-path libs/lodash/
# Cache with explicit metadata
orchard cache https://internal.corp/files/custom-lib.tar.gz \
--type generic \
--package custom-lib \
--tag v1.0.0
# Cache and cross-reference to user project
orchard cache https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz \
--project my-app \
--user-package npm-deps \
--user-tag lodash-4.17.21
# Batch cache from file
orchard cache --file deps-urls.txt
# Verify hash while caching
orchard cache https://example.com/file.tar.gz \
--expected-hash sha256:abc123...
```
---
## Out of Scope (Future Enhancements)
- Automatic transitive dependency resolution (client's responsibility)
- Lockfile parsing (`package-lock.json`, `requirements.txt`) - stretch goal for CLI
- Cache eviction policies (we cache forever by design)
- Mirroring/sync between Orchard instances
- Format-specific metadata extraction (npm package.json parsing, etc.)
## Success Criteria
- [ ] Can cache any URL and retrieve by SHA256 hash
- [ ] Cached artifacts persist indefinitely
- [ ] Air-gap mode blocks all public internet access
- [ ] Multiple upstream sources with different auth
- [ ] System projects organize cached packages by format
- [ ] CLI can cache URLs and update orchard.ensure
- [ ] Admin UI for upstream source management

View File

@@ -11,7 +11,6 @@ import ChangePasswordPage from './pages/ChangePasswordPage';
import APIKeysPage from './pages/APIKeysPage';
import AdminUsersPage from './pages/AdminUsersPage';
import AdminOIDCPage from './pages/AdminOIDCPage';
import AdminCachePage from './pages/AdminCachePage';
import ProjectSettingsPage from './pages/ProjectSettingsPage';
import TeamsPage from './pages/TeamsPage';
import TeamDashboardPage from './pages/TeamDashboardPage';
@@ -51,7 +50,6 @@ function AppRoutes() {
<Route path="/settings/api-keys" element={<APIKeysPage />} />
<Route path="/admin/users" element={<AdminUsersPage />} />
<Route path="/admin/oidc" element={<AdminOIDCPage />} />
<Route path="/admin/cache" element={<AdminCachePage />} />
<Route path="/teams" element={<TeamsPage />} />
<Route path="/teams/:slug" element={<TeamDashboardPage />} />
<Route path="/teams/:slug/settings" element={<TeamSettingsPage />} />

View File

@@ -42,10 +42,6 @@ import {
TeamUpdate,
TeamMemberCreate,
TeamMemberUpdate,
UpstreamSource,
UpstreamSourceCreate,
UpstreamSourceUpdate,
UpstreamSourceTestResult,
} from './types';
const API_BASE = '/api/v1';
@@ -686,63 +682,3 @@ export async function searchUsers(query: string, limit: number = 10): Promise<Us
});
return handleResponse<UserSearchResult[]>(response);
}
// Upstream Sources Admin API
export interface UpstreamSourceListParams {
enabled?: boolean;
source_type?: string;
}
export async function listUpstreamSources(params: UpstreamSourceListParams = {}): Promise<UpstreamSource[]> {
const query = buildQueryString(params as Record<string, unknown>);
const response = await fetch(`${API_BASE}/admin/upstream-sources${query}`, {
credentials: 'include',
});
return handleResponse<UpstreamSource[]>(response);
}
export async function createUpstreamSource(data: UpstreamSourceCreate): Promise<UpstreamSource> {
const response = await fetch(`${API_BASE}/admin/upstream-sources`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(data),
credentials: 'include',
});
return handleResponse<UpstreamSource>(response);
}
export async function getUpstreamSource(id: string): Promise<UpstreamSource> {
const response = await fetch(`${API_BASE}/admin/upstream-sources/${id}`, {
credentials: 'include',
});
return handleResponse<UpstreamSource>(response);
}
export async function updateUpstreamSource(id: string, data: UpstreamSourceUpdate): Promise<UpstreamSource> {
const response = await fetch(`${API_BASE}/admin/upstream-sources/${id}`, {
method: 'PUT',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(data),
credentials: 'include',
});
return handleResponse<UpstreamSource>(response);
}
export async function deleteUpstreamSource(id: string): Promise<void> {
const response = await fetch(`${API_BASE}/admin/upstream-sources/${id}`, {
method: 'DELETE',
credentials: 'include',
});
if (!response.ok) {
const error = await response.json().catch(() => ({ detail: 'Unknown error' }));
throw new ApiError(error.detail || `HTTP ${response.status}`, response.status);
}
}
export async function testUpstreamSource(id: string): Promise<UpstreamSourceTestResult> {
const response = await fetch(`${API_BASE}/admin/upstream-sources/${id}/test`, {
method: 'POST',
credentials: 'include',
});
return handleResponse<UpstreamSourceTestResult>(response);
}

View File

@@ -272,7 +272,7 @@
.footer {
background: var(--bg-secondary);
border-top: 1px solid var(--border-primary);
padding: 12px 0;
padding: 24px 0;
}
.footer-content {

View File

@@ -183,18 +183,6 @@ function Layout({ children }: LayoutProps) {
</svg>
SSO Configuration
</NavLink>
<NavLink
to="/admin/cache"
className="user-menu-item"
onClick={() => setShowUserMenu(false)}
>
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<path d="M21 16V8a2 2 0 0 0-1-1.73l-7-4a2 2 0 0 0-2 0l-7 4A2 2 0 0 0 3 8v8a2 2 0 0 0 1 1.73l7 4a2 2 0 0 0 2 0l7-4A2 2 0 0 0 21 16z"/>
<polyline points="3.27 6.96 12 12.01 20.73 6.96"/>
<line x1="12" y1="22.08" x2="12" y2="12"/>
</svg>
Cache Management
</NavLink>
</>
)}
<div className="user-menu-divider"></div>
@@ -241,7 +229,7 @@ function Layout({ children }: LayoutProps) {
</svg>
<span className="footer-logo">Orchard</span>
<span className="footer-separator">·</span>
<span className="footer-tagline">The cache that never forgets</span>
<span className="footer-tagline">Content-Addressable Storage</span>
</div>
<div className="footer-links">
<a href="/docs">Documentation</a>

View File

@@ -1,377 +0,0 @@
.admin-cache-page {
padding: 2rem;
max-width: 1400px;
margin: 0 auto;
}
.admin-cache-page h1 {
margin-bottom: 2rem;
color: var(--text-primary);
}
.admin-cache-page h2 {
margin-bottom: 1rem;
color: var(--text-primary);
font-size: 1.25rem;
}
/* Success/Error Messages */
.success-message {
padding: 0.75rem 1rem;
background-color: #d4edda;
border: 1px solid #c3e6cb;
border-radius: 4px;
color: #155724;
margin-bottom: 1rem;
}
.error-message {
padding: 0.75rem 1rem;
background-color: #f8d7da;
border: 1px solid #f5c6cb;
border-radius: 4px;
color: #721c24;
margin-bottom: 1rem;
}
/* Sources Section */
.sources-section {
background: var(--bg-secondary);
border: 1px solid var(--border-color);
border-radius: 8px;
padding: 1.5rem;
}
.section-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 1rem;
}
.section-header h2 {
margin: 0;
}
/* Sources Table */
.sources-table {
width: 100%;
border-collapse: collapse;
background: var(--bg-primary);
border-radius: 4px;
overflow: hidden;
}
.sources-table th,
.sources-table td {
padding: 0.75rem 1rem;
text-align: center;
border-bottom: 1px solid var(--border-color);
}
.sources-table th {
background: var(--bg-tertiary);
font-weight: 600;
color: var(--text-secondary);
font-size: 0.85rem;
text-transform: uppercase;
}
.sources-table tr:last-child td {
border-bottom: none;
}
.sources-table tr.disabled-row {
opacity: 0.6;
}
.source-name {
font-weight: 500;
color: var(--text-primary);
white-space: nowrap;
}
/* Name column should be left-aligned */
.sources-table td:first-child {
text-align: left;
}
.url-cell {
font-family: monospace;
font-size: 0.9rem;
max-width: 300px;
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
text-align: left;
}
/* Badges */
.env-badge,
.status-badge {
display: inline-block;
padding: 0.2rem 0.5rem;
border-radius: 4px;
font-size: 0.75rem;
font-weight: 500;
margin-left: 0.5rem;
}
.env-badge {
background-color: #fff3e0;
color: #e65100;
}
.status-badge.enabled {
background-color: #e8f5e9;
color: #2e7d32;
}
.status-badge.disabled {
background-color: #ffebee;
color: #c62828;
}
.coming-soon-badge {
color: #9e9e9e;
font-style: italic;
font-size: 0.85em;
}
/* Actions */
.actions-cell {
white-space: nowrap;
}
.actions-cell .btn {
margin-right: 0.5rem;
}
.actions-cell .btn:last-child {
margin-right: 0;
}
.test-cell {
text-align: center;
width: 2rem;
}
.test-dot {
font-size: 1rem;
cursor: default;
}
.test-dot.success {
color: #2e7d32;
}
.test-dot.failure {
color: #c62828;
cursor: pointer;
}
.test-dot.failure:hover {
color: #b71c1c;
}
.test-dot.testing {
color: #1976d2;
animation: pulse 1s infinite;
}
@keyframes pulse {
0%, 100% { opacity: 1; }
50% { opacity: 0.4; }
}
/* Error Modal */
.error-modal-content {
background: var(--bg-primary);
border-radius: 8px;
padding: 2rem;
width: 100%;
max-width: 500px;
}
.error-modal-content h3 {
margin-top: 0;
color: #c62828;
}
.error-modal-content .error-details {
background: var(--bg-tertiary);
padding: 1rem;
border-radius: 4px;
font-family: monospace;
font-size: 0.9rem;
word-break: break-word;
white-space: pre-wrap;
}
.error-modal-content .modal-actions {
display: flex;
justify-content: flex-end;
margin-top: 1.5rem;
}
/* Buttons */
.btn {
padding: 0.5rem 1rem;
border: 1px solid var(--border-color);
border-radius: 4px;
background: var(--bg-primary);
color: var(--text-primary);
cursor: pointer;
font-size: 0.875rem;
}
.btn:hover {
background: var(--bg-tertiary);
}
.btn:disabled {
opacity: 0.6;
cursor: not-allowed;
}
.btn-primary {
background-color: var(--color-primary);
border-color: var(--color-primary);
color: white;
}
.btn-primary:hover {
background-color: var(--color-primary-hover);
}
.btn-danger {
background-color: #dc3545;
border-color: #dc3545;
color: white;
}
.btn-danger:hover {
background-color: #c82333;
}
.btn-sm {
padding: 0.25rem 0.75rem;
font-size: 0.8rem;
}
.btn-secondary {
background-color: var(--bg-tertiary);
border-color: var(--border-color);
color: var(--text-primary);
font-weight: 500;
}
.btn-secondary:hover {
background-color: var(--bg-secondary);
border-color: var(--text-secondary);
}
.empty-message {
color: var(--text-secondary);
font-style: italic;
padding: 2rem;
text-align: center;
}
/* Modal */
.modal-overlay {
position: fixed;
top: 0;
left: 0;
right: 0;
bottom: 0;
background: rgba(0, 0, 0, 0.5);
display: flex;
align-items: center;
justify-content: center;
z-index: 1000;
}
.modal-content {
background: var(--bg-primary);
border-radius: 8px;
padding: 2rem;
width: 100%;
max-width: 600px;
max-height: 90vh;
overflow-y: auto;
}
.modal-content h2 {
margin-top: 0;
}
/* Form */
.form-group {
margin-bottom: 1rem;
}
.form-group label {
display: block;
margin-bottom: 0.5rem;
font-weight: 500;
color: var(--text-primary);
}
.form-group input,
.form-group select {
width: 100%;
padding: 0.5rem;
border: 1px solid var(--border-color);
border-radius: 4px;
background: var(--bg-primary);
color: var(--text-primary);
font-size: 1rem;
}
.form-group input:focus,
.form-group select:focus {
outline: none;
border-color: var(--color-primary);
}
.form-row {
display: flex;
gap: 1rem;
}
.form-row .form-group {
flex: 1;
}
.checkbox-group label {
display: flex;
align-items: center;
gap: 0.5rem;
cursor: pointer;
}
.checkbox-group input[type="checkbox"] {
width: auto;
}
.help-text {
display: block;
font-size: 0.8rem;
color: var(--text-secondary);
margin-top: 0.25rem;
}
.form-actions {
display: flex;
justify-content: space-between;
align-items: center;
margin-top: 1.5rem;
padding-top: 1rem;
border-top: 1px solid var(--border-color);
}
.form-actions-right {
display: flex;
gap: 0.5rem;
}

View File

@@ -1,509 +0,0 @@
import { useState, useEffect } from 'react';
import { useNavigate } from 'react-router-dom';
import { useAuth } from '../contexts/AuthContext';
import {
listUpstreamSources,
createUpstreamSource,
updateUpstreamSource,
deleteUpstreamSource,
testUpstreamSource,
} from '../api';
import { UpstreamSource, SourceType, AuthType } from '../types';
import './AdminCachePage.css';
const SOURCE_TYPES: SourceType[] = ['npm', 'pypi', 'maven', 'docker', 'helm', 'nuget', 'deb', 'rpm', 'generic'];
const SUPPORTED_SOURCE_TYPES: Set<SourceType> = new Set(['pypi', 'generic']);
const AUTH_TYPES: AuthType[] = ['none', 'basic', 'bearer', 'api_key'];
function AdminCachePage() {
const { user, loading: authLoading } = useAuth();
const navigate = useNavigate();
// Upstream sources state
const [sources, setSources] = useState<UpstreamSource[]>([]);
const [loadingSources, setLoadingSources] = useState(true);
const [sourcesError, setSourcesError] = useState<string | null>(null);
// Create/Edit form state
const [showForm, setShowForm] = useState(false);
const [editingSource, setEditingSource] = useState<UpstreamSource | null>(null);
const [formData, setFormData] = useState({
name: '',
source_type: 'generic' as SourceType,
url: '',
enabled: true,
auth_type: 'none' as AuthType,
username: '',
password: '',
priority: 100,
});
const [formError, setFormError] = useState<string | null>(null);
const [isSaving, setIsSaving] = useState(false);
// Test result state
const [testingId, setTestingId] = useState<string | null>(null);
const [testResults, setTestResults] = useState<Record<string, { success: boolean; message: string }>>({});
// Delete confirmation state
const [deletingId, setDeletingId] = useState<string | null>(null);
// Success message
const [successMessage, setSuccessMessage] = useState<string | null>(null);
// Error modal state
const [showErrorModal, setShowErrorModal] = useState(false);
const [selectedError, setSelectedError] = useState<{ sourceName: string; error: string } | null>(null);
useEffect(() => {
if (!authLoading && !user) {
navigate('/login', { state: { from: '/admin/cache' } });
}
}, [user, authLoading, navigate]);
useEffect(() => {
if (user && user.is_admin) {
loadSources();
}
}, [user]);
useEffect(() => {
if (successMessage) {
const timer = setTimeout(() => setSuccessMessage(null), 3000);
return () => clearTimeout(timer);
}
}, [successMessage]);
async function loadSources() {
setLoadingSources(true);
setSourcesError(null);
try {
const data = await listUpstreamSources();
setSources(data);
} catch (err) {
setSourcesError(err instanceof Error ? err.message : 'Failed to load sources');
} finally {
setLoadingSources(false);
}
}
function openCreateForm() {
setEditingSource(null);
setFormData({
name: '',
source_type: 'generic',
url: '',
enabled: true,
auth_type: 'none',
username: '',
password: '',
priority: 100,
});
setFormError(null);
setShowForm(true);
}
function openEditForm(source: UpstreamSource) {
setEditingSource(source);
setFormData({
name: source.name,
source_type: source.source_type,
url: source.url,
enabled: source.enabled,
auth_type: source.auth_type,
username: source.username || '',
password: '',
priority: source.priority,
});
setFormError(null);
setShowForm(true);
}
async function handleFormSubmit(e: React.FormEvent) {
e.preventDefault();
if (!formData.name.trim()) {
setFormError('Name is required');
return;
}
if (!formData.url.trim()) {
setFormError('URL is required');
return;
}
setIsSaving(true);
setFormError(null);
try {
let savedSourceId: string | null = null;
if (editingSource) {
// Update existing source
await updateUpstreamSource(editingSource.id, {
name: formData.name.trim(),
source_type: formData.source_type,
url: formData.url.trim(),
enabled: formData.enabled,
auth_type: formData.auth_type,
username: formData.username.trim() || undefined,
password: formData.password || undefined,
priority: formData.priority,
});
savedSourceId = editingSource.id;
setSuccessMessage('Source updated successfully');
} else {
// Create new source
const newSource = await createUpstreamSource({
name: formData.name.trim(),
source_type: formData.source_type,
url: formData.url.trim(),
enabled: formData.enabled,
auth_type: formData.auth_type,
username: formData.username.trim() || undefined,
password: formData.password || undefined,
priority: formData.priority,
});
savedSourceId = newSource.id;
setSuccessMessage('Source created successfully');
}
setShowForm(false);
await loadSources();
// Auto-test the source after save
if (savedSourceId) {
testSourceById(savedSourceId);
}
} catch (err) {
setFormError(err instanceof Error ? err.message : 'Failed to save source');
} finally {
setIsSaving(false);
}
}
async function handleDelete(source: UpstreamSource) {
if (!window.confirm(`Delete upstream source "${source.name}"? This cannot be undone.`)) {
return;
}
setDeletingId(source.id);
try {
await deleteUpstreamSource(source.id);
setSuccessMessage(`Source "${source.name}" deleted`);
await loadSources();
} catch (err) {
setSourcesError(err instanceof Error ? err.message : 'Failed to delete source');
} finally {
setDeletingId(null);
}
}
async function handleTest(source: UpstreamSource) {
testSourceById(source.id);
}
async function testSourceById(sourceId: string) {
setTestingId(sourceId);
setTestResults((prev) => ({ ...prev, [sourceId]: { success: true, message: 'Testing...' } }));
try {
const result = await testUpstreamSource(sourceId);
setTestResults((prev) => ({
...prev,
[sourceId]: {
success: result.success,
message: result.success
? `OK (${result.elapsed_ms}ms)`
: result.error || `HTTP ${result.status_code}`,
},
}));
} catch (err) {
setTestResults((prev) => ({
...prev,
[sourceId]: {
success: false,
message: err instanceof Error ? err.message : 'Test failed',
},
}));
} finally {
setTestingId(null);
}
}
function showError(sourceName: string, error: string) {
setSelectedError({ sourceName, error });
setShowErrorModal(true);
}
if (authLoading) {
return <div className="admin-cache-page">Loading...</div>;
}
if (!user?.is_admin) {
return (
<div className="admin-cache-page">
<div className="error-message">Access denied. Admin privileges required.</div>
</div>
);
}
return (
<div className="admin-cache-page">
<h1>Upstream Sources</h1>
{successMessage && <div className="success-message">{successMessage}</div>}
{/* Upstream Sources Section */}
<section className="sources-section">
<div className="section-header">
<button className="btn btn-primary" onClick={openCreateForm}>
Add Source
</button>
</div>
{loadingSources ? (
<p>Loading sources...</p>
) : sourcesError ? (
<div className="error-message">{sourcesError}</div>
) : sources.length === 0 ? (
<p className="empty-message">No upstream sources configured.</p>
) : (
<table className="sources-table">
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>URL</th>
<th>Priority</th>
<th>Status</th>
<th>Test</th>
<th>Actions</th>
</tr>
</thead>
<tbody>
{sources.map((source) => (
<tr key={source.id} className={source.enabled ? '' : 'disabled-row'}>
<td>
<span className="source-name">{source.name}</span>
{source.source === 'env' && (
<span className="env-badge" title="Defined via environment variable">ENV</span>
)}
</td>
<td>
{source.source_type}
{!SUPPORTED_SOURCE_TYPES.has(source.source_type) && (
<span className="coming-soon-badge"> (coming soon)</span>
)}
</td>
<td className="url-cell" title={source.url}>{source.url}</td>
<td>{source.priority}</td>
<td>
<span className={`status-badge ${source.enabled ? 'enabled' : 'disabled'}`}>
{source.enabled ? 'Enabled' : 'Disabled'}
</span>
</td>
<td className="test-cell">
{testingId === source.id ? (
<span className="test-dot testing" title="Testing..."></span>
) : testResults[source.id] ? (
testResults[source.id].success ? (
<span className="test-dot success" title={testResults[source.id].message}></span>
) : (
<span
className="test-dot failure"
title="Click to see error"
onClick={() => showError(source.name, testResults[source.id].message)}
></span>
)
) : null}
</td>
<td className="actions-cell">
<button
className="btn btn-sm btn-secondary"
onClick={() => handleTest(source)}
disabled={testingId === source.id}
>
Test
</button>
{source.source !== 'env' && (
<button className="btn btn-sm btn-secondary" onClick={() => openEditForm(source)}>
Edit
</button>
)}
</td>
</tr>
))}
</tbody>
</table>
)}
</section>
{/* Create/Edit Modal */}
{showForm && (
<div className="modal-overlay" onClick={() => setShowForm(false)}>
<div className="modal-content" onClick={(e) => e.stopPropagation()}>
<h2>{editingSource ? 'Edit Upstream Source' : 'Add Upstream Source'}</h2>
<form onSubmit={handleFormSubmit}>
{formError && <div className="error-message">{formError}</div>}
<div className="form-group">
<label htmlFor="name">Name</label>
<input
type="text"
id="name"
value={formData.name}
onChange={(e) => setFormData({ ...formData, name: e.target.value })}
placeholder="e.g., npm-private"
required
/>
</div>
<div className="form-row">
<div className="form-group">
<label htmlFor="source_type">Type</label>
<select
id="source_type"
value={formData.source_type}
onChange={(e) => setFormData({ ...formData, source_type: e.target.value as SourceType })}
>
{SOURCE_TYPES.map((type) => (
<option key={type} value={type}>
{type}{!SUPPORTED_SOURCE_TYPES.has(type) ? ' (coming soon)' : ''}
</option>
))}
</select>
</div>
<div className="form-group">
<label htmlFor="priority">Priority</label>
<input
type="number"
id="priority"
value={formData.priority}
onChange={(e) => setFormData({ ...formData, priority: parseInt(e.target.value) || 100 })}
min="1"
/>
<span className="help-text">Lower = higher priority</span>
</div>
</div>
<div className="form-group">
<label htmlFor="url">URL</label>
<input
type="url"
id="url"
value={formData.url}
onChange={(e) => setFormData({ ...formData, url: e.target.value })}
placeholder="https://registry.example.com"
required
/>
</div>
<div className="form-row">
<div className="form-group checkbox-group">
<label>
<input
type="checkbox"
checked={formData.enabled}
onChange={(e) => setFormData({ ...formData, enabled: e.target.checked })}
/>
Enabled
</label>
</div>
</div>
<div className="form-group">
<label htmlFor="auth_type">Authentication</label>
<select
id="auth_type"
value={formData.auth_type}
onChange={(e) => setFormData({ ...formData, auth_type: e.target.value as AuthType })}
>
{AUTH_TYPES.map((type) => (
<option key={type} value={type}>
{type === 'none' ? 'None' : type === 'api_key' ? 'API Key' : type.charAt(0).toUpperCase() + type.slice(1)}
</option>
))}
</select>
</div>
{formData.auth_type !== 'none' && (
<div className="form-row">
{(formData.auth_type === 'basic' || formData.auth_type === 'api_key') && (
<div className="form-group">
<label htmlFor="username">{formData.auth_type === 'api_key' ? 'Header Name' : 'Username'}</label>
<input
type="text"
id="username"
value={formData.username}
onChange={(e) => setFormData({ ...formData, username: e.target.value })}
placeholder={formData.auth_type === 'api_key' ? 'X-API-Key' : 'username'}
/>
</div>
)}
<div className="form-group">
<label htmlFor="password">
{formData.auth_type === 'bearer'
? 'Token'
: formData.auth_type === 'api_key'
? 'API Key Value'
: 'Password'}
</label>
<input
type="password"
id="password"
value={formData.password}
onChange={(e) => setFormData({ ...formData, password: e.target.value })}
placeholder={editingSource ? '(unchanged)' : ''}
/>
{editingSource && (
<span className="help-text">Leave empty to keep existing {formData.auth_type === 'bearer' ? 'token' : 'credentials'}</span>
)}
</div>
</div>
)}
<div className="form-actions">
{editingSource && (
<button
type="button"
className="btn btn-danger"
onClick={() => {
handleDelete(editingSource);
setShowForm(false);
}}
disabled={deletingId === editingSource.id}
>
{deletingId === editingSource.id ? 'Deleting...' : 'Delete'}
</button>
)}
<div className="form-actions-right">
<button type="button" className="btn" onClick={() => setShowForm(false)}>
Cancel
</button>
<button type="submit" className="btn btn-primary" disabled={isSaving}>
{isSaving ? 'Saving...' : editingSource ? 'Update' : 'Create'}
</button>
</div>
</div>
</form>
</div>
</div>
)}
{/* Error Details Modal */}
{showErrorModal && selectedError && (
<div className="modal-overlay" onClick={() => setShowErrorModal(false)}>
<div className="error-modal-content" onClick={(e) => e.stopPropagation()}>
<h3>Connection Error: {selectedError.sourceName}</h3>
<div className="error-details">{selectedError.error}</div>
<div className="modal-actions">
<button className="btn" onClick={() => setShowErrorModal(false)}>
Close
</button>
</div>
</div>
</div>
)}
</div>
);
}
export default AdminCachePage;

View File

@@ -493,16 +493,3 @@
gap: 6px;
flex-wrap: wrap;
}
/* Cell name styles */
.cell-name {
display: flex;
align-items: center;
gap: 8px;
}
/* System project badge */
.system-badge {
font-size: 0.7rem;
padding: 2px 6px;
}

View File

@@ -224,9 +224,6 @@ function Home() {
<span className="cell-name">
{!project.is_public && <LockIcon />}
{project.name}
{project.is_system && (
<Badge variant="warning" className="system-badge">Cache</Badge>
)}
</span>
),
},
@@ -249,7 +246,7 @@ function Home() {
key: 'created_by',
header: 'Owner',
className: 'cell-owner',
render: (project) => project.team_name || project.created_by,
render: (project) => project.created_by,
},
...(user
? [

View File

@@ -793,194 +793,4 @@ tr:hover .copy-btn {
.ensure-file-modal {
max-height: 90vh;
}
.action-menu-dropdown {
right: 0;
left: auto;
}
}
/* Header upload button */
.header-upload-btn {
margin-left: auto;
}
/* Tag/Version cell */
.tag-version-cell {
display: flex;
flex-direction: column;
gap: 4px;
}
.tag-version-cell .version-badge {
font-size: 0.75rem;
color: var(--text-muted);
}
/* Icon buttons */
.btn-icon {
display: flex;
align-items: center;
justify-content: center;
width: 32px;
height: 32px;
padding: 0;
background: transparent;
border: 1px solid transparent;
border-radius: var(--radius-sm);
color: var(--text-secondary);
cursor: pointer;
transition: all var(--transition-fast);
}
.btn-icon:hover {
background: var(--bg-hover);
color: var(--text-primary);
}
/* Action menu */
.action-buttons {
display: flex;
align-items: center;
gap: 4px;
}
.action-menu {
position: relative;
}
/* Action menu backdrop for click-outside */
.action-menu-backdrop {
position: fixed;
top: 0;
left: 0;
right: 0;
bottom: 0;
z-index: 999;
}
.action-menu-dropdown {
position: fixed;
z-index: 1000;
min-width: 180px;
padding: 4px 0;
background: var(--bg-secondary);
border: 1px solid var(--border-primary);
border-radius: var(--radius-md);
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);
}
.action-menu-dropdown button {
display: block;
width: 100%;
padding: 8px 12px;
background: none;
border: none;
text-align: left;
font-size: 0.875rem;
color: var(--text-primary);
cursor: pointer;
transition: background var(--transition-fast);
}
.action-menu-dropdown button:hover {
background: var(--bg-hover);
}
/* Upload Modal */
.upload-modal,
.create-tag-modal {
background: var(--bg-secondary);
border-radius: var(--radius-lg);
width: 90%;
max-width: 500px;
max-height: 90vh;
overflow: hidden;
}
.modal-header {
display: flex;
align-items: center;
justify-content: space-between;
padding: 16px 20px;
border-bottom: 1px solid var(--border-primary);
}
.modal-header h3 {
margin: 0;
font-size: 1.125rem;
font-weight: 600;
}
.modal-body {
padding: 20px;
}
.modal-description {
margin-bottom: 16px;
color: var(--text-secondary);
font-size: 0.875rem;
}
.modal-actions {
display: flex;
justify-content: flex-end;
gap: 12px;
margin-top: 20px;
padding-top: 16px;
border-top: 1px solid var(--border-primary);
}
/* Dependencies Modal */
.deps-modal {
background: var(--bg-secondary);
border-radius: var(--radius-lg);
width: 90%;
max-width: 600px;
max-height: 80vh;
overflow: hidden;
display: flex;
flex-direction: column;
}
.deps-modal .modal-body {
overflow-y: auto;
flex: 1;
}
.deps-modal-controls {
display: flex;
gap: 8px;
margin-bottom: 16px;
}
/* Artifact ID Modal */
.artifact-id-modal {
background: var(--bg-secondary);
border-radius: var(--radius-lg);
width: 90%;
max-width: 500px;
}
.artifact-id-display {
display: flex;
align-items: center;
gap: 12px;
padding: 16px;
background: var(--bg-tertiary);
border-radius: var(--radius-md);
border: 1px solid var(--border-primary);
}
.artifact-id-display code {
font-family: 'JetBrains Mono', 'Fira Code', 'Consolas', monospace;
font-size: 0.8125rem;
color: var(--text-primary);
word-break: break-all;
flex: 1;
}
.artifact-id-display .copy-btn {
opacity: 1;
flex-shrink: 0;
}

View File

@@ -63,17 +63,12 @@ function PackagePage() {
const [accessDenied, setAccessDenied] = useState(false);
const [uploadTag, setUploadTag] = useState('');
const [uploadSuccess, setUploadSuccess] = useState<string | null>(null);
const [artifactIdInput, setArtifactIdInput] = useState('');
const [accessLevel, setAccessLevel] = useState<AccessLevel | null>(null);
const [createTagName, setCreateTagName] = useState('');
const [createTagArtifactId, setCreateTagArtifactId] = useState('');
const [createTagLoading, setCreateTagLoading] = useState(false);
// UI state
const [showUploadModal, setShowUploadModal] = useState(false);
const [showCreateTagModal, setShowCreateTagModal] = useState(false);
const [openMenuId, setOpenMenuId] = useState<string | null>(null);
const [menuPosition, setMenuPosition] = useState<{ top: number; left: number } | null>(null);
// Dependencies state
const [selectedTag, setSelectedTag] = useState<TagDetail | null>(null);
const [dependencies, setDependencies] = useState<Dependency[]>([]);
@@ -91,13 +86,6 @@ function PackagePage() {
// Dependency graph modal state
const [showGraph, setShowGraph] = useState(false);
// Dependencies modal state
const [showDepsModal, setShowDepsModal] = useState(false);
// Artifact ID modal state
const [showArtifactIdModal, setShowArtifactIdModal] = useState(false);
const [viewArtifactId, setViewArtifactId] = useState<string | null>(null);
// Ensure file modal state
const [showEnsureFile, setShowEnsureFile] = useState(false);
const [ensureFileContent, setEnsureFileContent] = useState<string | null>(null);
@@ -108,9 +96,6 @@ function PackagePage() {
// Derived permissions
const canWrite = accessLevel === 'write' || accessLevel === 'admin';
// Detect system projects (convention: name starts with "_")
const isSystemProject = projectName?.startsWith('_') ?? false;
// Get params from URL
const page = parseInt(searchParams.get('page') || '1', 10);
const search = searchParams.get('search') || '';
@@ -338,91 +323,7 @@ function PackagePage() {
setSelectedTag(tag);
};
const handleMenuOpen = (e: React.MouseEvent, tagId: string) => {
e.stopPropagation();
if (openMenuId === tagId) {
setOpenMenuId(null);
setMenuPosition(null);
} else {
const rect = e.currentTarget.getBoundingClientRect();
setMenuPosition({ top: rect.bottom + 4, left: rect.right - 180 });
setOpenMenuId(tagId);
}
};
// System projects show Version first, regular projects show Tag first
const columns = isSystemProject
? [
// System project columns: Version first, then Filename
{
key: 'version',
header: 'Version',
sortable: true,
render: (t: TagDetail) => (
<strong
className={`tag-name-link ${selectedTag?.id === t.id ? 'selected' : ''}`}
onClick={() => handleTagSelect(t)}
style={{ cursor: 'pointer' }}
>
<span className="version-badge">{t.version || t.name}</span>
</strong>
),
},
{
key: 'artifact_original_name',
header: 'Filename',
className: 'cell-truncate',
render: (t: TagDetail) => (
<span title={t.artifact_original_name || t.name}>{t.artifact_original_name || t.name}</span>
),
},
{
key: 'artifact_size',
header: 'Size',
render: (t: TagDetail) => <span>{formatBytes(t.artifact_size)}</span>,
},
{
key: 'created_at',
header: 'Cached',
sortable: true,
render: (t: TagDetail) => (
<span>{new Date(t.created_at).toLocaleDateString()}</span>
),
},
{
key: 'actions',
header: '',
render: (t: TagDetail) => (
<div className="action-buttons">
<a
href={getDownloadUrl(projectName!, packageName!, t.name)}
className="btn btn-icon"
download
title="Download"
>
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4" />
<polyline points="7 10 12 15 17 10" />
<line x1="12" y1="15" x2="12" y2="3" />
</svg>
</a>
<button
className="btn btn-icon"
onClick={(e) => handleMenuOpen(e, t.id)}
title="More actions"
>
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<circle cx="12" cy="12" r="1" />
<circle cx="12" cy="5" r="1" />
<circle cx="12" cy="19" r="1" />
</svg>
</button>
</div>
),
},
]
: [
// Regular project columns: Tag, Version, Filename
const columns = [
{
key: 'name',
header: 'Tag',
@@ -441,15 +342,17 @@ function PackagePage() {
key: 'version',
header: 'Version',
render: (t: TagDetail) => (
<span className="version-badge">{t.version || ''}</span>
<span className="version-badge">{t.version || '-'}</span>
),
},
{
key: 'artifact_original_name',
header: 'Filename',
className: 'cell-truncate',
key: 'artifact_id',
header: 'Artifact ID',
render: (t: TagDetail) => (
<span title={t.artifact_original_name || undefined}>{t.artifact_original_name || '—'}</span>
<div className="artifact-id-cell">
<code className="artifact-id">{t.artifact_id.substring(0, 12)}...</code>
<CopyButton text={t.artifact_id} />
</div>
),
},
{
@@ -457,94 +360,56 @@ function PackagePage() {
header: 'Size',
render: (t: TagDetail) => <span>{formatBytes(t.artifact_size)}</span>,
},
{
key: 'artifact_content_type',
header: 'Type',
render: (t: TagDetail) => (
<span className="content-type">{t.artifact_content_type || '-'}</span>
),
},
{
key: 'artifact_original_name',
header: 'Filename',
className: 'cell-truncate',
render: (t: TagDetail) => (
<span title={t.artifact_original_name || undefined}>{t.artifact_original_name || '-'}</span>
),
},
{
key: 'created_at',
header: 'Created',
sortable: true,
render: (t: TagDetail) => (
<span title={`by ${t.created_by}`}>{new Date(t.created_at).toLocaleDateString()}</span>
<div className="created-cell">
<span>{new Date(t.created_at).toLocaleString()}</span>
<span className="created-by">by {t.created_by}</span>
</div>
),
},
{
key: 'actions',
header: '',
header: 'Actions',
render: (t: TagDetail) => (
<div className="action-buttons">
<button
className="btn btn-secondary btn-small"
onClick={() => fetchEnsureFileForTag(t.name)}
title="View orchard.ensure file"
>
Ensure
</button>
<a
href={getDownloadUrl(projectName!, packageName!, t.name)}
className="btn btn-icon"
className="btn btn-secondary btn-small"
download
title="Download"
>
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4" />
<polyline points="7 10 12 15 17 10" />
<line x1="12" y1="15" x2="12" y2="3" />
</svg>
Download
</a>
<button
className="btn btn-icon"
onClick={(e) => handleMenuOpen(e, t.id)}
title="More actions"
>
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<circle cx="12" cy="12" r="1" />
<circle cx="12" cy="5" r="1" />
<circle cx="12" cy="19" r="1" />
</svg>
</button>
</div>
),
},
];
// Find the tag for the open menu
const openMenuTag = tags.find(t => t.id === openMenuId);
// Close menu when clicking outside
const handleClickOutside = () => {
if (openMenuId) {
setOpenMenuId(null);
setMenuPosition(null);
}
};
// Render dropdown menu as a portal-like element
const renderActionMenu = () => {
if (!openMenuId || !menuPosition || !openMenuTag) return null;
const t = openMenuTag;
return (
<div
className="action-menu-backdrop"
onClick={handleClickOutside}
>
<div
className="action-menu-dropdown"
style={{ top: menuPosition.top, left: menuPosition.left }}
onClick={(e) => e.stopPropagation()}
>
<button onClick={() => { setViewArtifactId(t.artifact_id); setShowArtifactIdModal(true); setOpenMenuId(null); setMenuPosition(null); }}>
View Artifact ID
</button>
<button onClick={() => { navigator.clipboard.writeText(t.artifact_id); setOpenMenuId(null); setMenuPosition(null); }}>
Copy Artifact ID
</button>
<button onClick={() => { fetchEnsureFileForTag(t.name); setOpenMenuId(null); setMenuPosition(null); }}>
View Ensure File
</button>
{canWrite && !isSystemProject && (
<button onClick={() => { setCreateTagArtifactId(t.artifact_id); setShowCreateTagModal(true); setOpenMenuId(null); setMenuPosition(null); }}>
Create/Update Tag
</button>
)}
<button onClick={() => { handleTagSelect(t); setShowDepsModal(true); setOpenMenuId(null); setMenuPosition(null); }}>
View Dependencies
</button>
</div>
</div>
);
};
if (loading && !tagsData) {
return <div className="loading">Loading...</div>;
}
@@ -586,19 +451,6 @@ function PackagePage() {
<div className="page-header__title-row">
<h1>{packageName}</h1>
{pkg && <Badge variant="default">{pkg.format}</Badge>}
{user && canWrite && !isSystemProject && (
<button
className="btn btn-primary btn-small header-upload-btn"
onClick={() => setShowUploadModal(true)}
>
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" style={{ marginRight: '6px' }}>
<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4" />
<polyline points="17 8 12 3 7 8" />
<line x1="12" y1="3" x2="12" y2="15" />
</svg>
Upload
</button>
)}
</div>
{pkg?.description && <p className="description">{pkg.description}</p>}
<div className="page-header__meta">
@@ -616,14 +468,14 @@ function PackagePage() {
</div>
{pkg && (pkg.tag_count !== undefined || pkg.artifact_count !== undefined) && (
<div className="package-header-stats">
{!isSystemProject && pkg.tag_count !== undefined && (
{pkg.tag_count !== undefined && (
<span className="stat-item">
<strong>{pkg.tag_count}</strong> tags
</span>
)}
{pkg.artifact_count !== undefined && (
<span className="stat-item">
<strong>{pkg.artifact_count}</strong> {isSystemProject ? 'versions' : 'artifacts'}
<strong>{pkg.artifact_count}</strong> artifacts
</span>
)}
{pkg.total_size !== undefined && pkg.total_size > 0 && (
@@ -631,7 +483,7 @@ function PackagePage() {
<strong>{formatBytes(pkg.total_size)}</strong> total
</span>
)}
{!isSystemProject && pkg.latest_tag && (
{pkg.latest_tag && (
<span className="stat-item">
Latest: <strong className="accent">{pkg.latest_tag}</strong>
</span>
@@ -644,9 +496,44 @@ function PackagePage() {
{error && <div className="error-message">{error}</div>}
{uploadSuccess && <div className="success-message">{uploadSuccess}</div>}
{user && (
<div className="upload-section card">
<h3>Upload Artifact</h3>
{canWrite ? (
<div className="upload-form">
<div className="form-group">
<label htmlFor="upload-tag">Tag (optional)</label>
<input
id="upload-tag"
type="text"
value={uploadTag}
onChange={(e) => setUploadTag(e.target.value)}
placeholder="v1.0.0, latest, stable..."
/>
</div>
<DragDropUpload
projectName={projectName!}
packageName={packageName!}
tag={uploadTag || undefined}
onUploadComplete={handleUploadComplete}
onUploadError={handleUploadError}
/>
</div>
) : (
<DragDropUpload
projectName={projectName!}
packageName={packageName!}
disabled={true}
disabledReason="You have read-only access to this project and cannot upload artifacts."
onUploadComplete={handleUploadComplete}
onUploadError={handleUploadError}
/>
)}
</div>
)}
<div className="section-header">
<h2>{isSystemProject ? 'Versions' : 'Tags / Versions'}</h2>
<h2>Tags / Versions</h2>
</div>
<div className="list-controls">
@@ -690,6 +577,110 @@ function PackagePage() {
/>
)}
{/* Dependencies Section */}
{tags.length > 0 && (
<div className="dependencies-section card">
<div className="dependencies-header">
<h3>Dependencies</h3>
<div className="dependencies-controls">
{selectedTag && (
<>
<button
className="btn btn-secondary btn-small"
onClick={fetchEnsureFile}
disabled={ensureFileLoading}
title="View orchard.ensure file"
>
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" style={{ marginRight: '6px' }}>
<path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z"></path>
<polyline points="14 2 14 8 20 8"></polyline>
<line x1="16" y1="13" x2="8" y2="13"></line>
<line x1="16" y1="17" x2="8" y2="17"></line>
<polyline points="10 9 9 9 8 9"></polyline>
</svg>
{ensureFileLoading ? 'Loading...' : 'View Ensure File'}
</button>
<button
className="btn btn-secondary btn-small"
onClick={() => setShowGraph(true)}
title="View full dependency tree"
>
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" style={{ marginRight: '6px' }}>
<circle cx="12" cy="12" r="3"></circle>
<circle cx="4" cy="4" r="2"></circle>
<circle cx="20" cy="4" r="2"></circle>
<circle cx="4" cy="20" r="2"></circle>
<circle cx="20" cy="20" r="2"></circle>
<line x1="9.5" y1="9.5" x2="5.5" y2="5.5"></line>
<line x1="14.5" y1="9.5" x2="18.5" y2="5.5"></line>
<line x1="9.5" y1="14.5" x2="5.5" y2="18.5"></line>
<line x1="14.5" y1="14.5" x2="18.5" y2="18.5"></line>
</svg>
View Graph
</button>
</>
)}
</div>
</div>
<div className="dependencies-tag-select">
{selectedTag && (
<select
className="tag-selector"
value={selectedTag.id}
onChange={(e) => {
const tag = tags.find(t => t.id === e.target.value);
if (tag) setSelectedTag(tag);
}}
>
{tags.map(t => (
<option key={t.id} value={t.id}>
{t.name}{t.version ? ` (${t.version})` : ''}
</option>
))}
</select>
)}
</div>
{depsLoading ? (
<div className="deps-loading">Loading dependencies...</div>
) : depsError ? (
<div className="deps-error">{depsError}</div>
) : dependencies.length === 0 ? (
<div className="deps-empty">
{selectedTag ? (
<span><strong>{selectedTag.name}</strong> has no dependencies</span>
) : (
<span>No dependencies</span>
)}
</div>
) : (
<div className="deps-list">
<div className="deps-summary">
<strong>{selectedTag?.name}</strong> has {dependencies.length} {dependencies.length === 1 ? 'dependency' : 'dependencies'}:
</div>
<ul className="deps-items">
{dependencies.map((dep) => (
<li key={dep.id} className="dep-item">
<Link
to={`/project/${dep.project}/${dep.package}`}
className="dep-link"
>
{dep.project}/{dep.package}
</Link>
<span className="dep-constraint">
@ {dep.version || dep.tag}
</span>
<span className="dep-status dep-status--ok" title="Package exists">
&#10003;
</span>
</li>
))}
</ul>
</div>
)}
</div>
)}
{/* Used By (Reverse Dependencies) Section */}
<div className="used-by-section card">
<h3>Used By</h3>
@@ -746,6 +737,78 @@ function PackagePage() {
)}
</div>
<div className="download-by-id-section card">
<h3>Download by Artifact ID</h3>
<div className="download-by-id-form">
<input
type="text"
value={artifactIdInput}
onChange={(e) => setArtifactIdInput(e.target.value.toLowerCase().replace(/[^a-f0-9]/g, '').slice(0, 64))}
placeholder="Enter SHA256 artifact ID (64 hex characters)"
className="artifact-id-input"
/>
<a
href={artifactIdInput.length === 64 ? getDownloadUrl(projectName!, packageName!, `artifact:${artifactIdInput}`) : '#'}
className={`btn btn-primary ${artifactIdInput.length !== 64 ? 'btn-disabled' : ''}`}
download
onClick={(e) => {
if (artifactIdInput.length !== 64) {
e.preventDefault();
}
}}
>
Download
</a>
</div>
{artifactIdInput.length > 0 && artifactIdInput.length !== 64 && (
<p className="validation-hint">Artifact ID must be exactly 64 hex characters ({artifactIdInput.length}/64)</p>
)}
</div>
{user && canWrite && (
<div className="create-tag-section card">
<h3>Create / Update Tag</h3>
<p className="section-description">Point a tag at any existing artifact by its ID</p>
<form onSubmit={handleCreateTag} className="create-tag-form">
<div className="form-row">
<div className="form-group">
<label htmlFor="create-tag-name">Tag Name</label>
<input
id="create-tag-name"
type="text"
value={createTagName}
onChange={(e) => setCreateTagName(e.target.value)}
placeholder="latest, stable, v1.0.0..."
disabled={createTagLoading}
/>
</div>
<div className="form-group form-group--wide">
<label htmlFor="create-tag-artifact">Artifact ID</label>
<input
id="create-tag-artifact"
type="text"
value={createTagArtifactId}
onChange={(e) => setCreateTagArtifactId(e.target.value.toLowerCase().replace(/[^a-f0-9]/g, '').slice(0, 64))}
placeholder="SHA256 hash (64 hex characters)"
className="artifact-id-input"
disabled={createTagLoading}
/>
</div>
<button
type="submit"
className="btn btn-primary"
disabled={createTagLoading || !createTagName.trim() || createTagArtifactId.length !== 64}
>
{createTagLoading ? 'Creating...' : 'Create Tag'}
</button>
</div>
{createTagArtifactId.length > 0 && createTagArtifactId.length !== 64 && (
<p className="validation-hint">Artifact ID must be exactly 64 hex characters ({createTagArtifactId.length}/64)</p>
)}
</form>
</div>
)}
<div className="usage-section card">
<h3>Usage</h3>
<p>Download artifacts using:</p>
@@ -768,118 +831,6 @@ function PackagePage() {
/>
)}
{/* Upload Modal */}
{showUploadModal && (
<div className="modal-overlay" onClick={() => setShowUploadModal(false)}>
<div className="upload-modal" onClick={(e) => e.stopPropagation()}>
<div className="modal-header">
<h3>Upload Artifact</h3>
<button
className="modal-close"
onClick={() => setShowUploadModal(false)}
title="Close"
>
<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<line x1="18" y1="6" x2="6" y2="18"></line>
<line x1="6" y1="6" x2="18" y2="18"></line>
</svg>
</button>
</div>
<div className="modal-body">
<div className="form-group">
<label htmlFor="upload-tag">Tag (optional)</label>
<input
id="upload-tag"
type="text"
value={uploadTag}
onChange={(e) => setUploadTag(e.target.value)}
placeholder="v1.0.0, latest, stable..."
/>
</div>
<DragDropUpload
projectName={projectName!}
packageName={packageName!}
tag={uploadTag || undefined}
onUploadComplete={(result) => {
handleUploadComplete(result);
setShowUploadModal(false);
setUploadTag('');
}}
onUploadError={handleUploadError}
/>
</div>
</div>
</div>
)}
{/* Create/Update Tag Modal */}
{showCreateTagModal && (
<div className="modal-overlay" onClick={() => setShowCreateTagModal(false)}>
<div className="create-tag-modal" onClick={(e) => e.stopPropagation()}>
<div className="modal-header">
<h3>Create / Update Tag</h3>
<button
className="modal-close"
onClick={() => { setShowCreateTagModal(false); setCreateTagName(''); setCreateTagArtifactId(''); }}
title="Close"
>
<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<line x1="18" y1="6" x2="6" y2="18"></line>
<line x1="6" y1="6" x2="18" y2="18"></line>
</svg>
</button>
</div>
<div className="modal-body">
<p className="modal-description">Point a tag at an artifact by its ID</p>
<form onSubmit={(e) => { handleCreateTag(e); setShowCreateTagModal(false); }}>
<div className="form-group">
<label htmlFor="modal-tag-name">Tag Name</label>
<input
id="modal-tag-name"
type="text"
value={createTagName}
onChange={(e) => setCreateTagName(e.target.value)}
placeholder="latest, stable, v1.0.0..."
disabled={createTagLoading}
/>
</div>
<div className="form-group">
<label htmlFor="modal-artifact-id">Artifact ID</label>
<input
id="modal-artifact-id"
type="text"
value={createTagArtifactId}
onChange={(e) => setCreateTagArtifactId(e.target.value.toLowerCase().replace(/[^a-f0-9]/g, '').slice(0, 64))}
placeholder="SHA256 hash (64 hex characters)"
className="artifact-id-input"
disabled={createTagLoading}
/>
{createTagArtifactId.length > 0 && createTagArtifactId.length !== 64 && (
<p className="validation-hint">{createTagArtifactId.length}/64 characters</p>
)}
</div>
<div className="modal-actions">
<button
type="button"
className="btn btn-secondary"
onClick={() => { setShowCreateTagModal(false); setCreateTagName(''); setCreateTagArtifactId(''); }}
>
Cancel
</button>
<button
type="submit"
className="btn btn-primary"
disabled={createTagLoading || !createTagName.trim() || createTagArtifactId.length !== 64}
>
{createTagLoading ? 'Creating...' : 'Create Tag'}
</button>
</div>
</form>
</div>
</div>
</div>
)}
{/* Ensure File Modal */}
{showEnsureFile && (
<div className="modal-overlay" onClick={() => setShowEnsureFile(false)}>
@@ -921,107 +872,6 @@ function PackagePage() {
</div>
</div>
)}
{/* Dependencies Modal */}
{showDepsModal && selectedTag && (
<div className="modal-overlay" onClick={() => setShowDepsModal(false)}>
<div className="deps-modal" onClick={(e) => e.stopPropagation()}>
<div className="modal-header">
<h3>Dependencies for {selectedTag.version || selectedTag.name}</h3>
<button
className="modal-close"
onClick={() => setShowDepsModal(false)}
title="Close"
>
<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<line x1="18" y1="6" x2="6" y2="18"></line>
<line x1="6" y1="6" x2="18" y2="18"></line>
</svg>
</button>
</div>
<div className="modal-body">
<div className="deps-modal-controls">
<button
className="btn btn-secondary btn-small"
onClick={fetchEnsureFile}
disabled={ensureFileLoading}
>
View Ensure File
</button>
<button
className="btn btn-secondary btn-small"
onClick={() => { setShowDepsModal(false); setShowGraph(true); }}
>
View Graph
</button>
</div>
{depsLoading ? (
<div className="deps-loading">Loading dependencies...</div>
) : depsError ? (
<div className="deps-error">{depsError}</div>
) : dependencies.length === 0 ? (
<div className="deps-empty">No dependencies</div>
) : (
<div className="deps-list">
<div className="deps-summary">
{dependencies.length} {dependencies.length === 1 ? 'dependency' : 'dependencies'}:
</div>
<ul className="deps-items">
{dependencies.map((dep) => (
<li key={dep.id} className="dep-item">
<Link
to={`/project/${dep.project}/${dep.package}`}
className="dep-link"
onClick={() => setShowDepsModal(false)}
>
{dep.project}/{dep.package}
</Link>
<span className="dep-constraint">
@ {dep.version || dep.tag}
</span>
<span className="dep-status dep-status--ok" title="Package exists">
&#10003;
</span>
</li>
))}
</ul>
</div>
)}
</div>
</div>
</div>
)}
{/* Artifact ID Modal */}
{showArtifactIdModal && viewArtifactId && (
<div className="modal-overlay" onClick={() => setShowArtifactIdModal(false)}>
<div className="artifact-id-modal" onClick={(e) => e.stopPropagation()}>
<div className="modal-header">
<h3>Artifact ID</h3>
<button
className="modal-close"
onClick={() => setShowArtifactIdModal(false)}
title="Close"
>
<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<line x1="18" y1="6" x2="6" y2="18"></line>
<line x1="6" y1="6" x2="18" y2="18"></line>
</svg>
</button>
</div>
<div className="modal-body">
<p className="modal-description">SHA256 hash identifying this artifact:</p>
<div className="artifact-id-display">
<code>{viewArtifactId}</code>
<CopyButton text={viewArtifactId} />
</div>
</div>
</div>
</div>
)}
{/* Action Menu Dropdown */}
{renderActionMenu()}
</div>
);
}

View File

@@ -195,9 +195,6 @@ function ProjectPage() {
<Badge variant={project.is_public ? 'public' : 'private'}>
{project.is_public ? 'Public' : 'Private'}
</Badge>
{project.is_system && (
<Badge variant="warning">System Cache</Badge>
)}
{accessLevel && (
<Badge variant={accessLevel === 'admin' ? 'success' : accessLevel === 'write' ? 'info' : 'default'}>
{isOwner ? 'Owner' : accessLevel.charAt(0).toUpperCase() + accessLevel.slice(1)}
@@ -214,7 +211,7 @@ function ProjectPage() {
</div>
</div>
<div className="page-header__actions">
{canAdmin && !project.team_id && !project.is_system && (
{canAdmin && !project.team_id && (
<button
className="btn btn-secondary"
onClick={() => navigate(`/project/${projectName}/settings`)}
@@ -227,11 +224,11 @@ function ProjectPage() {
Settings
</button>
)}
{canWrite && !project.is_system ? (
{canWrite ? (
<button className="btn btn-primary" onClick={() => setShowForm(!showForm)}>
{showForm ? 'Cancel' : '+ New Package'}
</button>
) : user && !project.is_system ? (
) : user ? (
<span className="text-muted" title="You have read-only access to this project">
Read-only access
</span>
@@ -294,7 +291,6 @@ function ProjectPage() {
placeholder="Filter packages..."
className="list-controls__search"
/>
{!project?.is_system && (
<select
className="list-controls__select"
value={format}
@@ -307,7 +303,6 @@ function ProjectPage() {
</option>
))}
</select>
)}
</div>
{hasActiveFilters && (
@@ -343,19 +338,19 @@ function ProjectPage() {
className: 'cell-description',
render: (pkg) => pkg.description || '—',
},
...(!project?.is_system ? [{
{
key: 'format',
header: 'Format',
render: (pkg: Package) => <Badge variant="default">{pkg.format}</Badge>,
}] : []),
...(!project?.is_system ? [{
render: (pkg) => <Badge variant="default">{pkg.format}</Badge>,
},
{
key: 'tag_count',
header: 'Tags',
render: (pkg: Package) => pkg.tag_count ?? '—',
}] : []),
render: (pkg) => pkg.tag_count ?? '—',
},
{
key: 'artifact_count',
header: project?.is_system ? 'Versions' : 'Artifacts',
header: 'Artifacts',
render: (pkg) => pkg.artifact_count ?? '—',
},
{
@@ -364,12 +359,12 @@ function ProjectPage() {
render: (pkg) =>
pkg.total_size !== undefined && pkg.total_size > 0 ? formatBytes(pkg.total_size) : '—',
},
...(!project?.is_system ? [{
{
key: 'latest_tag',
header: 'Latest',
render: (pkg: Package) =>
render: (pkg) =>
pkg.latest_tag ? <strong style={{ color: 'var(--accent-primary)' }}>{pkg.latest_tag}</strong> : '—',
}] : []),
},
{
key: 'created_at',
header: 'Created',

View File

@@ -6,7 +6,6 @@ export interface Project {
name: string;
description: string | null;
is_public: boolean;
is_system?: boolean; // True for system cache projects (_npm, _pypi, etc.)
created_at: string;
updated_at: string;
created_by: string;
@@ -504,56 +503,3 @@ export interface TeamMemberCreate {
export interface TeamMemberUpdate {
role: TeamRole;
}
// Upstream Source types
export type SourceType = 'npm' | 'pypi' | 'maven' | 'docker' | 'helm' | 'nuget' | 'deb' | 'rpm' | 'generic';
export type AuthType = 'none' | 'basic' | 'bearer' | 'api_key';
export interface UpstreamSource {
id: string;
name: string;
source_type: SourceType;
url: string;
enabled: boolean;
auth_type: AuthType;
username: string | null;
has_password: boolean;
has_headers: boolean;
priority: number;
source: 'database' | 'env';
created_at: string | null;
updated_at: string | null;
}
export interface UpstreamSourceCreate {
name: string;
source_type: SourceType;
url: string;
enabled?: boolean;
auth_type?: AuthType;
username?: string;
password?: string;
headers?: Record<string, string>;
priority?: number;
}
export interface UpstreamSourceUpdate {
name?: string;
source_type?: SourceType;
url?: string;
enabled?: boolean;
auth_type?: AuthType;
username?: string;
password?: string;
headers?: Record<string, string> | null;
priority?: number;
}
export interface UpstreamSourceTestResult {
success: boolean;
status_code: number | null;
elapsed_ms: number;
error: string | null;
source_id: string;
source_name: string;
}

View File

@@ -128,10 +128,6 @@ spec:
value: {{ .Values.orchard.rateLimit.login | quote }}
{{- end }}
{{- end }}
{{- if .Values.orchard.purgeSeedData }}
- name: ORCHARD_PURGE_SEED_DATA
value: "true"
{{- end }}
{{- if .Values.orchard.database.poolSize }}
- name: ORCHARD_DATABASE_POOL_SIZE
value: {{ .Values.orchard.database.poolSize | quote }}

View File

@@ -91,7 +91,6 @@ affinity: {}
# Orchard server configuration
orchard:
env: "development" # Allows seed data for testing
purgeSeedData: true # Remove public seed data (npm-public, pypi-public, etc.)
server:
host: "0.0.0.0"
port: 8080

View File

@@ -1,137 +0,0 @@
-- Migration 010: Upstream Artifact Caching
-- Adds support for caching artifacts from upstream registries (npm, PyPI, Maven, etc.)
-- Part of "The cache that never forgets" epic for hermetic builds
-- =============================================================================
-- upstream_sources: Configure upstream registries for artifact caching
-- =============================================================================
CREATE TABLE IF NOT EXISTS upstream_sources (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name VARCHAR(255) NOT NULL UNIQUE,
source_type VARCHAR(50) NOT NULL DEFAULT 'generic',
url VARCHAR(2048) NOT NULL,
enabled BOOLEAN NOT NULL DEFAULT FALSE,
is_public BOOLEAN NOT NULL DEFAULT TRUE,
auth_type VARCHAR(20) NOT NULL DEFAULT 'none',
username VARCHAR(255),
password_encrypted BYTEA,
headers_encrypted BYTEA,
priority INTEGER NOT NULL DEFAULT 100,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
-- Source type must be one of the supported types
CONSTRAINT check_source_type CHECK (
source_type IN ('npm', 'pypi', 'maven', 'docker', 'helm', 'nuget', 'deb', 'rpm', 'generic')
),
-- Auth type must be valid
CONSTRAINT check_auth_type CHECK (
auth_type IN ('none', 'basic', 'bearer', 'api_key')
),
-- Priority must be positive
CONSTRAINT check_priority_positive CHECK (priority > 0)
);
-- Indexes for upstream_sources
CREATE INDEX IF NOT EXISTS idx_upstream_sources_enabled ON upstream_sources(enabled);
CREATE INDEX IF NOT EXISTS idx_upstream_sources_source_type ON upstream_sources(source_type);
CREATE INDEX IF NOT EXISTS idx_upstream_sources_is_public ON upstream_sources(is_public);
CREATE INDEX IF NOT EXISTS idx_upstream_sources_priority ON upstream_sources(priority);
-- Comments for upstream_sources
COMMENT ON TABLE upstream_sources IS 'Configuration for upstream artifact registries (npm, PyPI, Maven, etc.)';
COMMENT ON COLUMN upstream_sources.name IS 'Unique human-readable name (e.g., npm-public, artifactory-private)';
COMMENT ON COLUMN upstream_sources.source_type IS 'Type of registry: npm, pypi, maven, docker, helm, nuget, deb, rpm, generic';
COMMENT ON COLUMN upstream_sources.url IS 'Base URL of the upstream registry';
COMMENT ON COLUMN upstream_sources.enabled IS 'Whether this source is active for caching';
COMMENT ON COLUMN upstream_sources.is_public IS 'True if this is a public internet source (for air-gap mode)';
COMMENT ON COLUMN upstream_sources.auth_type IS 'Authentication type: none, basic, bearer, api_key';
COMMENT ON COLUMN upstream_sources.username IS 'Username for basic auth';
COMMENT ON COLUMN upstream_sources.password_encrypted IS 'Fernet-encrypted password/token';
COMMENT ON COLUMN upstream_sources.headers_encrypted IS 'Fernet-encrypted custom headers (JSON)';
COMMENT ON COLUMN upstream_sources.priority IS 'Priority for source selection (lower = higher priority)';
-- =============================================================================
-- cache_settings: Global cache configuration (singleton table)
-- =============================================================================
CREATE TABLE IF NOT EXISTS cache_settings (
id INTEGER PRIMARY KEY DEFAULT 1,
allow_public_internet BOOLEAN NOT NULL DEFAULT TRUE,
auto_create_system_projects BOOLEAN NOT NULL DEFAULT TRUE,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
-- Singleton constraint
CONSTRAINT check_cache_settings_singleton CHECK (id = 1)
);
-- Insert default row
INSERT INTO cache_settings (id, allow_public_internet, auto_create_system_projects)
VALUES (1, TRUE, TRUE)
ON CONFLICT (id) DO NOTHING;
-- Comments for cache_settings
COMMENT ON TABLE cache_settings IS 'Global cache settings (singleton table)';
COMMENT ON COLUMN cache_settings.allow_public_internet IS 'Air-gap mode: when false, blocks all public internet sources';
COMMENT ON COLUMN cache_settings.auto_create_system_projects IS 'Auto-create system projects (_npm, _pypi, etc.) on first cache';
-- =============================================================================
-- cached_urls: Track URL to artifact mappings for provenance
-- =============================================================================
CREATE TABLE IF NOT EXISTS cached_urls (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
url VARCHAR(4096) NOT NULL,
url_hash VARCHAR(64) NOT NULL,
artifact_id VARCHAR(64) NOT NULL REFERENCES artifacts(id),
source_id UUID REFERENCES upstream_sources(id) ON DELETE SET NULL,
fetched_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
response_headers JSONB DEFAULT '{}',
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
-- URL hash must be unique (same URL = same cached artifact)
CONSTRAINT unique_url_hash UNIQUE (url_hash)
);
-- Indexes for cached_urls
CREATE INDEX IF NOT EXISTS idx_cached_urls_url_hash ON cached_urls(url_hash);
CREATE INDEX IF NOT EXISTS idx_cached_urls_artifact_id ON cached_urls(artifact_id);
CREATE INDEX IF NOT EXISTS idx_cached_urls_source_id ON cached_urls(source_id);
CREATE INDEX IF NOT EXISTS idx_cached_urls_fetched_at ON cached_urls(fetched_at);
-- Comments for cached_urls
COMMENT ON TABLE cached_urls IS 'Tracks which URLs have been cached and maps to artifacts';
COMMENT ON COLUMN cached_urls.url IS 'Original URL that was fetched';
COMMENT ON COLUMN cached_urls.url_hash IS 'SHA256 hash of URL for fast lookup';
COMMENT ON COLUMN cached_urls.artifact_id IS 'The cached artifact (by SHA256 content hash)';
COMMENT ON COLUMN cached_urls.source_id IS 'Which upstream source provided this (null if manual)';
COMMENT ON COLUMN cached_urls.fetched_at IS 'When the URL was fetched from upstream';
COMMENT ON COLUMN cached_urls.response_headers IS 'Original response headers from upstream (for debugging)';
-- =============================================================================
-- Add is_system column to projects table for system cache projects
-- =============================================================================
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM information_schema.columns
WHERE table_name = 'projects' AND column_name = 'is_system'
) THEN
ALTER TABLE projects ADD COLUMN is_system BOOLEAN NOT NULL DEFAULT FALSE;
CREATE INDEX IF NOT EXISTS idx_projects_is_system ON projects(is_system);
END IF;
END $$;
COMMENT ON COLUMN projects.is_system IS 'True for system cache projects (_npm, _pypi, etc.)';
-- =============================================================================
-- Seed default upstream sources (disabled by default for safety)
-- =============================================================================
INSERT INTO upstream_sources (id, name, source_type, url, enabled, is_public, auth_type, priority)
VALUES
(gen_random_uuid(), 'npm-public', 'npm', 'https://registry.npmjs.org', FALSE, TRUE, 'none', 100),
(gen_random_uuid(), 'pypi-public', 'pypi', 'https://pypi.org/simple', FALSE, TRUE, 'none', 100),
(gen_random_uuid(), 'maven-central', 'maven', 'https://repo1.maven.org/maven2', FALSE, TRUE, 'none', 100),
(gen_random_uuid(), 'docker-hub', 'docker', 'https://registry-1.docker.io', FALSE, TRUE, 'none', 100)
ON CONFLICT (name) DO NOTHING;