Upstream Caching (Epic #68-#75, #105): - Add upstream_sources and cache_settings tables with migrations - Add cache management API endpoints (CRUD for sources, settings) - Add environment variable overrides for upstream sources and cache settings - Add encryption module for storing credentials securely - Add frontend Admin Cache Management page - Add is_system field to projects for system cache distinction - Add purge_seed_data for transitioning to production-like environments CI Pipeline Refactoring: - Remove reset jobs (reset_stage_pre, reset_stage) - Add ephemeral orchard-test deployment for main branch testing - Run integration tests on ephemeral deployment before promoting to stage - Stage is now long-running pre-prod (smoke tests only) - Disable prosper_setup for tag pipelines
138 lines
7.3 KiB
SQL
138 lines
7.3 KiB
SQL
-- Migration 010: Upstream Artifact Caching
|
|
-- Adds support for caching artifacts from upstream registries (npm, PyPI, Maven, etc.)
|
|
-- Part of "The cache that never forgets" epic for hermetic builds
|
|
|
|
-- =============================================================================
|
|
-- upstream_sources: Configure upstream registries for artifact caching
|
|
-- =============================================================================
|
|
CREATE TABLE IF NOT EXISTS upstream_sources (
|
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
name VARCHAR(255) NOT NULL UNIQUE,
|
|
source_type VARCHAR(50) NOT NULL DEFAULT 'generic',
|
|
url VARCHAR(2048) NOT NULL,
|
|
enabled BOOLEAN NOT NULL DEFAULT FALSE,
|
|
is_public BOOLEAN NOT NULL DEFAULT TRUE,
|
|
auth_type VARCHAR(20) NOT NULL DEFAULT 'none',
|
|
username VARCHAR(255),
|
|
password_encrypted BYTEA,
|
|
headers_encrypted BYTEA,
|
|
priority INTEGER NOT NULL DEFAULT 100,
|
|
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
|
|
|
-- Source type must be one of the supported types
|
|
CONSTRAINT check_source_type CHECK (
|
|
source_type IN ('npm', 'pypi', 'maven', 'docker', 'helm', 'nuget', 'deb', 'rpm', 'generic')
|
|
),
|
|
|
|
-- Auth type must be valid
|
|
CONSTRAINT check_auth_type CHECK (
|
|
auth_type IN ('none', 'basic', 'bearer', 'api_key')
|
|
),
|
|
|
|
-- Priority must be positive
|
|
CONSTRAINT check_priority_positive CHECK (priority > 0)
|
|
);
|
|
|
|
-- Indexes for upstream_sources
|
|
CREATE INDEX IF NOT EXISTS idx_upstream_sources_enabled ON upstream_sources(enabled);
|
|
CREATE INDEX IF NOT EXISTS idx_upstream_sources_source_type ON upstream_sources(source_type);
|
|
CREATE INDEX IF NOT EXISTS idx_upstream_sources_is_public ON upstream_sources(is_public);
|
|
CREATE INDEX IF NOT EXISTS idx_upstream_sources_priority ON upstream_sources(priority);
|
|
|
|
-- Comments for upstream_sources
|
|
COMMENT ON TABLE upstream_sources IS 'Configuration for upstream artifact registries (npm, PyPI, Maven, etc.)';
|
|
COMMENT ON COLUMN upstream_sources.name IS 'Unique human-readable name (e.g., npm-public, artifactory-private)';
|
|
COMMENT ON COLUMN upstream_sources.source_type IS 'Type of registry: npm, pypi, maven, docker, helm, nuget, deb, rpm, generic';
|
|
COMMENT ON COLUMN upstream_sources.url IS 'Base URL of the upstream registry';
|
|
COMMENT ON COLUMN upstream_sources.enabled IS 'Whether this source is active for caching';
|
|
COMMENT ON COLUMN upstream_sources.is_public IS 'True if this is a public internet source (for air-gap mode)';
|
|
COMMENT ON COLUMN upstream_sources.auth_type IS 'Authentication type: none, basic, bearer, api_key';
|
|
COMMENT ON COLUMN upstream_sources.username IS 'Username for basic auth';
|
|
COMMENT ON COLUMN upstream_sources.password_encrypted IS 'Fernet-encrypted password/token';
|
|
COMMENT ON COLUMN upstream_sources.headers_encrypted IS 'Fernet-encrypted custom headers (JSON)';
|
|
COMMENT ON COLUMN upstream_sources.priority IS 'Priority for source selection (lower = higher priority)';
|
|
|
|
-- =============================================================================
|
|
-- cache_settings: Global cache configuration (singleton table)
|
|
-- =============================================================================
|
|
CREATE TABLE IF NOT EXISTS cache_settings (
|
|
id INTEGER PRIMARY KEY DEFAULT 1,
|
|
allow_public_internet BOOLEAN NOT NULL DEFAULT TRUE,
|
|
auto_create_system_projects BOOLEAN NOT NULL DEFAULT TRUE,
|
|
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
|
|
|
-- Singleton constraint
|
|
CONSTRAINT check_cache_settings_singleton CHECK (id = 1)
|
|
);
|
|
|
|
-- Insert default row
|
|
INSERT INTO cache_settings (id, allow_public_internet, auto_create_system_projects)
|
|
VALUES (1, TRUE, TRUE)
|
|
ON CONFLICT (id) DO NOTHING;
|
|
|
|
-- Comments for cache_settings
|
|
COMMENT ON TABLE cache_settings IS 'Global cache settings (singleton table)';
|
|
COMMENT ON COLUMN cache_settings.allow_public_internet IS 'Air-gap mode: when false, blocks all public internet sources';
|
|
COMMENT ON COLUMN cache_settings.auto_create_system_projects IS 'Auto-create system projects (_npm, _pypi, etc.) on first cache';
|
|
|
|
-- =============================================================================
|
|
-- cached_urls: Track URL to artifact mappings for provenance
|
|
-- =============================================================================
|
|
CREATE TABLE IF NOT EXISTS cached_urls (
|
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
url VARCHAR(4096) NOT NULL,
|
|
url_hash VARCHAR(64) NOT NULL,
|
|
artifact_id VARCHAR(64) NOT NULL REFERENCES artifacts(id),
|
|
source_id UUID REFERENCES upstream_sources(id) ON DELETE SET NULL,
|
|
fetched_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
|
response_headers JSONB DEFAULT '{}',
|
|
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
|
|
|
-- URL hash must be unique (same URL = same cached artifact)
|
|
CONSTRAINT unique_url_hash UNIQUE (url_hash)
|
|
);
|
|
|
|
-- Indexes for cached_urls
|
|
CREATE INDEX IF NOT EXISTS idx_cached_urls_url_hash ON cached_urls(url_hash);
|
|
CREATE INDEX IF NOT EXISTS idx_cached_urls_artifact_id ON cached_urls(artifact_id);
|
|
CREATE INDEX IF NOT EXISTS idx_cached_urls_source_id ON cached_urls(source_id);
|
|
CREATE INDEX IF NOT EXISTS idx_cached_urls_fetched_at ON cached_urls(fetched_at);
|
|
|
|
-- Comments for cached_urls
|
|
COMMENT ON TABLE cached_urls IS 'Tracks which URLs have been cached and maps to artifacts';
|
|
COMMENT ON COLUMN cached_urls.url IS 'Original URL that was fetched';
|
|
COMMENT ON COLUMN cached_urls.url_hash IS 'SHA256 hash of URL for fast lookup';
|
|
COMMENT ON COLUMN cached_urls.artifact_id IS 'The cached artifact (by SHA256 content hash)';
|
|
COMMENT ON COLUMN cached_urls.source_id IS 'Which upstream source provided this (null if manual)';
|
|
COMMENT ON COLUMN cached_urls.fetched_at IS 'When the URL was fetched from upstream';
|
|
COMMENT ON COLUMN cached_urls.response_headers IS 'Original response headers from upstream (for debugging)';
|
|
|
|
-- =============================================================================
|
|
-- Add is_system column to projects table for system cache projects
|
|
-- =============================================================================
|
|
DO $$
|
|
BEGIN
|
|
IF NOT EXISTS (
|
|
SELECT 1 FROM information_schema.columns
|
|
WHERE table_name = 'projects' AND column_name = 'is_system'
|
|
) THEN
|
|
ALTER TABLE projects ADD COLUMN is_system BOOLEAN NOT NULL DEFAULT FALSE;
|
|
CREATE INDEX IF NOT EXISTS idx_projects_is_system ON projects(is_system);
|
|
END IF;
|
|
END $$;
|
|
|
|
COMMENT ON COLUMN projects.is_system IS 'True for system cache projects (_npm, _pypi, etc.)';
|
|
|
|
-- =============================================================================
|
|
-- Seed default upstream sources (disabled by default for safety)
|
|
-- =============================================================================
|
|
INSERT INTO upstream_sources (id, name, source_type, url, enabled, is_public, auth_type, priority)
|
|
VALUES
|
|
(gen_random_uuid(), 'npm-public', 'npm', 'https://registry.npmjs.org', FALSE, TRUE, 'none', 100),
|
|
(gen_random_uuid(), 'pypi-public', 'pypi', 'https://pypi.org/simple', FALSE, TRUE, 'none', 100),
|
|
(gen_random_uuid(), 'maven-central', 'maven', 'https://repo1.maven.org/maven2', FALSE, TRUE, 'none', 100),
|
|
(gen_random_uuid(), 'docker-hub', 'docker', 'https://registry-1.docker.io', FALSE, TRUE, 'none', 100)
|
|
ON CONFLICT (name) DO NOTHING;
|