Files
orchard/migrations/010_upstream_caching.sql

138 lines
7.3 KiB
SQL

-- Migration 010: Upstream Artifact Caching
-- Adds support for caching artifacts from upstream registries (npm, PyPI, Maven, etc.)
-- Part of "The cache that never forgets" epic for hermetic builds
-- =============================================================================
-- upstream_sources: Configure upstream registries for artifact caching
-- =============================================================================
CREATE TABLE IF NOT EXISTS upstream_sources (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name VARCHAR(255) NOT NULL UNIQUE,
source_type VARCHAR(50) NOT NULL DEFAULT 'generic',
url VARCHAR(2048) NOT NULL,
enabled BOOLEAN NOT NULL DEFAULT FALSE,
is_public BOOLEAN NOT NULL DEFAULT TRUE,
auth_type VARCHAR(20) NOT NULL DEFAULT 'none',
username VARCHAR(255),
password_encrypted BYTEA,
headers_encrypted BYTEA,
priority INTEGER NOT NULL DEFAULT 100,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
-- Source type must be one of the supported types
CONSTRAINT check_source_type CHECK (
source_type IN ('npm', 'pypi', 'maven', 'docker', 'helm', 'nuget', 'deb', 'rpm', 'generic')
),
-- Auth type must be valid
CONSTRAINT check_auth_type CHECK (
auth_type IN ('none', 'basic', 'bearer', 'api_key')
),
-- Priority must be positive
CONSTRAINT check_priority_positive CHECK (priority > 0)
);
-- Indexes for upstream_sources
CREATE INDEX IF NOT EXISTS idx_upstream_sources_enabled ON upstream_sources(enabled);
CREATE INDEX IF NOT EXISTS idx_upstream_sources_source_type ON upstream_sources(source_type);
CREATE INDEX IF NOT EXISTS idx_upstream_sources_is_public ON upstream_sources(is_public);
CREATE INDEX IF NOT EXISTS idx_upstream_sources_priority ON upstream_sources(priority);
-- Comments for upstream_sources
COMMENT ON TABLE upstream_sources IS 'Configuration for upstream artifact registries (npm, PyPI, Maven, etc.)';
COMMENT ON COLUMN upstream_sources.name IS 'Unique human-readable name (e.g., npm-public, artifactory-private)';
COMMENT ON COLUMN upstream_sources.source_type IS 'Type of registry: npm, pypi, maven, docker, helm, nuget, deb, rpm, generic';
COMMENT ON COLUMN upstream_sources.url IS 'Base URL of the upstream registry';
COMMENT ON COLUMN upstream_sources.enabled IS 'Whether this source is active for caching';
COMMENT ON COLUMN upstream_sources.is_public IS 'True if this is a public internet source (for air-gap mode)';
COMMENT ON COLUMN upstream_sources.auth_type IS 'Authentication type: none, basic, bearer, api_key';
COMMENT ON COLUMN upstream_sources.username IS 'Username for basic auth';
COMMENT ON COLUMN upstream_sources.password_encrypted IS 'Fernet-encrypted password/token';
COMMENT ON COLUMN upstream_sources.headers_encrypted IS 'Fernet-encrypted custom headers (JSON)';
COMMENT ON COLUMN upstream_sources.priority IS 'Priority for source selection (lower = higher priority)';
-- =============================================================================
-- cache_settings: Global cache configuration (singleton table)
-- =============================================================================
CREATE TABLE IF NOT EXISTS cache_settings (
id INTEGER PRIMARY KEY DEFAULT 1,
allow_public_internet BOOLEAN NOT NULL DEFAULT TRUE,
auto_create_system_projects BOOLEAN NOT NULL DEFAULT TRUE,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
-- Singleton constraint
CONSTRAINT check_cache_settings_singleton CHECK (id = 1)
);
-- Insert default row
INSERT INTO cache_settings (id, allow_public_internet, auto_create_system_projects)
VALUES (1, TRUE, TRUE)
ON CONFLICT (id) DO NOTHING;
-- Comments for cache_settings
COMMENT ON TABLE cache_settings IS 'Global cache settings (singleton table)';
COMMENT ON COLUMN cache_settings.allow_public_internet IS 'Air-gap mode: when false, blocks all public internet sources';
COMMENT ON COLUMN cache_settings.auto_create_system_projects IS 'Auto-create system projects (_npm, _pypi, etc.) on first cache';
-- =============================================================================
-- cached_urls: Track URL to artifact mappings for provenance
-- =============================================================================
CREATE TABLE IF NOT EXISTS cached_urls (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
url VARCHAR(4096) NOT NULL,
url_hash VARCHAR(64) NOT NULL,
artifact_id VARCHAR(64) NOT NULL REFERENCES artifacts(id),
source_id UUID REFERENCES upstream_sources(id) ON DELETE SET NULL,
fetched_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
response_headers JSONB DEFAULT '{}',
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
-- URL hash must be unique (same URL = same cached artifact)
CONSTRAINT unique_url_hash UNIQUE (url_hash)
);
-- Indexes for cached_urls
CREATE INDEX IF NOT EXISTS idx_cached_urls_url_hash ON cached_urls(url_hash);
CREATE INDEX IF NOT EXISTS idx_cached_urls_artifact_id ON cached_urls(artifact_id);
CREATE INDEX IF NOT EXISTS idx_cached_urls_source_id ON cached_urls(source_id);
CREATE INDEX IF NOT EXISTS idx_cached_urls_fetched_at ON cached_urls(fetched_at);
-- Comments for cached_urls
COMMENT ON TABLE cached_urls IS 'Tracks which URLs have been cached and maps to artifacts';
COMMENT ON COLUMN cached_urls.url IS 'Original URL that was fetched';
COMMENT ON COLUMN cached_urls.url_hash IS 'SHA256 hash of URL for fast lookup';
COMMENT ON COLUMN cached_urls.artifact_id IS 'The cached artifact (by SHA256 content hash)';
COMMENT ON COLUMN cached_urls.source_id IS 'Which upstream source provided this (null if manual)';
COMMENT ON COLUMN cached_urls.fetched_at IS 'When the URL was fetched from upstream';
COMMENT ON COLUMN cached_urls.response_headers IS 'Original response headers from upstream (for debugging)';
-- =============================================================================
-- Add is_system column to projects table for system cache projects
-- =============================================================================
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM information_schema.columns
WHERE table_name = 'projects' AND column_name = 'is_system'
) THEN
ALTER TABLE projects ADD COLUMN is_system BOOLEAN NOT NULL DEFAULT FALSE;
CREATE INDEX IF NOT EXISTS idx_projects_is_system ON projects(is_system);
END IF;
END $$;
COMMENT ON COLUMN projects.is_system IS 'True for system cache projects (_npm, _pypi, etc.)';
-- =============================================================================
-- Seed default upstream sources (disabled by default for safety)
-- =============================================================================
INSERT INTO upstream_sources (id, name, source_type, url, enabled, is_public, auth_type, priority)
VALUES
(gen_random_uuid(), 'npm-public', 'npm', 'https://registry.npmjs.org', FALSE, TRUE, 'none', 100),
(gen_random_uuid(), 'pypi-public', 'pypi', 'https://pypi.org/simple', FALSE, TRUE, 'none', 100),
(gen_random_uuid(), 'maven-central', 'maven', 'https://repo1.maven.org/maven2', FALSE, TRUE, 'none', 100),
(gen_random_uuid(), 'docker-hub', 'docker', 'https://registry-1.docker.io', FALSE, TRUE, 'none', 100)
ON CONFLICT (name) DO NOTHING;