-- Migration 010: Upstream Artifact Caching -- Adds support for caching artifacts from upstream registries (npm, PyPI, Maven, etc.) -- Part of "The cache that never forgets" epic for hermetic builds -- ============================================================================= -- upstream_sources: Configure upstream registries for artifact caching -- ============================================================================= CREATE TABLE IF NOT EXISTS upstream_sources ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), name VARCHAR(255) NOT NULL UNIQUE, source_type VARCHAR(50) NOT NULL DEFAULT 'generic', url VARCHAR(2048) NOT NULL, enabled BOOLEAN NOT NULL DEFAULT FALSE, is_public BOOLEAN NOT NULL DEFAULT TRUE, auth_type VARCHAR(20) NOT NULL DEFAULT 'none', username VARCHAR(255), password_encrypted BYTEA, headers_encrypted BYTEA, priority INTEGER NOT NULL DEFAULT 100, created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, -- Source type must be one of the supported types CONSTRAINT check_source_type CHECK ( source_type IN ('npm', 'pypi', 'maven', 'docker', 'helm', 'nuget', 'deb', 'rpm', 'generic') ), -- Auth type must be valid CONSTRAINT check_auth_type CHECK ( auth_type IN ('none', 'basic', 'bearer', 'api_key') ), -- Priority must be positive CONSTRAINT check_priority_positive CHECK (priority > 0) ); -- Indexes for upstream_sources CREATE INDEX IF NOT EXISTS idx_upstream_sources_enabled ON upstream_sources(enabled); CREATE INDEX IF NOT EXISTS idx_upstream_sources_source_type ON upstream_sources(source_type); CREATE INDEX IF NOT EXISTS idx_upstream_sources_is_public ON upstream_sources(is_public); CREATE INDEX IF NOT EXISTS idx_upstream_sources_priority ON upstream_sources(priority); -- Comments for upstream_sources COMMENT ON TABLE upstream_sources IS 'Configuration for upstream artifact registries (npm, PyPI, Maven, etc.)'; COMMENT ON COLUMN upstream_sources.name IS 'Unique human-readable name (e.g., npm-public, artifactory-private)'; COMMENT ON COLUMN upstream_sources.source_type IS 'Type of registry: npm, pypi, maven, docker, helm, nuget, deb, rpm, generic'; COMMENT ON COLUMN upstream_sources.url IS 'Base URL of the upstream registry'; COMMENT ON COLUMN upstream_sources.enabled IS 'Whether this source is active for caching'; COMMENT ON COLUMN upstream_sources.is_public IS 'True if this is a public internet source (for air-gap mode)'; COMMENT ON COLUMN upstream_sources.auth_type IS 'Authentication type: none, basic, bearer, api_key'; COMMENT ON COLUMN upstream_sources.username IS 'Username for basic auth'; COMMENT ON COLUMN upstream_sources.password_encrypted IS 'Fernet-encrypted password/token'; COMMENT ON COLUMN upstream_sources.headers_encrypted IS 'Fernet-encrypted custom headers (JSON)'; COMMENT ON COLUMN upstream_sources.priority IS 'Priority for source selection (lower = higher priority)'; -- ============================================================================= -- cache_settings: Global cache configuration (singleton table) -- ============================================================================= CREATE TABLE IF NOT EXISTS cache_settings ( id INTEGER PRIMARY KEY DEFAULT 1, allow_public_internet BOOLEAN NOT NULL DEFAULT TRUE, auto_create_system_projects BOOLEAN NOT NULL DEFAULT TRUE, created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, -- Singleton constraint CONSTRAINT check_cache_settings_singleton CHECK (id = 1) ); -- Insert default row INSERT INTO cache_settings (id, allow_public_internet, auto_create_system_projects) VALUES (1, TRUE, TRUE) ON CONFLICT (id) DO NOTHING; -- Comments for cache_settings COMMENT ON TABLE cache_settings IS 'Global cache settings (singleton table)'; COMMENT ON COLUMN cache_settings.allow_public_internet IS 'Air-gap mode: when false, blocks all public internet sources'; COMMENT ON COLUMN cache_settings.auto_create_system_projects IS 'Auto-create system projects (_npm, _pypi, etc.) on first cache'; -- ============================================================================= -- cached_urls: Track URL to artifact mappings for provenance -- ============================================================================= CREATE TABLE IF NOT EXISTS cached_urls ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), url VARCHAR(4096) NOT NULL, url_hash VARCHAR(64) NOT NULL, artifact_id VARCHAR(64) NOT NULL REFERENCES artifacts(id), source_id UUID REFERENCES upstream_sources(id) ON DELETE SET NULL, fetched_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP, response_headers JSONB DEFAULT '{}', created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, -- URL hash must be unique (same URL = same cached artifact) CONSTRAINT unique_url_hash UNIQUE (url_hash) ); -- Indexes for cached_urls CREATE INDEX IF NOT EXISTS idx_cached_urls_url_hash ON cached_urls(url_hash); CREATE INDEX IF NOT EXISTS idx_cached_urls_artifact_id ON cached_urls(artifact_id); CREATE INDEX IF NOT EXISTS idx_cached_urls_source_id ON cached_urls(source_id); CREATE INDEX IF NOT EXISTS idx_cached_urls_fetched_at ON cached_urls(fetched_at); -- Comments for cached_urls COMMENT ON TABLE cached_urls IS 'Tracks which URLs have been cached and maps to artifacts'; COMMENT ON COLUMN cached_urls.url IS 'Original URL that was fetched'; COMMENT ON COLUMN cached_urls.url_hash IS 'SHA256 hash of URL for fast lookup'; COMMENT ON COLUMN cached_urls.artifact_id IS 'The cached artifact (by SHA256 content hash)'; COMMENT ON COLUMN cached_urls.source_id IS 'Which upstream source provided this (null if manual)'; COMMENT ON COLUMN cached_urls.fetched_at IS 'When the URL was fetched from upstream'; COMMENT ON COLUMN cached_urls.response_headers IS 'Original response headers from upstream (for debugging)'; -- ============================================================================= -- Add is_system column to projects table for system cache projects -- ============================================================================= DO $$ BEGIN IF NOT EXISTS ( SELECT 1 FROM information_schema.columns WHERE table_name = 'projects' AND column_name = 'is_system' ) THEN ALTER TABLE projects ADD COLUMN is_system BOOLEAN NOT NULL DEFAULT FALSE; CREATE INDEX IF NOT EXISTS idx_projects_is_system ON projects(is_system); END IF; END $$; COMMENT ON COLUMN projects.is_system IS 'True for system cache projects (_npm, _pypi, etc.)'; -- ============================================================================= -- Seed default upstream sources (disabled by default for safety) -- ============================================================================= INSERT INTO upstream_sources (id, name, source_type, url, enabled, is_public, auth_type, priority) VALUES (gen_random_uuid(), 'npm-public', 'npm', 'https://registry.npmjs.org', FALSE, TRUE, 'none', 100), (gen_random_uuid(), 'pypi-public', 'pypi', 'https://pypi.org/simple', FALSE, TRUE, 'none', 100), (gen_random_uuid(), 'maven-central', 'maven', 'https://repo1.maven.org/maven2', FALSE, TRUE, 'none', 100), (gen_random_uuid(), 'docker-hub', 'docker', 'https://registry-1.docker.io', FALSE, TRUE, 'none', 100) ON CONFLICT (name) DO NOTHING;