diff --git a/CHANGELOG.md b/CHANGELOG.md index 698e43b..076d3fd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- Added `format` and `platform` fields to packages table (#16) +- Added `checksum_md5` and `metadata` JSONB fields to artifacts table (#16) +- Added `updated_at` field to tags table (#16) +- Added `tag_name`, `user_agent`, `duration_ms`, `deduplicated`, `checksum_verified` fields to uploads table (#16) +- Added `change_type` field to tag_history table (#16) +- Added composite indexes for common query patterns (#16) +- Added GIN indexes on JSONB fields for efficient JSON queries (#16) +- Added partial index for public projects (#16) +- Added database triggers for `updated_at` timestamps (#16) +- Added database triggers for maintaining artifact `ref_count` accuracy (#16) +- Added CHECK constraints for data integrity (`size > 0`, `ref_count >= 0`) (#16) +- Added migration script `002_schema_enhancements.sql` for existing databases (#16) + ## [0.1.0] - 2025-12-12 ### Changed - Changed the Dockerfile npm build arg to use the deps.global.bsf.tools URL as the default registry (#45) diff --git a/backend/app/models.py b/backend/app/models.py index 7e1a08a..1e1256f 100644 --- a/backend/app/models.py +++ b/backend/app/models.py @@ -73,11 +73,12 @@ class Artifact(Base): size = Column(BigInteger, nullable=False) content_type = Column(String(255)) original_name = Column(String(1024)) + checksum_md5 = Column(String(32)) # MD5 hash for additional verification + metadata = Column(JSON, default=dict) # Format-specific metadata created_at = Column(DateTime(timezone=True), default=datetime.utcnow) created_by = Column(String(255), nullable=False) ref_count = Column(Integer, default=1) s3_key = Column(String(1024), nullable=False) - format_metadata = Column(JSON, default=dict) # Format-specific metadata (version, etc.) tags = relationship("Tag", back_populates="artifact") uploads = relationship("Upload", back_populates="artifact") @@ -99,6 +100,7 @@ class Tag(Base): name = Column(String(255), nullable=False) artifact_id = Column(String(64), ForeignKey("artifacts.id"), nullable=False) created_at = Column(DateTime(timezone=True), default=datetime.utcnow) + updated_at = Column(DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow) created_by = Column(String(255), nullable=False) package = relationship("Package", back_populates="tags") @@ -120,6 +122,7 @@ class TagHistory(Base): tag_id = Column(UUID(as_uuid=True), ForeignKey("tags.id", ondelete="CASCADE"), nullable=False) old_artifact_id = Column(String(64), ForeignKey("artifacts.id")) new_artifact_id = Column(String(64), ForeignKey("artifacts.id"), nullable=False) + change_type = Column(String(20), nullable=False, default="update") changed_at = Column(DateTime(timezone=True), default=datetime.utcnow) changed_by = Column(String(255), nullable=False) @@ -127,6 +130,8 @@ class TagHistory(Base): __table_args__ = ( Index("idx_tag_history_tag_id", "tag_id"), + Index("idx_tag_history_changed_at", "changed_at"), + CheckConstraint("change_type IN ('create', 'update', 'delete')", name="check_change_type"), ) @@ -137,6 +142,11 @@ class Upload(Base): artifact_id = Column(String(64), ForeignKey("artifacts.id"), nullable=False) package_id = Column(UUID(as_uuid=True), ForeignKey("packages.id"), nullable=False) original_name = Column(String(1024)) + tag_name = Column(String(255)) # Tag assigned during upload + user_agent = Column(String(512)) # Client identification + duration_ms = Column(Integer) # Upload timing in milliseconds + deduplicated = Column(Boolean, default=False) # Whether artifact was deduplicated + checksum_verified = Column(Boolean, default=True) # Whether checksum was verified uploaded_at = Column(DateTime(timezone=True), default=datetime.utcnow) uploaded_by = Column(String(255), nullable=False) source_ip = Column(String(45)) @@ -148,6 +158,8 @@ class Upload(Base): Index("idx_uploads_artifact_id", "artifact_id"), Index("idx_uploads_package_id", "package_id"), Index("idx_uploads_uploaded_at", "uploaded_at"), + Index("idx_uploads_package_uploaded_at", "package_id", "uploaded_at"), + Index("idx_uploads_uploaded_by_at", "uploaded_by", "uploaded_at"), ) @@ -220,4 +232,6 @@ class AuditLog(Base): Index("idx_audit_logs_resource", "resource"), Index("idx_audit_logs_user_id", "user_id"), Index("idx_audit_logs_timestamp", "timestamp"), + Index("idx_audit_logs_resource_timestamp", "resource", "timestamp"), + Index("idx_audit_logs_user_timestamp", "user_id", "timestamp"), ) diff --git a/migrations/001_initial.sql b/migrations/001_initial.sql index 9f31947..983b96e 100644 --- a/migrations/001_initial.sql +++ b/migrations/001_initial.sql @@ -14,6 +14,7 @@ CREATE TABLE IF NOT EXISTS projects ( CREATE INDEX idx_projects_name ON projects(name); CREATE INDEX idx_projects_created_by ON projects(created_by); +CREATE INDEX idx_projects_public ON projects(name) WHERE is_public = true; -- Packages (collections within projects) CREATE TABLE IF NOT EXISTS packages ( @@ -21,6 +22,8 @@ CREATE TABLE IF NOT EXISTS packages ( project_id UUID NOT NULL REFERENCES projects(id) ON DELETE CASCADE, name VARCHAR(255) NOT NULL, description TEXT, + format VARCHAR(50) DEFAULT 'generic', -- package type: generic, npm, pypi, docker, etc. + platform VARCHAR(50) DEFAULT 'any', -- target platform: any, linux, darwin, windows, etc. created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), UNIQUE(project_id, name) @@ -28,21 +31,26 @@ CREATE TABLE IF NOT EXISTS packages ( CREATE INDEX idx_packages_project_id ON packages(project_id); CREATE INDEX idx_packages_name ON packages(name); +CREATE INDEX idx_packages_format ON packages(format); +CREATE INDEX idx_packages_platform ON packages(platform); -- Artifacts (Content-Addressable) CREATE TABLE IF NOT EXISTS artifacts ( id VARCHAR(64) PRIMARY KEY, -- SHA256 hash - size BIGINT NOT NULL, + size BIGINT NOT NULL CHECK (size > 0), content_type VARCHAR(255), original_name VARCHAR(1024), + checksum_md5 VARCHAR(32), -- MD5 hash for additional verification + metadata JSONB, -- format-specific metadata created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), created_by VARCHAR(255) NOT NULL, - ref_count INTEGER DEFAULT 1, + ref_count INTEGER DEFAULT 1 CHECK (ref_count >= 0), s3_key VARCHAR(1024) NOT NULL ); CREATE INDEX idx_artifacts_created_at ON artifacts(created_at); CREATE INDEX idx_artifacts_created_by ON artifacts(created_by); +CREATE INDEX idx_artifacts_metadata ON artifacts USING GIN (metadata); -- Tags (Aliases pointing to artifacts) CREATE TABLE IF NOT EXISTS tags ( @@ -51,12 +59,14 @@ CREATE TABLE IF NOT EXISTS tags ( name VARCHAR(255) NOT NULL, artifact_id VARCHAR(64) NOT NULL REFERENCES artifacts(id), created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), created_by VARCHAR(255) NOT NULL, UNIQUE(package_id, name) ); CREATE INDEX idx_tags_package_id ON tags(package_id); CREATE INDEX idx_tags_artifact_id ON tags(artifact_id); +CREATE INDEX idx_tags_package_created_at ON tags(package_id, created_at DESC); -- Tag History (for rollback capability) CREATE TABLE IF NOT EXISTS tag_history ( @@ -64,11 +74,13 @@ CREATE TABLE IF NOT EXISTS tag_history ( tag_id UUID NOT NULL REFERENCES tags(id) ON DELETE CASCADE, old_artifact_id VARCHAR(64) REFERENCES artifacts(id), new_artifact_id VARCHAR(64) NOT NULL REFERENCES artifacts(id), + change_type VARCHAR(20) NOT NULL DEFAULT 'update' CHECK (change_type IN ('create', 'update', 'delete')), changed_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), changed_by VARCHAR(255) NOT NULL ); CREATE INDEX idx_tag_history_tag_id ON tag_history(tag_id); +CREATE INDEX idx_tag_history_changed_at ON tag_history(changed_at); -- Uploads (upload event records) CREATE TABLE IF NOT EXISTS uploads ( @@ -76,6 +88,11 @@ CREATE TABLE IF NOT EXISTS uploads ( artifact_id VARCHAR(64) NOT NULL REFERENCES artifacts(id), package_id UUID NOT NULL REFERENCES packages(id), original_name VARCHAR(1024), + tag_name VARCHAR(255), -- tag assigned during upload + user_agent VARCHAR(512), -- client identification + duration_ms INTEGER, -- upload timing in milliseconds + deduplicated BOOLEAN DEFAULT false, -- whether artifact was deduplicated + checksum_verified BOOLEAN DEFAULT true, -- whether checksum was verified uploaded_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), uploaded_by VARCHAR(255) NOT NULL, source_ip VARCHAR(45) @@ -84,6 +101,8 @@ CREATE TABLE IF NOT EXISTS uploads ( CREATE INDEX idx_uploads_artifact_id ON uploads(artifact_id); CREATE INDEX idx_uploads_package_id ON uploads(package_id); CREATE INDEX idx_uploads_uploaded_at ON uploads(uploaded_at); +CREATE INDEX idx_uploads_package_uploaded_at ON uploads(package_id, uploaded_at DESC); +CREATE INDEX idx_uploads_uploaded_by_at ON uploads(uploaded_by, uploaded_at DESC); -- Consumers (Dependency tracking) CREATE TABLE IF NOT EXISTS consumers ( @@ -141,14 +160,17 @@ CREATE INDEX idx_audit_logs_action ON audit_logs(action); CREATE INDEX idx_audit_logs_resource ON audit_logs(resource); CREATE INDEX idx_audit_logs_user_id ON audit_logs(user_id); CREATE INDEX idx_audit_logs_timestamp ON audit_logs(timestamp); +CREATE INDEX idx_audit_logs_resource_timestamp ON audit_logs(resource, timestamp DESC); +CREATE INDEX idx_audit_logs_user_timestamp ON audit_logs(user_id, timestamp DESC); +CREATE INDEX idx_audit_logs_details ON audit_logs USING GIN (details); -- Trigger to update tag history on changes CREATE OR REPLACE FUNCTION track_tag_changes() RETURNS TRIGGER AS $$ BEGIN IF TG_OP = 'UPDATE' AND OLD.artifact_id != NEW.artifact_id THEN - INSERT INTO tag_history (id, tag_id, old_artifact_id, new_artifact_id, changed_at, changed_by) - VALUES (gen_random_uuid(), NEW.id, OLD.artifact_id, NEW.artifact_id, NOW(), NEW.created_by); + INSERT INTO tag_history (id, tag_id, old_artifact_id, new_artifact_id, change_type, changed_at, changed_by) + VALUES (gen_random_uuid(), NEW.id, OLD.artifact_id, NEW.artifact_id, 'update', NOW(), NEW.created_by); END IF; RETURN NEW; END; @@ -158,3 +180,72 @@ CREATE TRIGGER tag_changes_trigger AFTER UPDATE ON tags FOR EACH ROW EXECUTE FUNCTION track_tag_changes(); + +-- Trigger to auto-update updated_at timestamps +CREATE OR REPLACE FUNCTION update_updated_at_column() +RETURNS TRIGGER AS $$ +BEGIN + NEW.updated_at = NOW(); + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +CREATE TRIGGER projects_updated_at_trigger + BEFORE UPDATE ON projects + FOR EACH ROW + EXECUTE FUNCTION update_updated_at_column(); + +CREATE TRIGGER packages_updated_at_trigger + BEFORE UPDATE ON packages + FOR EACH ROW + EXECUTE FUNCTION update_updated_at_column(); + +CREATE TRIGGER tags_updated_at_trigger + BEFORE UPDATE ON tags + FOR EACH ROW + EXECUTE FUNCTION update_updated_at_column(); + +-- Triggers for maintaining artifact ref_count accuracy +CREATE OR REPLACE FUNCTION increment_artifact_ref_count() +RETURNS TRIGGER AS $$ +BEGIN + UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION decrement_artifact_ref_count() +RETURNS TRIGGER AS $$ +BEGIN + UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id; + RETURN OLD; +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION update_artifact_ref_count() +RETURNS TRIGGER AS $$ +BEGIN + IF OLD.artifact_id != NEW.artifact_id THEN + UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id; + UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id; + END IF; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +-- Note: ref_count triggers on tags table +-- These track how many tags reference each artifact +CREATE TRIGGER tags_ref_count_insert_trigger + AFTER INSERT ON tags + FOR EACH ROW + EXECUTE FUNCTION increment_artifact_ref_count(); + +CREATE TRIGGER tags_ref_count_delete_trigger + AFTER DELETE ON tags + FOR EACH ROW + EXECUTE FUNCTION decrement_artifact_ref_count(); + +CREATE TRIGGER tags_ref_count_update_trigger + AFTER UPDATE ON tags + FOR EACH ROW + EXECUTE FUNCTION update_artifact_ref_count(); diff --git a/migrations/002_schema_enhancements.sql b/migrations/002_schema_enhancements.sql new file mode 100644 index 0000000..c32b13b --- /dev/null +++ b/migrations/002_schema_enhancements.sql @@ -0,0 +1,170 @@ +-- Migration 002: Schema Enhancements +-- Adds new fields, indexes, and triggers for improved functionality + +-- ============================================ +-- Packages: Add format and platform fields +-- ============================================ +ALTER TABLE packages ADD COLUMN IF NOT EXISTS format VARCHAR(50) DEFAULT 'generic'; +ALTER TABLE packages ADD COLUMN IF NOT EXISTS platform VARCHAR(50) DEFAULT 'any'; + +CREATE INDEX IF NOT EXISTS idx_packages_format ON packages(format); +CREATE INDEX IF NOT EXISTS idx_packages_platform ON packages(platform); + +-- ============================================ +-- Artifacts: Add checksum_md5, metadata, and CHECK constraints +-- ============================================ +ALTER TABLE artifacts ADD COLUMN IF NOT EXISTS checksum_md5 VARCHAR(32); +ALTER TABLE artifacts ADD COLUMN IF NOT EXISTS metadata JSONB; + +-- Add CHECK constraints (will fail if data violates them) +DO $$ +BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_constraint WHERE conname = 'artifacts_ref_count_check') THEN + ALTER TABLE artifacts ADD CONSTRAINT artifacts_ref_count_check CHECK (ref_count >= 0); + END IF; + IF NOT EXISTS (SELECT 1 FROM pg_constraint WHERE conname = 'artifacts_size_check') THEN + ALTER TABLE artifacts ADD CONSTRAINT artifacts_size_check CHECK (size > 0); + END IF; +END $$; + +CREATE INDEX IF NOT EXISTS idx_artifacts_metadata ON artifacts USING GIN (metadata); + +-- ============================================ +-- Tags: Add updated_at and composite index +-- ============================================ +ALTER TABLE tags ADD COLUMN IF NOT EXISTS updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(); + +CREATE INDEX IF NOT EXISTS idx_tags_package_created_at ON tags(package_id, created_at DESC); + +-- ============================================ +-- Tag History: Add change_type and index +-- ============================================ +ALTER TABLE tag_history ADD COLUMN IF NOT EXISTS change_type VARCHAR(20) DEFAULT 'update'; + +DO $$ +BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_constraint WHERE conname = 'tag_history_change_type_check') THEN + ALTER TABLE tag_history ADD CONSTRAINT tag_history_change_type_check + CHECK (change_type IN ('create', 'update', 'delete')); + END IF; +END $$; + +CREATE INDEX IF NOT EXISTS idx_tag_history_changed_at ON tag_history(changed_at); + +-- ============================================ +-- Uploads: Add new fields and composite indexes +-- ============================================ +ALTER TABLE uploads ADD COLUMN IF NOT EXISTS tag_name VARCHAR(255); +ALTER TABLE uploads ADD COLUMN IF NOT EXISTS user_agent VARCHAR(512); +ALTER TABLE uploads ADD COLUMN IF NOT EXISTS duration_ms INTEGER; +ALTER TABLE uploads ADD COLUMN IF NOT EXISTS deduplicated BOOLEAN DEFAULT false; +ALTER TABLE uploads ADD COLUMN IF NOT EXISTS checksum_verified BOOLEAN DEFAULT true; + +CREATE INDEX IF NOT EXISTS idx_uploads_package_uploaded_at ON uploads(package_id, uploaded_at DESC); +CREATE INDEX IF NOT EXISTS idx_uploads_uploaded_by_at ON uploads(uploaded_by, uploaded_at DESC); + +-- ============================================ +-- Audit Logs: Add composite indexes and GIN index +-- ============================================ +CREATE INDEX IF NOT EXISTS idx_audit_logs_resource_timestamp ON audit_logs(resource, timestamp DESC); +CREATE INDEX IF NOT EXISTS idx_audit_logs_user_timestamp ON audit_logs(user_id, timestamp DESC); +CREATE INDEX IF NOT EXISTS idx_audit_logs_details ON audit_logs USING GIN (details); + +-- ============================================ +-- Projects: Add partial index for public projects +-- ============================================ +CREATE INDEX IF NOT EXISTS idx_projects_public ON projects(name) WHERE is_public = true; + +-- ============================================ +-- Triggers: Update tag_changes trigger for change_type +-- ============================================ +CREATE OR REPLACE FUNCTION track_tag_changes() +RETURNS TRIGGER AS $$ +BEGIN + IF TG_OP = 'UPDATE' AND OLD.artifact_id != NEW.artifact_id THEN + INSERT INTO tag_history (id, tag_id, old_artifact_id, new_artifact_id, change_type, changed_at, changed_by) + VALUES (gen_random_uuid(), NEW.id, OLD.artifact_id, NEW.artifact_id, 'update', NOW(), NEW.created_by); + END IF; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +-- ============================================ +-- Triggers: Auto-update updated_at timestamps +-- ============================================ +CREATE OR REPLACE FUNCTION update_updated_at_column() +RETURNS TRIGGER AS $$ +BEGIN + NEW.updated_at = NOW(); + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +-- Drop triggers if they exist, then recreate +DROP TRIGGER IF EXISTS projects_updated_at_trigger ON projects; +CREATE TRIGGER projects_updated_at_trigger + BEFORE UPDATE ON projects + FOR EACH ROW + EXECUTE FUNCTION update_updated_at_column(); + +DROP TRIGGER IF EXISTS packages_updated_at_trigger ON packages; +CREATE TRIGGER packages_updated_at_trigger + BEFORE UPDATE ON packages + FOR EACH ROW + EXECUTE FUNCTION update_updated_at_column(); + +DROP TRIGGER IF EXISTS tags_updated_at_trigger ON tags; +CREATE TRIGGER tags_updated_at_trigger + BEFORE UPDATE ON tags + FOR EACH ROW + EXECUTE FUNCTION update_updated_at_column(); + +-- ============================================ +-- Triggers: Maintain artifact ref_count accuracy +-- ============================================ +CREATE OR REPLACE FUNCTION increment_artifact_ref_count() +RETURNS TRIGGER AS $$ +BEGIN + UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION decrement_artifact_ref_count() +RETURNS TRIGGER AS $$ +BEGIN + UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id; + RETURN OLD; +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION update_artifact_ref_count() +RETURNS TRIGGER AS $$ +BEGIN + IF OLD.artifact_id != NEW.artifact_id THEN + UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id; + UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id; + END IF; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +-- Note: ref_count triggers on tags table +-- These track how many tags reference each artifact +DROP TRIGGER IF EXISTS tags_ref_count_insert_trigger ON tags; +CREATE TRIGGER tags_ref_count_insert_trigger + AFTER INSERT ON tags + FOR EACH ROW + EXECUTE FUNCTION increment_artifact_ref_count(); + +DROP TRIGGER IF EXISTS tags_ref_count_delete_trigger ON tags; +CREATE TRIGGER tags_ref_count_delete_trigger + AFTER DELETE ON tags + FOR EACH ROW + EXECUTE FUNCTION decrement_artifact_ref_count(); + +DROP TRIGGER IF EXISTS tags_ref_count_update_trigger ON tags; +CREATE TRIGGER tags_ref_count_update_trigger + AFTER UPDATE ON tags + FOR EACH ROW + EXECUTE FUNCTION update_artifact_ref_count();