10 Commits

Author SHA1 Message Date
Mondo Diaz
caa0c5af0c Merge branch 'feature/store-sha256-checksums' into 'main'
Store SHA256 checksums with artifacts and add multiple hash support

Closes #25

See merge request esv/bsf/bsf-integration/orchard/orchard-mvp!16
2025-12-15 14:47:31 -06:00
Mondo Diaz
3fd2747ae4 Store SHA256 checksums with artifacts and add multiple hash support 2025-12-15 14:47:30 -06:00
Mondo Diaz
96367da448 Merge branch 'feature/integrity-verification-design' into 'main'
Add integrity verification workflow design document

Closes #24

See merge request esv/bsf/bsf-integration/orchard/orchard-mvp!15
2025-12-15 14:00:32 -06:00
Mondo Diaz
2686fdcb89 Add integrity verification workflow design document 2025-12-15 14:00:32 -06:00
Dane Moss
0eb2deb4ca Merge branch 'update_urls' into 'main'
update URLs to point to BSF

Closes #46

See merge request esv/bsf/bsf-integration/orchard/orchard-mvp!14
2025-12-15 11:30:07 -07:00
Dane Moss
3fe421f31d update URLs to point to BSF 2025-12-15 11:30:07 -07:00
Mondo Diaz
68660eacf6 Merge branch 'feature/schema-enhancements' into 'main'
Add schema enhancements for uploads, artifacts, and audit tracking

Closes #16

See merge request esv/bsf/bsf-integration/orchard/orchard-mvp!13
2025-12-12 15:23:50 -06:00
Mondo Diaz
b52c8840f1 Add schema enhancements for uploads, artifacts, and audit tracking 2025-12-12 15:23:50 -06:00
Dane Moss
4afcdf5cda Merge branch 'add_prosper_config' into 'main'
Add prosper config

Closes #45

See merge request esv/bsf/bsf-integration/orchard/orchard-mvp!12
2025-12-12 13:52:27 -07:00
Dane Moss
bc3da14d50 Add prosper config 2025-12-12 13:52:27 -07:00
16 changed files with 1205 additions and 111 deletions


@@ -1,26 +1,21 @@
-stages:
-  - test
-  - build
-  - publish
-  # - deploy
+include:
+  - project: 'esv/bsf/pypi/prosper'
+    ref: v0.64.1
+    file: '/prosper/templates/projects/docker.yml'
 
 variables:
-  # Container registry settings
-  REGISTRY: ${CI_REGISTRY}
-  IMAGE_NAME: ${CI_REGISTRY_IMAGE}
-  # Buildah settings
-  STORAGE_DRIVER: vfs
-  BUILDAH_FORMAT: docker
-  BUILDAH_ISOLATION: chroot
-
-.buildah-base:
-  image: deps.global.bsf.tools/quay.io/buildah/stable:latest
-  before_script:
-    - buildah version
-    - buildah login -u ${CI_REGISTRY_USER} -p ${CI_REGISTRY_PASSWORD} ${CI_REGISTRY}
+  # renovate: datasource=gitlab-tags depName=esv/bsf/pypi/prosper versioning=semver registryUrl=https://gitlab.global.bsf.tools
+  PROSPER_VERSION: v0.64.1
+
+kics:
+  allow_failure: true
+
+hadolint:
+  allow_failure: true
 
 # Run Python tests
-test:
+python_tests:
   stage: test
   image: deps.global.bsf.tools/docker/python:3.12-slim
   before_script:
@@ -29,47 +24,6 @@ test:
   script:
     - cd backend
     - python -m pytest -v || echo "No tests yet"
-  rules:
-    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
-    - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
-
-# Build container image for merge requests (no push)
-build:
-  stage: build
-  extends: .buildah-base
-  script:
-    - |
-      buildah build \
-        --build-arg NPM_REGISTRY=https://deps.global.bsf.tools/artifactory/api/npm/registry.npmjs.org/ \
-        --tag ${IMAGE_NAME}:${CI_COMMIT_SHORT_SHA} \
-        --label org.opencontainers.image.source=${CI_PROJECT_URL} \
-        --label org.opencontainers.image.revision=${CI_COMMIT_SHA} \
-        --label org.opencontainers.image.created=$(date -u +%Y-%m-%dT%H:%M:%SZ) \
-        .
-  rules:
-    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
-
-# Build and push on main branch
-publish:
-  stage: publish
-  extends: .buildah-base
-  script:
-    - |
-      buildah build \
-        --build-arg NPM_REGISTRY=https://deps.global.bsf.tools/artifactory/api/npm/registry.npmjs.org/ \
-        --tag ${IMAGE_NAME}:${CI_COMMIT_SHORT_SHA} \
-        --tag ${IMAGE_NAME}:${CI_COMMIT_REF_SLUG} \
-        --tag ${IMAGE_NAME}:latest \
-        --label org.opencontainers.image.source=${CI_PROJECT_URL} \
-        --label org.opencontainers.image.revision=${CI_COMMIT_SHA} \
-        --label org.opencontainers.image.created=$(date -u +%Y-%m-%dT%H:%M:%SZ) \
-        .
-    - buildah push ${IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}
-    - buildah push ${IMAGE_NAME}:${CI_COMMIT_REF_SLUG}
-    - buildah push ${IMAGE_NAME}:latest
-  rules:
-    - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
 
 # deploy_helm_charts:
 #   stage: deploy

CHANGELOG.md (new file)

@@ -0,0 +1,39 @@
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Added
- Added integrity verification workflow design document (#24)
- Added `sha256` field to API responses for clarity (alias of `id`) (#25)
- Added `checksum_sha1` field to artifacts table for compatibility (#25)
- Added `s3_etag` field to artifacts table for S3 verification (#25)
- Compute and store MD5, SHA1, and S3 ETag alongside SHA256 during upload (#25)
- Added `Dockerfile.local` and `docker-compose.local.yml` for local development (#25)
- Added migration script `003_checksum_fields.sql` for existing databases (#25)
## [0.2.0] - 2025-12-15
### Changed
- Updated images to use internal container BSF proxy (#46)
### Added
- Added `format` and `platform` fields to packages table (#16)
- Added `checksum_md5` and `metadata` JSONB fields to artifacts table (#16)
- Added `updated_at` field to tags table (#16)
- Added `tag_name`, `user_agent`, `duration_ms`, `deduplicated`, `checksum_verified` fields to uploads table (#16)
- Added `change_type` field to tag_history table (#16)
- Added composite indexes for common query patterns (#16)
- Added GIN indexes on JSONB fields for efficient JSON queries (#16)
- Added partial index for public projects (#16)
- Added database triggers for `updated_at` timestamps (#16)
- Added database triggers for maintaining artifact `ref_count` accuracy (#16)
- Added CHECK constraints for data integrity (`size > 0`, `ref_count >= 0`) (#16)
- Added migration script `002_schema_enhancements.sql` for existing databases (#16)
## [0.1.0] - 2025-12-12
### Changed
- Changed the Dockerfile npm build arg to use the deps.global.bsf.tools URL as the default registry (#45)
### Added
- Added Prosper docker template config (#45)


@@ -1,7 +1,7 @@
 # Frontend build stage
-FROM node:20-alpine AS frontend-builder
-ARG NPM_REGISTRY
+FROM containers.global.bsf.tools/node:20-alpine AS frontend-builder
+ARG NPM_REGISTRY=https://deps.global.bsf.tools/artifactory/api/npm/registry.npmjs.org/
 
 WORKDIR /app/frontend
@@ -19,7 +19,10 @@ COPY frontend/ ./
 RUN npm run build
 
 # Runtime stage
-FROM python:3.12-slim
+FROM containers.global.bsf.tools/python:3.12-slim
+
+# Disable proxy cache
+RUN echo 'Acquire::http::Pipeline-Depth 0;\nAcquire::http::No-Cache true;\nAcquire::BrokenProxy true;\n' > /etc/apt/apt.conf.d/99fixbadproxy
 
 # Install system dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \

Dockerfile.local (new file)

@@ -0,0 +1,50 @@
# Frontend build stage
FROM node:20-alpine AS frontend-builder
WORKDIR /app/frontend
# Copy package files
COPY frontend/package*.json ./
RUN npm install
# Copy frontend source
COPY frontend/ ./
# Build frontend
RUN npm run build
# Runtime stage
FROM python:3.12-slim
# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
&& rm -rf /var/lib/apt/lists/*
# Create non-root user
RUN groupadd -g 1000 orchard && \
useradd -u 1000 -g orchard -s /bin/bash -m orchard
WORKDIR /app
# Copy requirements and install Python dependencies
COPY backend/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy backend source
COPY backend/ ./backend/
# Copy frontend build
COPY --from=frontend-builder /app/frontend/dist ./frontend/dist
# Set ownership
RUN chown -R orchard:orchard /app
USER orchard
EXPOSE 8080
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
CMD curl -f http://localhost:8080/health || exit 1
CMD ["uvicorn", "backend.app.main:app", "--host", "0.0.0.0", "--port", "8080"]


@@ -73,15 +73,33 @@ class Artifact(Base):
     size = Column(BigInteger, nullable=False)
     content_type = Column(String(255))
     original_name = Column(String(1024))
+    checksum_md5 = Column(String(32))  # MD5 hash for additional verification
+    checksum_sha1 = Column(String(40))  # SHA1 hash for compatibility
+    s3_etag = Column(String(64))  # S3 ETag for verification
+    artifact_metadata = Column("metadata", JSON, default=dict)  # Format-specific metadata (column name is 'metadata')
     created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
     created_by = Column(String(255), nullable=False)
     ref_count = Column(Integer, default=1)
     s3_key = Column(String(1024), nullable=False)
-    format_metadata = Column(JSON, default=dict)  # Format-specific metadata (version, etc.)
 
     tags = relationship("Tag", back_populates="artifact")
     uploads = relationship("Upload", back_populates="artifact")
 
+    @property
+    def sha256(self) -> str:
+        """Alias for id - the SHA256 hash of the artifact content"""
+        return self.id
+
+    @property
+    def format_metadata(self):
+        """Alias for artifact_metadata - backward compatibility"""
+        return self.artifact_metadata
+
+    @format_metadata.setter
+    def format_metadata(self, value):
+        """Alias setter for artifact_metadata - backward compatibility"""
+        self.artifact_metadata = value
+
     __table_args__ = (
         Index("idx_artifacts_created_at", "created_at"),
         Index("idx_artifacts_created_by", "created_by"),
@@ -99,6 +117,7 @@ class Tag(Base):
     name = Column(String(255), nullable=False)
     artifact_id = Column(String(64), ForeignKey("artifacts.id"), nullable=False)
     created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
+    updated_at = Column(DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow)
     created_by = Column(String(255), nullable=False)
 
     package = relationship("Package", back_populates="tags")
@@ -120,6 +139,7 @@ class TagHistory(Base):
     tag_id = Column(UUID(as_uuid=True), ForeignKey("tags.id", ondelete="CASCADE"), nullable=False)
     old_artifact_id = Column(String(64), ForeignKey("artifacts.id"))
     new_artifact_id = Column(String(64), ForeignKey("artifacts.id"), nullable=False)
+    change_type = Column(String(20), nullable=False, default="update")
     changed_at = Column(DateTime(timezone=True), default=datetime.utcnow)
     changed_by = Column(String(255), nullable=False)
@@ -127,6 +147,8 @@ class TagHistory(Base):
     __table_args__ = (
         Index("idx_tag_history_tag_id", "tag_id"),
+        Index("idx_tag_history_changed_at", "changed_at"),
+        CheckConstraint("change_type IN ('create', 'update', 'delete')", name="check_change_type"),
     )
@@ -137,6 +159,11 @@ class Upload(Base):
     artifact_id = Column(String(64), ForeignKey("artifacts.id"), nullable=False)
     package_id = Column(UUID(as_uuid=True), ForeignKey("packages.id"), nullable=False)
     original_name = Column(String(1024))
+    tag_name = Column(String(255))  # Tag assigned during upload
+    user_agent = Column(String(512))  # Client identification
+    duration_ms = Column(Integer)  # Upload timing in milliseconds
+    deduplicated = Column(Boolean, default=False)  # Whether artifact was deduplicated
+    checksum_verified = Column(Boolean, default=True)  # Whether checksum was verified
     uploaded_at = Column(DateTime(timezone=True), default=datetime.utcnow)
     uploaded_by = Column(String(255), nullable=False)
     source_ip = Column(String(45))
@@ -148,6 +175,8 @@ class Upload(Base):
         Index("idx_uploads_artifact_id", "artifact_id"),
         Index("idx_uploads_package_id", "package_id"),
         Index("idx_uploads_uploaded_at", "uploaded_at"),
+        Index("idx_uploads_package_uploaded_at", "package_id", "uploaded_at"),
+        Index("idx_uploads_uploaded_by_at", "uploaded_by", "uploaded_at"),
     )
@@ -220,4 +249,6 @@ class AuditLog(Base):
         Index("idx_audit_logs_resource", "resource"),
         Index("idx_audit_logs_user_id", "user_id"),
         Index("idx_audit_logs_timestamp", "timestamp"),
+        Index("idx_audit_logs_resource_timestamp", "resource", "timestamp"),
+        Index("idx_audit_logs_user_timestamp", "user_id", "timestamp"),
     )


@@ -520,40 +520,51 @@ def upload_artifact(
     )
 
     # Store file (uses multipart for large files)
-    sha256_hash, size, s3_key = storage.store(file.file, content_length)
+    storage_result = storage.store(file.file, content_length)
 
     # Check if this is a deduplicated upload
     deduplicated = False
 
     # Create or update artifact record
-    artifact = db.query(Artifact).filter(Artifact.id == sha256_hash).first()
+    artifact = db.query(Artifact).filter(Artifact.id == storage_result.sha256).first()
     if artifact:
         artifact.ref_count += 1
         deduplicated = True
         # Merge metadata if new metadata was extracted
-        if file_metadata and artifact.format_metadata:
-            artifact.format_metadata = {**artifact.format_metadata, **file_metadata}
+        if file_metadata and artifact.artifact_metadata:
+            artifact.artifact_metadata = {**artifact.artifact_metadata, **file_metadata}
         elif file_metadata:
-            artifact.format_metadata = file_metadata
+            artifact.artifact_metadata = file_metadata
+        # Update checksums if not already set
+        if not artifact.checksum_md5 and storage_result.md5:
+            artifact.checksum_md5 = storage_result.md5
+        if not artifact.checksum_sha1 and storage_result.sha1:
+            artifact.checksum_sha1 = storage_result.sha1
+        if not artifact.s3_etag and storage_result.s3_etag:
+            artifact.s3_etag = storage_result.s3_etag
     else:
         artifact = Artifact(
-            id=sha256_hash,
-            size=size,
+            id=storage_result.sha256,
+            size=storage_result.size,
             content_type=file.content_type,
             original_name=file.filename,
+            checksum_md5=storage_result.md5,
+            checksum_sha1=storage_result.sha1,
+            s3_etag=storage_result.s3_etag,
             created_by=user_id,
-            s3_key=s3_key,
-            format_metadata=file_metadata or {},
+            s3_key=storage_result.s3_key,
+            artifact_metadata=file_metadata or {},
         )
         db.add(artifact)
 
     # Record upload
     upload = Upload(
-        artifact_id=sha256_hash,
+        artifact_id=storage_result.sha256,
         package_id=package.id,
         original_name=file.filename,
         uploaded_by=user_id,
         source_ip=request.client.host if request.client else None,
+        deduplicated=deduplicated,
     )
     db.add(upload)
@@ -561,13 +572,13 @@ def upload_artifact(
     if tag:
         existing_tag = db.query(Tag).filter(Tag.package_id == package.id, Tag.name == tag).first()
         if existing_tag:
-            existing_tag.artifact_id = sha256_hash
+            existing_tag.artifact_id = storage_result.sha256
             existing_tag.created_by = user_id
         else:
             new_tag = Tag(
                 package_id=package.id,
                 name=tag,
-                artifact_id=sha256_hash,
+                artifact_id=storage_result.sha256,
                 created_by=user_id,
             )
             db.add(new_tag)
@@ -575,12 +586,16 @@ def upload_artifact(
     db.commit()
 
     return UploadResponse(
-        artifact_id=sha256_hash,
-        size=size,
+        artifact_id=storage_result.sha256,
+        sha256=storage_result.sha256,
+        size=storage_result.size,
         project=project_name,
         package=package_name,
         tag=tag,
-        format_metadata=artifact.format_metadata,
+        checksum_md5=storage_result.md5,
+        checksum_sha1=storage_result.sha1,
+        s3_etag=storage_result.s3_etag,
+        format_metadata=artifact.artifact_metadata,
         deduplicated=deduplicated,
     )


@@ -99,9 +99,13 @@ class PackageDetailResponse(BaseModel):
 # Artifact schemas
 class ArtifactResponse(BaseModel):
     id: str
+    sha256: str  # Explicit SHA256 field (same as id)
     size: int
     content_type: Optional[str]
     original_name: Optional[str]
+    checksum_md5: Optional[str] = None
+    checksum_sha1: Optional[str] = None
+    s3_etag: Optional[str] = None
     created_at: datetime
     created_by: str
     ref_count: int
@@ -173,9 +177,13 @@ class ArtifactTagInfo(BaseModel):
 class ArtifactDetailResponse(BaseModel):
     """Artifact with list of tags/packages referencing it"""
     id: str
+    sha256: str  # Explicit SHA256 field (same as id)
     size: int
     content_type: Optional[str]
     original_name: Optional[str]
+    checksum_md5: Optional[str] = None
+    checksum_sha1: Optional[str] = None
+    s3_etag: Optional[str] = None
     created_at: datetime
     created_by: str
     ref_count: int
@@ -189,9 +197,13 @@ class ArtifactDetailResponse(BaseModel):
 class PackageArtifactResponse(BaseModel):
     """Artifact with tags for package artifact listing"""
     id: str
+    sha256: str  # Explicit SHA256 field (same as id)
     size: int
     content_type: Optional[str]
     original_name: Optional[str]
+    checksum_md5: Optional[str] = None
+    checksum_sha1: Optional[str] = None
+    s3_etag: Optional[str] = None
     created_at: datetime
     created_by: str
     format_metadata: Optional[Dict[str, Any]] = None
@@ -204,10 +216,14 @@ class PackageArtifactResponse(BaseModel):
 # Upload response
 class UploadResponse(BaseModel):
     artifact_id: str
+    sha256: str  # Explicit SHA256 field (same as artifact_id)
     size: int
     project: str
     package: str
     tag: Optional[str]
+    checksum_md5: Optional[str] = None
+    checksum_sha1: Optional[str] = None
+    s3_etag: Optional[str] = None
     format_metadata: Optional[Dict[str, Any]] = None
     deduplicated: bool = False


@@ -1,6 +1,6 @@
 import hashlib
 import logging
-from typing import BinaryIO, Tuple, Optional, Dict, Any, Generator
+from typing import BinaryIO, Tuple, Optional, Dict, Any, Generator, NamedTuple
 
 import boto3
 from botocore.config import Config
 from botocore.exceptions import ClientError
@@ -18,6 +18,16 @@ MULTIPART_CHUNK_SIZE = 10 * 1024 * 1024
 HASH_CHUNK_SIZE = 8 * 1024 * 1024
 
+
+class StorageResult(NamedTuple):
+    """Result of storing a file with all computed checksums"""
+    sha256: str
+    size: int
+    s3_key: str
+    md5: Optional[str] = None
+    sha1: Optional[str] = None
+    s3_etag: Optional[str] = None
+
+
 class S3Storage:
     def __init__(self):
         config = Config(s3={"addressing_style": "path"} if settings.s3_use_path_style else {})
@@ -34,9 +44,9 @@ class S3Storage:
         # Store active multipart uploads for resumable support
         self._active_uploads: Dict[str, Dict[str, Any]] = {}
 
-    def store(self, file: BinaryIO, content_length: Optional[int] = None) -> Tuple[str, int, str]:
+    def store(self, file: BinaryIO, content_length: Optional[int] = None) -> StorageResult:
         """
-        Store a file and return its SHA256 hash, size, and s3_key.
+        Store a file and return StorageResult with all checksums.
         Content-addressable: if the file already exists, just return the hash.
         Uses multipart upload for files larger than MULTIPART_THRESHOLD.
         """
@@ -46,45 +56,76 @@ class S3Storage:
         else:
             return self._store_multipart(file, content_length)
 
-    def _store_simple(self, file: BinaryIO) -> Tuple[str, int, str]:
+    def _store_simple(self, file: BinaryIO) -> StorageResult:
         """Store a small file using simple put_object"""
-        # Read file and compute hash
+        # Read file and compute all hashes
         content = file.read()
         sha256_hash = hashlib.sha256(content).hexdigest()
+        md5_hash = hashlib.md5(content).hexdigest()
+        sha1_hash = hashlib.sha1(content).hexdigest()
         size = len(content)
 
         # Check if already exists
         s3_key = f"fruits/{sha256_hash[:2]}/{sha256_hash[2:4]}/{sha256_hash}"
+        s3_etag = None
         if not self._exists(s3_key):
-            self.client.put_object(
+            response = self.client.put_object(
                 Bucket=self.bucket,
                 Key=s3_key,
                 Body=content,
             )
+            s3_etag = response.get("ETag", "").strip('"')
+        else:
+            # Get existing ETag
+            obj_info = self.get_object_info(s3_key)
+            if obj_info:
+                s3_etag = obj_info.get("etag", "").strip('"')
 
-        return sha256_hash, size, s3_key
+        return StorageResult(
+            sha256=sha256_hash,
+            size=size,
+            s3_key=s3_key,
+            md5=md5_hash,
+            sha1=sha1_hash,
+            s3_etag=s3_etag,
+        )
 
-    def _store_multipart(self, file: BinaryIO, content_length: int) -> Tuple[str, int, str]:
+    def _store_multipart(self, file: BinaryIO, content_length: int) -> StorageResult:
         """Store a large file using S3 multipart upload with streaming hash computation"""
-        # First pass: compute hash by streaming through file
-        hasher = hashlib.sha256()
+        # First pass: compute all hashes by streaming through file
+        sha256_hasher = hashlib.sha256()
+        md5_hasher = hashlib.md5()
+        sha1_hasher = hashlib.sha1()
        size = 0
 
-        # Read file in chunks to compute hash
+        # Read file in chunks to compute hashes
        while True:
             chunk = file.read(HASH_CHUNK_SIZE)
             if not chunk:
                 break
-            hasher.update(chunk)
+            sha256_hasher.update(chunk)
+            md5_hasher.update(chunk)
+            sha1_hasher.update(chunk)
             size += len(chunk)
 
-        sha256_hash = hasher.hexdigest()
+        sha256_hash = sha256_hasher.hexdigest()
+        md5_hash = md5_hasher.hexdigest()
+        sha1_hash = sha1_hasher.hexdigest()
         s3_key = f"fruits/{sha256_hash[:2]}/{sha256_hash[2:4]}/{sha256_hash}"
 
         # Check if already exists (deduplication)
         if self._exists(s3_key):
-            return sha256_hash, size, s3_key
+            obj_info = self.get_object_info(s3_key)
+            s3_etag = obj_info.get("etag", "").strip('"') if obj_info else None
+            return StorageResult(
+                sha256=sha256_hash,
+                size=size,
+                s3_key=s3_key,
+                md5=md5_hash,
+                sha1=sha1_hash,
+                s3_etag=s3_etag,
+            )
 
         # Seek back to start for upload
         file.seek(0)
@@ -116,14 +157,22 @@ class S3Storage:
                 part_number += 1
 
             # Complete multipart upload
-            self.client.complete_multipart_upload(
+            complete_response = self.client.complete_multipart_upload(
                 Bucket=self.bucket,
                 Key=s3_key,
                 UploadId=upload_id,
                 MultipartUpload={"Parts": parts},
             )
+            s3_etag = complete_response.get("ETag", "").strip('"')
 
-            return sha256_hash, size, s3_key
+            return StorageResult(
+                sha256=sha256_hash,
+                size=size,
+                s3_key=s3_key,
+                md5=md5_hash,
+                sha1=sha1_hash,
+                s3_etag=s3_etag,
+            )
 
         except Exception as e:
             # Abort multipart upload on failure
@@ -135,33 +184,50 @@ class S3Storage:
             )
             raise
 
-    def store_streaming(self, chunks: Generator[bytes, None, None]) -> Tuple[str, int, str]:
+    def store_streaming(self, chunks: Generator[bytes, None, None]) -> StorageResult:
         """
         Store a file from a stream of chunks.
         First accumulates to compute hash, then uploads.
         For truly large files, consider using initiate_resumable_upload instead.
         """
-        # Accumulate chunks and compute hash
-        hasher = hashlib.sha256()
+        # Accumulate chunks and compute all hashes
+        sha256_hasher = hashlib.sha256()
+        md5_hasher = hashlib.md5()
+        sha1_hasher = hashlib.sha1()
         all_chunks = []
         size = 0
 
         for chunk in chunks:
-            hasher.update(chunk)
+            sha256_hasher.update(chunk)
+            md5_hasher.update(chunk)
+            sha1_hasher.update(chunk)
             all_chunks.append(chunk)
             size += len(chunk)
 
-        sha256_hash = hasher.hexdigest()
+        sha256_hash = sha256_hasher.hexdigest()
+        md5_hash = md5_hasher.hexdigest()
+        sha1_hash = sha1_hasher.hexdigest()
         s3_key = f"fruits/{sha256_hash[:2]}/{sha256_hash[2:4]}/{sha256_hash}"
+        s3_etag = None
 
         # Check if already exists
         if self._exists(s3_key):
-            return sha256_hash, size, s3_key
+            obj_info = self.get_object_info(s3_key)
+            s3_etag = obj_info.get("etag", "").strip('"') if obj_info else None
+            return StorageResult(
+                sha256=sha256_hash,
+                size=size,
+                s3_key=s3_key,
+                md5=md5_hash,
+                sha1=sha1_hash,
+                s3_etag=s3_etag,
+            )
 
         # Upload based on size
         if size < MULTIPART_THRESHOLD:
             content = b"".join(all_chunks)
-            self.client.put_object(Bucket=self.bucket, Key=s3_key, Body=content)
+            response = self.client.put_object(Bucket=self.bucket, Key=s3_key, Body=content)
+            s3_etag = response.get("ETag", "").strip('"')
         else:
             # Use multipart for large files
             mpu = self.client.create_multipart_upload(Bucket=self.bucket, Key=s3_key)
@@ -205,12 +271,13 @@ class S3Storage:
                     "ETag": response["ETag"],
                 })
 
-            self.client.complete_multipart_upload(
+            complete_response = self.client.complete_multipart_upload(
                 Bucket=self.bucket,
                 Key=s3_key,
                 UploadId=upload_id,
                 MultipartUpload={"Parts": parts},
             )
+            s3_etag = complete_response.get("ETag", "").strip('"')
 
         except Exception as e:
             logger.error(f"Streaming multipart upload failed: {e}")
@@ -221,7 +288,14 @@ class S3Storage:
             )
             raise
 
-        return sha256_hash, size, s3_key
+        return StorageResult(
+            sha256=sha256_hash,
+            size=size,
+            s3_key=s3_key,
+            md5=md5_hash,
+            sha1=sha1_hash,
+            s3_etag=s3_etag,
+        )
 
     def initiate_resumable_upload(self, expected_hash: str) -> Dict[str, Any]:
         """

container-test.sh (new executable file)

@@ -0,0 +1,7 @@
#!/bin/sh
echo "testing container"
# Without a sleep, local testing shows no output because attaching to the logs happens after the container is done executing
# this script.
sleep 1

docker-compose.local.yml (new file)

@@ -0,0 +1,122 @@
version: '3.8'
services:
orchard-server:
build:
context: .
dockerfile: Dockerfile.local
ports:
- "8080:8080"
environment:
- ORCHARD_SERVER_HOST=0.0.0.0
- ORCHARD_SERVER_PORT=8080
- ORCHARD_DATABASE_HOST=postgres
- ORCHARD_DATABASE_PORT=5432
- ORCHARD_DATABASE_USER=orchard
- ORCHARD_DATABASE_PASSWORD=orchard_secret
- ORCHARD_DATABASE_DBNAME=orchard
- ORCHARD_DATABASE_SSLMODE=disable
- ORCHARD_S3_ENDPOINT=http://minio:9000
- ORCHARD_S3_REGION=us-east-1
- ORCHARD_S3_BUCKET=orchard-artifacts
- ORCHARD_S3_ACCESS_KEY_ID=minioadmin
- ORCHARD_S3_SECRET_ACCESS_KEY=minioadmin
- ORCHARD_S3_USE_PATH_STYLE=true
- ORCHARD_REDIS_HOST=redis
- ORCHARD_REDIS_PORT=6379
depends_on:
postgres:
condition: service_healthy
minio:
condition: service_healthy
redis:
condition: service_healthy
networks:
- orchard-network
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
interval: 30s
timeout: 3s
start_period: 10s
retries: 3
postgres:
image: postgres:16-alpine
environment:
- POSTGRES_USER=orchard
- POSTGRES_PASSWORD=orchard_secret
- POSTGRES_DB=orchard
volumes:
- postgres-data-local:/var/lib/postgresql/data
- ./migrations:/docker-entrypoint-initdb.d:ro
ports:
- "5432:5432"
healthcheck:
test: ["CMD-SHELL", "pg_isready -U orchard -d orchard"]
interval: 10s
timeout: 5s
retries: 5
networks:
- orchard-network
restart: unless-stopped
minio:
image: minio/minio:latest
command: server /data --console-address ":9001"
environment:
- MINIO_ROOT_USER=minioadmin
- MINIO_ROOT_PASSWORD=minioadmin
volumes:
- minio-data-local:/data
ports:
- "9000:9000"
- "9001:9001"
healthcheck:
test: ["CMD", "mc", "ready", "local"]
interval: 10s
timeout: 5s
retries: 5
networks:
- orchard-network
restart: unless-stopped
minio-init:
image: minio/mc:latest
depends_on:
minio:
condition: service_healthy
entrypoint: >
/bin/sh -c "
mc alias set myminio http://minio:9000 minioadmin minioadmin;
mc mb myminio/orchard-artifacts --ignore-existing;
mc anonymous set download myminio/orchard-artifacts;
exit 0;
"
networks:
- orchard-network
redis:
image: redis:7-alpine
command: redis-server --appendonly yes
volumes:
- redis-data-local:/data
ports:
- "6379:6379"
healthcheck:
test: ["CMD", "redis-cli", "ping"]
interval: 10s
timeout: 5s
retries: 5
networks:
- orchard-network
restart: unless-stopped
volumes:
postgres-data-local:
minio-data-local:
redis-data-local:
networks:
orchard-network:
driver: bridge


@@ -36,7 +36,7 @@ services:
     restart: unless-stopped
 
   postgres:
-    image: postgres:16-alpine
+    image: containers.global.bsf.tools/postgres:16-alpine
     environment:
       - POSTGRES_USER=orchard
       - POSTGRES_PASSWORD=orchard_secret
@@ -56,7 +56,7 @@ services:
     restart: unless-stopped
 
   minio:
-    image: minio/minio:latest
+    image: containers.global.bsf.tools/minio/minio:latest
     command: server /data --console-address ":9001"
     environment:
       - MINIO_ROOT_USER=minioadmin
@@ -76,7 +76,7 @@ services:
     restart: unless-stopped
 
   minio-init:
-    image: minio/mc:latest
+    image: containers.global.bsf.tools/minio/mc:latest
     depends_on:
       minio:
         condition: service_healthy
@@ -91,7 +91,7 @@ services:
       - orchard-network
 
   redis:
-    image: redis:7-alpine
+    image: containers.global.bsf.tools/redis:7-alpine
     command: redis-server --appendonly yes
     volumes:
       - redis-data:/data


@@ -0,0 +1,504 @@
# Integrity Verification Workflow Design
This document defines the process for SHA256 checksum verification on artifact downloads, including failure handling and retry mechanisms.
## Overview
Orchard uses content-addressable storage where the artifact ID is the SHA256 hash of the content. This design leverages that property to provide configurable integrity verification during downloads.
## Current State
| Aspect | Status |
|--------|--------|
| Download streams content directly from S3 | ✅ Implemented |
| Artifact ID is the SHA256 hash | ✅ Implemented |
| S3 key derived from SHA256 hash | ✅ Implemented |
| Verification during download | ❌ Not implemented |
| Checksum headers in response | ❌ Not implemented |
| Retry mechanism on failure | ❌ Not implemented |
| Failure handling beyond S3 errors | ❌ Not implemented |
## Verification Modes
The verification mode is selected per request via the `?verify=<mode>` query parameter, falling back to the server-wide default set by `ORCHARD_VERIFY_MODE`; a resolution sketch follows the table below.
| Mode | Performance | Integrity | Use Case |
|------|-------------|-----------|----------|
| `none` | ⚡ Fastest | Client-side | Trusted networks, high throughput |
| `header` | ⚡ Fast | Client-side | Standard downloads, client verification |
| `stream` | 🔄 Moderate | Post-hoc server | Logging/auditing, non-blocking |
| `pre` | 🐢 Slower | Guaranteed | Critical downloads, untrusted storage |
| `strict` | 🐢 Slower | Guaranteed + Alert | Security-sensitive, compliance |
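A minimal sketch of that resolution logic; `resolve_verify_mode` and the direct `os.getenv` reads are illustrative, not the final implementation:
```python
import os
from enum import Enum
from typing import Optional


class VerifyMode(str, Enum):
    none = "none"
    header = "header"
    stream = "stream"
    pre = "pre"
    strict = "strict"


def resolve_verify_mode(requested: Optional[VerifyMode]) -> VerifyMode:
    """Pick the per-request mode, honoring the server default and override policy."""
    default = VerifyMode(os.getenv("ORCHARD_VERIFY_MODE", "none"))
    allow_override = os.getenv("ORCHARD_VERIFY_ALLOW_OVERRIDE", "true").lower() == "true"
    if requested is not None and allow_override:
        return requested
    return default
```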
### Mode: None (Default)
**Behavior:**
- Stream content directly from S3 with no server-side processing
- Maximum download performance
- Client is responsible for verification
**Headers Returned:**
```
X-Checksum-SHA256: <expected_hash>
Content-Length: <expected_size>
```
**Flow:**
```
Client Request → Lookup Artifact → Stream from S3 → Client
```
### Mode: Header
**Behavior:**
- Stream content directly from S3
- Include comprehensive checksum headers
- Client performs verification using headers
**Headers Returned:**
```
X-Checksum-SHA256: <expected_hash>
Content-Length: <expected_size>
Digest: sha-256=<base64_encoded_hash>
ETag: "<sha256_hash>"
X-Content-SHA256: <expected_hash>
```
**Flow:**
```
Client Request → Lookup Artifact → Add Headers → Stream from S3 → Client Verifies
```
**Client Verification Example:**
```bash
# Download and verify
curl -sLo downloaded_file https://orchard/project/foo/bar/+/v1.0.0
EXPECTED=$(curl -sI https://orchard/project/foo/bar/+/v1.0.0 | grep -i x-checksum-sha256 | tr -d '\r' | cut -d' ' -f2)
ACTUAL=$(sha256sum downloaded_file | cut -d' ' -f1)
[ "$EXPECTED" = "$ACTUAL" ] && echo "OK" || echo "MISMATCH"
```
### Mode: Stream (Post-Hoc Verification)
**Behavior:**
- Wrap S3 stream with `HashingStreamWrapper`
- Compute SHA256 incrementally while streaming to client
- Verify hash after stream completes
- Log verification result
- Cannot reject content (already sent to client)
**Headers Returned:**
```
X-Checksum-SHA256: <expected_hash>
Content-Length: <expected_size>
X-Verify-Mode: stream
Trailer: X-Verified
```
**Trailers (if client supports):**
```
X-Verified: true|false
X-Computed-SHA256: <computed_hash>
```
**Flow:**
```
Client Request → Lookup Artifact → Wrap Stream → Stream to Client
                                       ↓
                         Compute Hash Incrementally
                                       ↓
                       Verify After Complete → Log Result
```
**Implementation:**
```python
import hashlib
from typing import Callable, Iterable, Iterator


class HashingStreamWrapper:
    """Wraps an S3 body iterator, hashing chunks as they pass through to the client."""

    def __init__(self, stream: Iterable[bytes], expected_hash: str, on_complete: Callable[[bool, str], None]):
        self.stream = stream
        self.hasher = hashlib.sha256()
        self.expected_hash = expected_hash
        self.on_complete = on_complete

    def __iter__(self) -> Iterator[bytes]:
        for chunk in self.stream:
            self.hasher.update(chunk)
            yield chunk
        # Stream complete, verify and report post-hoc
        computed = self.hasher.hexdigest()
        self.on_complete(computed == self.expected_hash, computed)
```
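A sketch of wiring the wrapper into a download response; the `StreamingResponse` usage assumes the FastAPI backend, and `log_verification` is an illustrative callback:
```python
import logging
from fastapi.responses import StreamingResponse

logger = logging.getLogger(__name__)


def log_verification(ok: bool, computed: str) -> None:
    # Post-hoc result: the body has already been sent, so we can only record it.
    logger.log(
        logging.INFO if ok else logging.ERROR,
        "verification.%s computed=%s", "success" if ok else "failure", computed,
    )


def stream_with_verification(s3_stream, artifact_id: str) -> StreamingResponse:
    wrapped = HashingStreamWrapper(s3_stream, expected_hash=artifact_id, on_complete=log_verification)
    return StreamingResponse(
        wrapped,
        headers={"X-Checksum-SHA256": artifact_id, "X-Verify-Mode": "stream"},
    )
```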
### Mode: Pre-Verify (Blocking)
**Behavior:**
- Download entire content from S3 to memory/temp file
- Compute SHA256 hash before sending to client
- On match: stream verified content to client
- On mismatch: retry from S3 (up to N times)
- If retries exhausted: return 500 error
**Headers Returned:**
```
X-Checksum-SHA256: <expected_hash>
Content-Length: <expected_size>
X-Verify-Mode: pre
X-Verified: true
```
**Flow:**
```
Client Request → Lookup Artifact → Download from S3 → Compute Hash
                                                          ↓
                                                    Hash Matches?
                                                     ↓         ↓
                                                    Yes        No
                                                     ↓         ↓
                                           Stream to Client   Retry?
                                                               ├─ Yes → loop
                                                               └─ No → 500 Error
```
**Memory Considerations:**
- For files < `ORCHARD_VERIFY_MEMORY_LIMIT` (default 100MB): buffer in memory
- For larger files: use temporary file with streaming hash computation
- Clean up temp files after the response is sent (see the sketch below)
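A rough sketch of that buffering strategy; the `pre_verify` helper and its return shape are illustrative only:
```python
import hashlib
import os
import tempfile

MEMORY_LIMIT = int(os.getenv("ORCHARD_VERIFY_MEMORY_LIMIT", str(100 * 1024 * 1024)))


def pre_verify(s3_body, expected_hash: str, expected_size: int):
    """Buffer the object, verify its SHA256, and return (content_or_path, cleanup)."""
    if expected_size <= MEMORY_LIMIT:
        content = s3_body.read()
        if hashlib.sha256(content).hexdigest() != expected_hash:
            raise ValueError("integrity_verification_failed")
        return content, lambda: None
    # Larger objects: spill to a temp file while hashing incrementally.
    hasher = hashlib.sha256()
    tmp = tempfile.NamedTemporaryFile(delete=False)
    for chunk in iter(lambda: s3_body.read(8 * 1024 * 1024), b""):
        hasher.update(chunk)
        tmp.write(chunk)
    tmp.close()
    if hasher.hexdigest() != expected_hash:
        os.unlink(tmp.name)
        raise ValueError("integrity_verification_failed")
    return tmp.name, lambda: os.unlink(tmp.name)  # caller deletes after the response is sent
```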
### Mode: Strict
**Behavior:**
- Same as pre-verify but with no retries
- Fail immediately on any mismatch
- Quarantine artifact on failure (mark as potentially corrupted)
- Trigger alert/notification on failure
- For security-critical downloads
**Headers Returned (on success):**
```
X-Checksum-SHA256: <expected_hash>
Content-Length: <expected_size>
X-Verify-Mode: strict
X-Verified: true
```
**Error Response (on failure):**
```json
{
"error": "integrity_verification_failed",
"message": "Artifact content does not match expected checksum",
"expected_hash": "<expected>",
"computed_hash": "<computed>",
"artifact_id": "<id>",
"action_taken": "quarantined"
}
```
**Quarantine Process:**
1. Mark artifact `status = 'quarantined'` in database
2. Log security event to audit_logs
3. Optionally notify via webhook/email
4. Artifact becomes unavailable for download until resolved
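A sketch of step 1; note that `artifacts` has no `status` column today, so that column (or an equivalent quarantine marker) is an assumption this design introduces:
```python
from sqlalchemy import text
from sqlalchemy.orm import Session


def quarantine_artifact(db: Session, artifact_id: str) -> None:
    """Mark a corrupted artifact unavailable (strict-mode failure handling)."""
    # Assumed column: artifacts.status is not part of the current schema yet.
    db.execute(
        text("UPDATE artifacts SET status = 'quarantined' WHERE id = :id"),
        {"id": artifact_id},
    )
    db.commit()
    # Steps 2-3 (audit_logs entry, webhook/email alert) would follow here,
    # using the existing audit logging path and ORCHARD_VERIFY_ALERT_WEBHOOK.
```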
## Failure Detection
### Failure Types
| Failure Type | Detection Method | Severity |
|--------------|------------------|----------|
| Hash mismatch | Computed SHA256 ≠ Expected | Critical |
| Size mismatch | Actual bytes ≠ `Content-Length` | High |
| S3 read error | boto3 exception | Medium |
| Truncated content | Stream ends early | High |
| S3 object missing | `NoSuchKey` error | Critical |
| ETag mismatch | S3 ETag ≠ expected | Medium |
### Detection Implementation
```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class VerificationResult:
    success: bool
    failure_type: Optional[str]  # hash_mismatch, size_mismatch, etc.
    expected_hash: str
    computed_hash: Optional[str]
    expected_size: int
    actual_size: Optional[int]
    error_message: Optional[str]
    retry_count: int = 0
```
## Retry Mechanism
### Configuration
| Environment Variable | Default | Description |
|---------------------|---------|-------------|
| `ORCHARD_VERIFY_MAX_RETRIES` | 3 | Maximum retry attempts |
| `ORCHARD_VERIFY_RETRY_DELAY_MS` | 100 | Base delay between retries |
| `ORCHARD_VERIFY_RETRY_BACKOFF` | 2.0 | Exponential backoff multiplier |
| `ORCHARD_VERIFY_RETRY_MAX_DELAY_MS` | 5000 | Maximum delay cap |
### Backoff Formula
```
delay = min(base_delay * (backoff ^ attempt), max_delay)
```
Example with defaults:
- Attempt 1: 100ms
- Attempt 2: 200ms
- Attempt 3: 400ms
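The same computation as code (`attempt` is 0-based, so the first retry waits 100 ms with the defaults):
```python
import os

BASE_DELAY_MS = float(os.getenv("ORCHARD_VERIFY_RETRY_DELAY_MS", "100"))
BACKOFF = float(os.getenv("ORCHARD_VERIFY_RETRY_BACKOFF", "2.0"))
MAX_DELAY_MS = float(os.getenv("ORCHARD_VERIFY_RETRY_MAX_DELAY_MS", "5000"))


def calculate_backoff(attempt: int) -> float:
    """Delay in milliseconds before retry number `attempt + 1`."""
    return min(BASE_DELAY_MS * (BACKOFF ** attempt), MAX_DELAY_MS)


# calculate_backoff(0) -> 100.0, calculate_backoff(1) -> 200.0, calculate_backoff(2) -> 400.0
```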
### Retry Flow
```python
async def download_with_retry(artifact, max_retries=3):
for attempt in range(max_retries + 1):
try:
content = await fetch_from_s3(artifact.s3_key)
computed_hash = compute_sha256(content)
if computed_hash == artifact.id:
return content # Success
# Hash mismatch
log.warning(f"Verification failed, attempt {attempt + 1}/{max_retries + 1}")
if attempt < max_retries:
delay = calculate_backoff(attempt)
await asyncio.sleep(delay / 1000)
else:
raise IntegrityError("Max retries exceeded")
except S3Error as e:
if attempt < max_retries:
delay = calculate_backoff(attempt)
await asyncio.sleep(delay / 1000)
else:
raise
```
### Retryable vs Non-Retryable Failures
**Retryable:**
- S3 read timeout
- S3 connection error
- Hash mismatch (may be transient S3 issue)
- Truncated content
**Non-Retryable:**
- S3 object not found (404)
- S3 access denied (403)
- Artifact not in database
- Strict mode failures
## Configuration Reference
### Environment Variables
```bash
# Verification mode (none, header, stream, pre, strict)
ORCHARD_VERIFY_MODE=none
# Retry settings
ORCHARD_VERIFY_MAX_RETRIES=3
ORCHARD_VERIFY_RETRY_DELAY_MS=100
ORCHARD_VERIFY_RETRY_BACKOFF=2.0
ORCHARD_VERIFY_RETRY_MAX_DELAY_MS=5000
# Memory limit for pre-verify buffering (bytes)
ORCHARD_VERIFY_MEMORY_LIMIT=104857600 # 100MB
# Strict mode settings
ORCHARD_VERIFY_QUARANTINE_ON_FAILURE=true
ORCHARD_VERIFY_ALERT_WEBHOOK=https://alerts.example.com/webhook
# Allow per-request mode override
ORCHARD_VERIFY_ALLOW_OVERRIDE=true
```
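These could surface on the backend settings object roughly as follows; a sketch assuming pydantic-settings with the existing `ORCHARD_` prefix, with illustrative field names:
```python
from typing import Optional

from pydantic_settings import BaseSettings, SettingsConfigDict


class VerifySettings(BaseSettings):
    model_config = SettingsConfigDict(env_prefix="ORCHARD_")

    verify_mode: str = "none"
    verify_max_retries: int = 3
    verify_retry_delay_ms: int = 100
    verify_retry_backoff: float = 2.0
    verify_retry_max_delay_ms: int = 5000
    verify_memory_limit: int = 104_857_600  # 100MB
    verify_quarantine_on_failure: bool = True
    verify_alert_webhook: Optional[str] = None
    verify_allow_override: bool = True
```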
### Per-Request Override
When `ORCHARD_VERIFY_ALLOW_OVERRIDE=true`, clients can specify verification mode:
```
GET /api/v1/project/foo/bar/+/v1.0.0?verify=pre
GET /api/v1/project/foo/bar/+/v1.0.0?verify=none
```
## API Changes
### Download Endpoint
**Request:**
```
GET /api/v1/project/{project}/{package}/+/{ref}?verify={mode}
```
**New Query Parameters:**
| Parameter | Type | Default | Description |
|-----------|------|---------|-------------|
| `verify` | string | from config | Verification mode |
**New Response Headers:**
| Header | Description |
|--------|-------------|
| `X-Checksum-SHA256` | Expected SHA256 hash |
| `X-Verify-Mode` | Active verification mode |
| `X-Verified` | `true` if server verified content |
| `Digest` | RFC 3230 digest header |
### New Endpoint: Verify Artifact
**Request:**
```
POST /api/v1/project/{project}/{package}/+/{ref}/verify
```
**Response:**
```json
{
"artifact_id": "abc123...",
"verified": true,
"expected_hash": "abc123...",
"computed_hash": "abc123...",
"size_match": true,
"expected_size": 1048576,
"actual_size": 1048576,
"verification_time_ms": 45
}
```
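A sketch of the handler behind this endpoint; `resolve_artifact` and `storage.iter_object` are illustrative names, not existing helpers:
```python
import hashlib
import time

from fastapi import APIRouter, HTTPException

router = APIRouter()


@router.post("/api/v1/project/{project}/{package}/+/{ref}/verify")
def verify_artifact(project: str, package: str, ref: str):
    artifact = resolve_artifact(project, package, ref)  # hypothetical lookup helper
    if artifact is None:
        raise HTTPException(status_code=404, detail="artifact not found")

    start = time.monotonic()
    hasher = hashlib.sha256()
    actual_size = 0
    for chunk in storage.iter_object(artifact.s3_key):  # hypothetical streaming read from S3
        hasher.update(chunk)
        actual_size += len(chunk)
    computed = hasher.hexdigest()

    return {
        "artifact_id": artifact.id,
        "verified": computed == artifact.id and actual_size == artifact.size,
        "expected_hash": artifact.id,
        "computed_hash": computed,
        "size_match": actual_size == artifact.size,
        "expected_size": artifact.size,
        "actual_size": actual_size,
        "verification_time_ms": int((time.monotonic() - start) * 1000),
    }
```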
## Logging and Monitoring
### Log Events
| Event | Level | When |
|-------|-------|------|
| `verification.success` | INFO | Hash verified successfully |
| `verification.failure` | ERROR | Hash mismatch detected |
| `verification.retry` | WARN | Retry attempt initiated |
| `verification.quarantine` | ERROR | Artifact quarantined |
| `verification.skip` | DEBUG | Verification skipped (mode=none) |
### Metrics
| Metric | Type | Description |
|--------|------|-------------|
| `orchard_verification_total` | Counter | Total verification attempts |
| `orchard_verification_failures` | Counter | Failed verifications |
| `orchard_verification_retries` | Counter | Retry attempts |
| `orchard_verification_duration_ms` | Histogram | Verification time |
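A sketch of the corresponding collectors, assuming `prometheus_client`; the `mode` and `failure_type` labels are additions beyond the table above:
```python
from prometheus_client import Counter, Histogram

VERIFICATION_TOTAL = Counter(
    "orchard_verification_total", "Total verification attempts", ["mode"]
)
VERIFICATION_FAILURES = Counter(
    "orchard_verification_failures", "Failed verifications", ["mode", "failure_type"]
)
VERIFICATION_RETRIES = Counter(
    "orchard_verification_retries", "Retry attempts", ["mode"]
)
VERIFICATION_DURATION_MS = Histogram(
    "orchard_verification_duration_ms", "Verification time in milliseconds", ["mode"]
)
```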
### Audit Log Entry
```json
{
"action": "artifact.download.verified",
"resource": "project/foo/package/bar/artifact/abc123",
"user_id": "user@example.com",
"details": {
"verification_mode": "pre",
"verified": true,
"retry_count": 0,
"duration_ms": 45
}
}
```
## Security Considerations
1. **Strict Mode for Sensitive Data**: Use strict mode for artifacts containing credentials, certificates, or security-critical code.
2. **Quarantine Isolation**: Quarantined artifacts should be moved to a separate S3 prefix or bucket for forensic analysis.
3. **Alert on Repeated Failures**: Multiple verification failures for the same artifact may indicate storage corruption or tampering.
4. **Audit Trail**: All verification events should be logged for compliance and forensic purposes.
5. **Client Trust**: In `none` and `header` modes, clients must implement their own verification for security guarantees.
## Implementation Phases
### Phase 1: Headers Only
- Add `X-Checksum-SHA256` header to all downloads
- Add `verify=header` mode support
- Add configuration options
### Phase 2: Stream Verification
- Implement `HashingStreamWrapper`
- Add `verify=stream` mode
- Add verification logging
### Phase 3: Pre-Verification
- Implement buffered verification
- Add retry mechanism
- Add `verify=pre` mode
### Phase 4: Strict Mode
- Implement quarantine mechanism
- Add alerting integration
- Add `verify=strict` mode
## Client Integration Examples
### curl with Verification
```bash
#!/bin/bash
URL="https://orchard.example.com/api/v1/project/myproject/mypackage/+/v1.0.0"
# Get expected hash from headers
EXPECTED=$(curl -sI "$URL" | grep -i "X-Checksum-SHA256" | tr -d '\r' | cut -d' ' -f2)
# Download file
curl -sO "$URL"
FILENAME=$(basename "$URL")
# Verify
ACTUAL=$(sha256sum "$FILENAME" | cut -d' ' -f1)
if [ "$EXPECTED" = "$ACTUAL" ]; then
echo "✓ Verification passed"
else
echo "✗ Verification FAILED"
echo " Expected: $EXPECTED"
echo " Actual: $ACTUAL"
exit 1
fi
```
### Python Client
```python
import hashlib
import requests
def download_verified(url: str) -> bytes:
# Get headers first
head = requests.head(url)
expected_hash = head.headers.get('X-Checksum-SHA256')
expected_size = int(head.headers.get('Content-Length', 0))
# Download content
response = requests.get(url)
content = response.content
# Verify size
if len(content) != expected_size:
raise ValueError(f"Size mismatch: {len(content)} != {expected_size}")
# Verify hash
actual_hash = hashlib.sha256(content).hexdigest()
if actual_hash != expected_hash:
raise ValueError(f"Hash mismatch: {actual_hash} != {expected_hash}")
return content
```
### Server-Side Verification
```bash
# Force server to verify before sending
curl -O "https://orchard.example.com/api/v1/project/myproject/mypackage/+/v1.0.0?verify=pre"
# Check if verification was performed
curl -I "https://orchard.example.com/api/v1/project/myproject/mypackage/+/v1.0.0?verify=pre" | grep X-Verified
# X-Verified: true
```


@@ -14,6 +14,7 @@ CREATE TABLE IF NOT EXISTS projects (
 CREATE INDEX idx_projects_name ON projects(name);
 CREATE INDEX idx_projects_created_by ON projects(created_by);
+CREATE INDEX idx_projects_public ON projects(name) WHERE is_public = true;
 
 -- Packages (collections within projects)
 CREATE TABLE IF NOT EXISTS packages (
@@ -21,6 +22,8 @@ CREATE TABLE IF NOT EXISTS packages (
     project_id UUID NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
     name VARCHAR(255) NOT NULL,
     description TEXT,
+    format VARCHAR(50) DEFAULT 'generic', -- package type: generic, npm, pypi, docker, etc.
+    platform VARCHAR(50) DEFAULT 'any', -- target platform: any, linux, darwin, windows, etc.
     created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
     updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
     UNIQUE(project_id, name)
@@ -28,21 +31,30 @@ CREATE TABLE IF NOT EXISTS packages (
 CREATE INDEX idx_packages_project_id ON packages(project_id);
 CREATE INDEX idx_packages_name ON packages(name);
+CREATE INDEX idx_packages_format ON packages(format);
+CREATE INDEX idx_packages_platform ON packages(platform);
 
 -- Artifacts (Content-Addressable)
 CREATE TABLE IF NOT EXISTS artifacts (
     id VARCHAR(64) PRIMARY KEY, -- SHA256 hash
-    size BIGINT NOT NULL,
+    size BIGINT NOT NULL CHECK (size > 0),
     content_type VARCHAR(255),
     original_name VARCHAR(1024),
+    checksum_md5 VARCHAR(32), -- MD5 hash for additional verification
+    checksum_sha1 VARCHAR(40), -- SHA1 hash for compatibility
+    s3_etag VARCHAR(64), -- S3 ETag for verification
+    metadata JSONB, -- format-specific metadata
     created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
     created_by VARCHAR(255) NOT NULL,
-    ref_count INTEGER DEFAULT 1,
+    ref_count INTEGER DEFAULT 1 CHECK (ref_count >= 0),
     s3_key VARCHAR(1024) NOT NULL
 );
 
 CREATE INDEX idx_artifacts_created_at ON artifacts(created_at);
 CREATE INDEX idx_artifacts_created_by ON artifacts(created_by);
+CREATE INDEX idx_artifacts_metadata ON artifacts USING GIN (metadata);
+CREATE INDEX idx_artifacts_checksum_md5 ON artifacts(checksum_md5) WHERE checksum_md5 IS NOT NULL;
+CREATE INDEX idx_artifacts_checksum_sha1 ON artifacts(checksum_sha1) WHERE checksum_sha1 IS NOT NULL;
 
 -- Tags (Aliases pointing to artifacts)
 CREATE TABLE IF NOT EXISTS tags (
@@ -51,12 +63,14 @@ CREATE TABLE IF NOT EXISTS tags (
     name VARCHAR(255) NOT NULL,
     artifact_id VARCHAR(64) NOT NULL REFERENCES artifacts(id),
     created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
+    updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
     created_by VARCHAR(255) NOT NULL,
     UNIQUE(package_id, name)
 );
 
 CREATE INDEX idx_tags_package_id ON tags(package_id);
 CREATE INDEX idx_tags_artifact_id ON tags(artifact_id);
+CREATE INDEX idx_tags_package_created_at ON tags(package_id, created_at DESC);
 
 -- Tag History (for rollback capability)
 CREATE TABLE IF NOT EXISTS tag_history (
@@ -64,11 +78,13 @@ CREATE TABLE IF NOT EXISTS tag_history (
     tag_id UUID NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
     old_artifact_id VARCHAR(64) REFERENCES artifacts(id),
     new_artifact_id VARCHAR(64) NOT NULL REFERENCES artifacts(id),
+    change_type VARCHAR(20) NOT NULL DEFAULT 'update' CHECK (change_type IN ('create', 'update', 'delete')),
     changed_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
     changed_by VARCHAR(255) NOT NULL
 );
 
 CREATE INDEX idx_tag_history_tag_id ON tag_history(tag_id);
+CREATE INDEX idx_tag_history_changed_at ON tag_history(changed_at);
 
 -- Uploads (upload event records)
 CREATE TABLE IF NOT EXISTS uploads (
@@ -76,6 +92,11 @@ CREATE TABLE IF NOT EXISTS uploads (
     artifact_id VARCHAR(64) NOT NULL REFERENCES artifacts(id),
     package_id UUID NOT NULL REFERENCES packages(id),
     original_name VARCHAR(1024),
+    tag_name VARCHAR(255), -- tag assigned during upload
+    user_agent VARCHAR(512), -- client identification
+    duration_ms INTEGER, -- upload timing in milliseconds
+    deduplicated BOOLEAN DEFAULT false, -- whether artifact was deduplicated
+    checksum_verified BOOLEAN DEFAULT true, -- whether checksum was verified
     uploaded_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
     uploaded_by VARCHAR(255) NOT NULL,
     source_ip VARCHAR(45)
@@ -84,6 +105,8 @@ CREATE TABLE IF NOT EXISTS uploads (
 CREATE INDEX idx_uploads_artifact_id ON uploads(artifact_id);
 CREATE INDEX idx_uploads_package_id ON uploads(package_id);
 CREATE INDEX idx_uploads_uploaded_at ON uploads(uploaded_at);
+CREATE INDEX idx_uploads_package_uploaded_at ON uploads(package_id, uploaded_at DESC);
+CREATE INDEX idx_uploads_uploaded_by_at ON uploads(uploaded_by, uploaded_at DESC);
 
 -- Consumers (Dependency tracking)
 CREATE TABLE IF NOT EXISTS consumers (
@@ -141,14 +164,17 @@ CREATE INDEX idx_audit_logs_action ON audit_logs(action);
 CREATE INDEX idx_audit_logs_resource ON audit_logs(resource);
 CREATE INDEX idx_audit_logs_user_id ON audit_logs(user_id);
 CREATE INDEX idx_audit_logs_timestamp ON audit_logs(timestamp);
+CREATE INDEX idx_audit_logs_resource_timestamp ON audit_logs(resource, timestamp DESC);
+CREATE INDEX idx_audit_logs_user_timestamp ON audit_logs(user_id, timestamp DESC);
+CREATE INDEX idx_audit_logs_details ON audit_logs USING GIN (details);
 
 -- Trigger to update tag history on changes
 CREATE OR REPLACE FUNCTION track_tag_changes()
 RETURNS TRIGGER AS $$
 BEGIN
     IF TG_OP = 'UPDATE' AND OLD.artifact_id != NEW.artifact_id THEN
-        INSERT INTO tag_history (id, tag_id, old_artifact_id, new_artifact_id, changed_at, changed_by)
-        VALUES (gen_random_uuid(), NEW.id, OLD.artifact_id, NEW.artifact_id, NOW(), NEW.created_by);
+        INSERT INTO tag_history (id, tag_id, old_artifact_id, new_artifact_id, change_type, changed_at, changed_by)
+        VALUES (gen_random_uuid(), NEW.id, OLD.artifact_id, NEW.artifact_id, 'update', NOW(), NEW.created_by);
     END IF;
     RETURN NEW;
 END;
@@ -158,3 +184,72 @@ CREATE TRIGGER tag_changes_trigger
 AFTER UPDATE ON tags
 FOR EACH ROW
 EXECUTE FUNCTION track_tag_changes();
-- Trigger to auto-update updated_at timestamps
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER projects_updated_at_trigger
BEFORE UPDATE ON projects
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
CREATE TRIGGER packages_updated_at_trigger
BEFORE UPDATE ON packages
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
CREATE TRIGGER tags_updated_at_trigger
BEFORE UPDATE ON tags
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
-- Triggers for maintaining artifact ref_count accuracy
CREATE OR REPLACE FUNCTION increment_artifact_ref_count()
RETURNS TRIGGER AS $$
BEGIN
UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION decrement_artifact_ref_count()
RETURNS TRIGGER AS $$
BEGIN
UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id;
RETURN OLD;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION update_artifact_ref_count()
RETURNS TRIGGER AS $$
BEGIN
IF OLD.artifact_id != NEW.artifact_id THEN
UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id;
UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id;
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Note: ref_count triggers on tags table
-- These track how many tags reference each artifact
CREATE TRIGGER tags_ref_count_insert_trigger
AFTER INSERT ON tags
FOR EACH ROW
EXECUTE FUNCTION increment_artifact_ref_count();
CREATE TRIGGER tags_ref_count_delete_trigger
AFTER DELETE ON tags
FOR EACH ROW
EXECUTE FUNCTION decrement_artifact_ref_count();
CREATE TRIGGER tags_ref_count_update_trigger
AFTER UPDATE ON tags
FOR EACH ROW
EXECUTE FUNCTION update_artifact_ref_count();


@@ -0,0 +1,170 @@
-- Migration 002: Schema Enhancements
-- Adds new fields, indexes, and triggers for improved functionality
-- ============================================
-- Packages: Add format and platform fields
-- ============================================
ALTER TABLE packages ADD COLUMN IF NOT EXISTS format VARCHAR(50) DEFAULT 'generic';
ALTER TABLE packages ADD COLUMN IF NOT EXISTS platform VARCHAR(50) DEFAULT 'any';
CREATE INDEX IF NOT EXISTS idx_packages_format ON packages(format);
CREATE INDEX IF NOT EXISTS idx_packages_platform ON packages(platform);
-- ============================================
-- Artifacts: Add checksum_md5, metadata, and CHECK constraints
-- ============================================
ALTER TABLE artifacts ADD COLUMN IF NOT EXISTS checksum_md5 VARCHAR(32);
ALTER TABLE artifacts ADD COLUMN IF NOT EXISTS metadata JSONB;
-- Add CHECK constraints (adding them will fail if existing rows violate them)
DO $$
BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_constraint WHERE conname = 'artifacts_ref_count_check') THEN
ALTER TABLE artifacts ADD CONSTRAINT artifacts_ref_count_check CHECK (ref_count >= 0);
END IF;
IF NOT EXISTS (SELECT 1 FROM pg_constraint WHERE conname = 'artifacts_size_check') THEN
ALTER TABLE artifacts ADD CONSTRAINT artifacts_size_check CHECK (size > 0);
END IF;
END $$;
CREATE INDEX IF NOT EXISTS idx_artifacts_metadata ON artifacts USING GIN (metadata);
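-- Illustrative note (not part of this migration): the GIN index above is aimed at
-- JSONB containment filters on artifacts.metadata; the "build_type" key below is a
-- hypothetical example of such a filter.
--   SELECT id, size FROM artifacts WHERE metadata @> '{"build_type": "release"}';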
-- ============================================
-- Tags: Add updated_at and composite index
-- ============================================
ALTER TABLE tags ADD COLUMN IF NOT EXISTS updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW();
CREATE INDEX IF NOT EXISTS idx_tags_package_created_at ON tags(package_id, created_at DESC);
-- ============================================
-- Tag History: Add change_type and index
-- ============================================
ALTER TABLE tag_history ADD COLUMN IF NOT EXISTS change_type VARCHAR(20) DEFAULT 'update';
DO $$
BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_constraint WHERE conname = 'tag_history_change_type_check') THEN
ALTER TABLE tag_history ADD CONSTRAINT tag_history_change_type_check
CHECK (change_type IN ('create', 'update', 'delete'));
END IF;
END $$;
CREATE INDEX IF NOT EXISTS idx_tag_history_changed_at ON tag_history(changed_at);
-- ============================================
-- Uploads: Add new fields and composite indexes
-- ============================================
ALTER TABLE uploads ADD COLUMN IF NOT EXISTS tag_name VARCHAR(255);
ALTER TABLE uploads ADD COLUMN IF NOT EXISTS user_agent VARCHAR(512);
ALTER TABLE uploads ADD COLUMN IF NOT EXISTS duration_ms INTEGER;
ALTER TABLE uploads ADD COLUMN IF NOT EXISTS deduplicated BOOLEAN DEFAULT false;
ALTER TABLE uploads ADD COLUMN IF NOT EXISTS checksum_verified BOOLEAN DEFAULT true;
CREATE INDEX IF NOT EXISTS idx_uploads_package_uploaded_at ON uploads(package_id, uploaded_at DESC);
CREATE INDEX IF NOT EXISTS idx_uploads_uploaded_by_at ON uploads(uploaded_by, uploaded_at DESC);
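-- Illustrative note (not part of this migration): the composite indexes above suit
-- "recent activity" listings such as the sketch below; the package_id value is only
-- a placeholder.
--   SELECT tag_name, deduplicated, duration_ms, uploaded_at
--   FROM uploads
--   WHERE package_id = '00000000-0000-0000-0000-000000000000'
--   ORDER BY uploaded_at DESC
--   LIMIT 20;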
-- ============================================
-- Audit Logs: Add composite indexes and GIN index
-- ============================================
CREATE INDEX IF NOT EXISTS idx_audit_logs_resource_timestamp ON audit_logs(resource, timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_audit_logs_user_timestamp ON audit_logs(user_id, timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_audit_logs_details ON audit_logs USING GIN (details);
-- ============================================
-- Projects: Add partial index for public projects
-- ============================================
CREATE INDEX IF NOT EXISTS idx_projects_public ON projects(name) WHERE is_public = true;
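-- Illustrative note (not part of this migration): the partial index only covers rows
-- where is_public = true, so it stays small and serves queries repeating that predicate.
--   SELECT name FROM projects WHERE is_public = true ORDER BY name;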
-- ============================================
-- Triggers: Update tag_changes trigger for change_type
-- ============================================
CREATE OR REPLACE FUNCTION track_tag_changes()
RETURNS TRIGGER AS $$
BEGIN
IF TG_OP = 'UPDATE' AND OLD.artifact_id != NEW.artifact_id THEN
INSERT INTO tag_history (id, tag_id, old_artifact_id, new_artifact_id, change_type, changed_at, changed_by)
VALUES (gen_random_uuid(), NEW.id, OLD.artifact_id, NEW.artifact_id, 'update', NOW(), NEW.created_by);
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- ============================================
-- Triggers: Auto-update updated_at timestamps
-- ============================================
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Drop triggers if they exist, then recreate
DROP TRIGGER IF EXISTS projects_updated_at_trigger ON projects;
CREATE TRIGGER projects_updated_at_trigger
BEFORE UPDATE ON projects
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
DROP TRIGGER IF EXISTS packages_updated_at_trigger ON packages;
CREATE TRIGGER packages_updated_at_trigger
BEFORE UPDATE ON packages
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
DROP TRIGGER IF EXISTS tags_updated_at_trigger ON tags;
CREATE TRIGGER tags_updated_at_trigger
BEFORE UPDATE ON tags
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
-- ============================================
-- Triggers: Maintain artifact ref_count accuracy
-- ============================================
CREATE OR REPLACE FUNCTION increment_artifact_ref_count()
RETURNS TRIGGER AS $$
BEGIN
UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION decrement_artifact_ref_count()
RETURNS TRIGGER AS $$
BEGIN
UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id;
RETURN OLD;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION update_artifact_ref_count()
RETURNS TRIGGER AS $$
BEGIN
IF OLD.artifact_id != NEW.artifact_id THEN
UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id;
UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id;
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Note: ref_count triggers on tags table
-- These track how many tags reference each artifact
DROP TRIGGER IF EXISTS tags_ref_count_insert_trigger ON tags;
CREATE TRIGGER tags_ref_count_insert_trigger
AFTER INSERT ON tags
FOR EACH ROW
EXECUTE FUNCTION increment_artifact_ref_count();
DROP TRIGGER IF EXISTS tags_ref_count_delete_trigger ON tags;
CREATE TRIGGER tags_ref_count_delete_trigger
AFTER DELETE ON tags
FOR EACH ROW
EXECUTE FUNCTION decrement_artifact_ref_count();
DROP TRIGGER IF EXISTS tags_ref_count_update_trigger ON tags;
CREATE TRIGGER tags_ref_count_update_trigger
AFTER UPDATE ON tags
FOR EACH ROW
EXECUTE FUNCTION update_artifact_ref_count();
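With the ref_count triggers keeping artifact reference counts in step with tag inserts, deletes, and retags, a cleanup or garbage-collection job could start from a query like the sketch below (not part of the migration); a real job would likely also apply a grace period before deleting anything.

-- Hypothetical cleanup candidate query: artifacts no longer referenced by any tag
SELECT id, size
FROM artifacts
WHERE ref_count = 0;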

@@ -0,0 +1,12 @@
-- Migration 003: Additional Checksum Fields
-- Adds checksum_sha1 and s3_etag fields to artifacts table
-- ============================================
-- Artifacts: Add checksum_sha1 and s3_etag fields
-- ============================================
ALTER TABLE artifacts ADD COLUMN IF NOT EXISTS checksum_sha1 VARCHAR(40);
ALTER TABLE artifacts ADD COLUMN IF NOT EXISTS s3_etag VARCHAR(64);
-- Create indexes for checksum lookups (optional, for verification queries)
CREATE INDEX IF NOT EXISTS idx_artifacts_checksum_md5 ON artifacts(checksum_md5) WHERE checksum_md5 IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_artifacts_checksum_sha1 ON artifacts(checksum_sha1) WHERE checksum_sha1 IS NOT NULL;
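As a hedged example of how these fields might be used (not part of the migration): a verification or duplicate-detection step can look an artifact up by a client-supplied digest and compare the other stored checksums; the MD5 literal below is only a placeholder value.

-- Hypothetical verification lookup by MD5 digest
SELECT id, checksum_sha1, s3_etag
FROM artifacts
WHERE checksum_md5 = 'd41d8cd98f00b204e9800998ecf8427e';  -- placeholder digest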

tasks.py Normal file
@@ -0,0 +1,2 @@
# Import docker project template tasks
from prosper.projects.docker import *