9 Commits

Author SHA1 Message Date
Mondo Diaz
b124b94b56 Store SHA256 checksums with artifacts and add multiple hash support
- Add sha256 field to API responses as explicit alias of artifact id
- Add checksum_sha1 and s3_etag fields to artifacts table
- Compute MD5, SHA1, and capture S3 ETag during upload
- Update StorageResult to return all checksums from storage layer
- Add migration 003_checksum_fields.sql for existing databases
- Add Dockerfile.local and docker-compose.local.yml for local development
- Update schemas to include all checksum fields in responses
2025-12-15 14:15:37 -06:00
Dane Moss
0eb2deb4ca Merge branch 'update_urls' into 'main'
update URLs to point to BSF

Closes #46

See merge request esv/bsf/bsf-integration/orchard/orchard-mvp!14
2025-12-15 11:30:07 -07:00
Dane Moss
3fe421f31d update URLs to point to BSF 2025-12-15 11:30:07 -07:00
Mondo Diaz
68660eacf6 Merge branch 'feature/schema-enhancements' into 'main'
Add schema enhancements for uploads, artifacts, and audit tracking

Closes #16

See merge request esv/bsf/bsf-integration/orchard/orchard-mvp!13
2025-12-12 15:23:50 -06:00
Mondo Diaz
b52c8840f1 Add schema enhancements for uploads, artifacts, and audit tracking 2025-12-12 15:23:50 -06:00
Dane Moss
4afcdf5cda Merge branch 'add_prosper_config' into 'main'
Add prosper config

Closes #45

See merge request esv/bsf/bsf-integration/orchard/orchard-mvp!12
2025-12-12 13:52:27 -07:00
Dane Moss
bc3da14d50 Add prosper config 2025-12-12 13:52:27 -07:00
Mondo Diaz
2843335f6d Document curl -OJ flag for correct download filenames
- Update download examples to use -OJ flag
- Add note explaining curl download flags (-O, -J, -OJ, -o)
- Add example for saving to a specific filename
2025-12-12 13:53:15 -06:00
Mondo Diaz
2097865874 Remove redundant search bar from Home, rename page filters
- Remove SearchInput from Home page (use GlobalSearch in header instead)
- Rename "Search packages..." to "Filter packages..." on ProjectPage
- Rename "Search tags..." to "Filter tags..." on PackagePage
- Update FilterChip labels from "Search" to "Filter"

This differentiates the global search (header) from page-level filtering.
2025-12-12 12:55:31 -06:00
19 changed files with 719 additions and 135 deletions


@@ -1,26 +1,21 @@
-stages:
-  - test
-  - build
-  - publish
-  # - deploy
+include:
+  - project: 'esv/bsf/pypi/prosper'
+    ref: v0.64.1
+    file: '/prosper/templates/projects/docker.yml'
 
 variables:
-  # Container registry settings
-  REGISTRY: ${CI_REGISTRY}
-  IMAGE_NAME: ${CI_REGISTRY_IMAGE}
-  # Buildah settings
-  STORAGE_DRIVER: vfs
-  BUILDAH_FORMAT: docker
-  BUILDAH_ISOLATION: chroot
-
-.buildah-base:
-  image: deps.global.bsf.tools/quay.io/buildah/stable:latest
-  before_script:
-    - buildah version
-    - buildah login -u ${CI_REGISTRY_USER} -p ${CI_REGISTRY_PASSWORD} ${CI_REGISTRY}
+  # renovate: datasource=gitlab-tags depName=esv/bsf/pypi/prosper versioning=semver registryUrl=https://gitlab.global.bsf.tools
+  PROSPER_VERSION: v0.64.1
+
+kics:
+  allow_failure: true
+
+hadolint:
+  allow_failure: true
 
 # Run Python tests
-test:
+python_tests:
   stage: test
   image: deps.global.bsf.tools/docker/python:3.12-slim
   before_script:
@@ -29,47 +24,6 @@ test:
   script:
     - cd backend
     - python -m pytest -v || echo "No tests yet"
-  rules:
-    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
-    - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
-
-# Build container image for merge requests (no push)
-build:
-  stage: build
-  extends: .buildah-base
-  script:
-    - |
-      buildah build \
-        --build-arg NPM_REGISTRY=https://deps.global.bsf.tools/artifactory/api/npm/registry.npmjs.org/ \
-        --tag ${IMAGE_NAME}:${CI_COMMIT_SHORT_SHA} \
-        --label org.opencontainers.image.source=${CI_PROJECT_URL} \
-        --label org.opencontainers.image.revision=${CI_COMMIT_SHA} \
-        --label org.opencontainers.image.created=$(date -u +%Y-%m-%dT%H:%M:%SZ) \
-        .
-  rules:
-    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
-
-# Build and push on main branch
-publish:
-  stage: publish
-  extends: .buildah-base
-  script:
-    - |
-      buildah build \
-        --build-arg NPM_REGISTRY=https://deps.global.bsf.tools/artifactory/api/npm/registry.npmjs.org/ \
-        --tag ${IMAGE_NAME}:${CI_COMMIT_SHORT_SHA} \
-        --tag ${IMAGE_NAME}:${CI_COMMIT_REF_SLUG} \
-        --tag ${IMAGE_NAME}:latest \
-        --label org.opencontainers.image.source=${CI_PROJECT_URL} \
-        --label org.opencontainers.image.revision=${CI_COMMIT_SHA} \
-        --label org.opencontainers.image.created=$(date -u +%Y-%m-%dT%H:%M:%SZ) \
-        .
-    - buildah push ${IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}
-    - buildah push ${IMAGE_NAME}:${CI_COMMIT_REF_SLUG}
-    - buildah push ${IMAGE_NAME}:latest
-  rules:
-    - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
 
 # deploy_helm_charts:
 #   stage: deploy

CHANGELOG.md (new file, 38 lines)

@@ -0,0 +1,38 @@
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Added
- Added `sha256` field to API responses for clarity (alias of `id`) (#25)
- Added `checksum_sha1` field to artifacts table for compatibility (#25)
- Added `s3_etag` field to artifacts table for S3 verification (#25)
- Compute and store MD5, SHA1, and S3 ETag alongside SHA256 during upload (#25)
- Added `Dockerfile.local` and `docker-compose.local.yml` for local development (#25)
- Added migration script `003_checksum_fields.sql` for existing databases (#25)
## [0.2.0] - 2025-12-15
### Changed
- Updated images to use internal container BSF proxy (#46)
### Added
- Added `format` and `platform` fields to packages table (#16)
- Added `checksum_md5` and `metadata` JSONB fields to artifacts table (#16)
- Added `updated_at` field to tags table (#16)
- Added `tag_name`, `user_agent`, `duration_ms`, `deduplicated`, `checksum_verified` fields to uploads table (#16)
- Added `change_type` field to tag_history table (#16)
- Added composite indexes for common query patterns (#16)
- Added GIN indexes on JSONB fields for efficient JSON queries (#16)
- Added partial index for public projects (#16)
- Added database triggers for `updated_at` timestamps (#16)
- Added database triggers for maintaining artifact `ref_count` accuracy (#16)
- Added CHECK constraints for data integrity (`size > 0`, `ref_count >= 0`) (#16)
- Added migration script `002_schema_enhancements.sql` for existing databases (#16)
## [0.1.0] - 2025-12-12
### Changed
- Changed the Dockerfile npm build arg to use the deps.global.bsf.tools URL as the default registry (#45)
### Added
- Added Prosper docker template config (#45)


@@ -1,7 +1,7 @@
 # Frontend build stage
-FROM node:20-alpine AS frontend-builder
+FROM containers.global.bsf.tools/node:20-alpine AS frontend-builder
 
-ARG NPM_REGISTRY
+ARG NPM_REGISTRY=https://deps.global.bsf.tools/artifactory/api/npm/registry.npmjs.org/
 
 WORKDIR /app/frontend
@@ -19,7 +19,10 @@ COPY frontend/ ./
 RUN npm run build
 
 # Runtime stage
-FROM python:3.12-slim
+FROM containers.global.bsf.tools/python:3.12-slim
+
+# Disable proxy cache
+RUN echo 'Acquire::http::Pipeline-Depth 0;\nAcquire::http::No-Cache true;\nAcquire::BrokenProxy true;\n' > /etc/apt/apt.conf.d/99fixbadproxy
 
 # Install system dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \

Dockerfile.local (new file, 50 lines)

@@ -0,0 +1,50 @@
# Frontend build stage
FROM node:20-alpine AS frontend-builder
WORKDIR /app/frontend
# Copy package files
COPY frontend/package*.json ./
RUN npm install
# Copy frontend source
COPY frontend/ ./
# Build frontend
RUN npm run build
# Runtime stage
FROM python:3.12-slim
# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
&& rm -rf /var/lib/apt/lists/*
# Create non-root user
RUN groupadd -g 1000 orchard && \
useradd -u 1000 -g orchard -s /bin/bash -m orchard
WORKDIR /app
# Copy requirements and install Python dependencies
COPY backend/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy backend source
COPY backend/ ./backend/
# Copy frontend build
COPY --from=frontend-builder /app/frontend/dist ./frontend/dist
# Set ownership
RUN chown -R orchard:orchard /app
USER orchard
EXPOSE 8080
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
CMD curl -f http://localhost:8080/health || exit 1
CMD ["uvicorn", "backend.app.main:app", "--host", "0.0.0.0", "--port", "8080"]


@@ -275,14 +275,17 @@ curl -X POST http://localhost:8080/api/v1/project/my-project/releases/upload/abc
 ### Download an Artifact
 
 ```bash
-# By tag
-curl -O http://localhost:8080/api/v1/project/my-project/releases/+/v1.0.0
+# By tag (use -OJ to save with the correct filename from Content-Disposition header)
+curl -OJ http://localhost:8080/api/v1/project/my-project/releases/+/v1.0.0
 
 # By artifact ID
-curl -O http://localhost:8080/api/v1/project/my-project/releases/+/artifact:a3f5d8e12b4c6789...
+curl -OJ http://localhost:8080/api/v1/project/my-project/releases/+/artifact:a3f5d8e12b4c6789...
 
 # Using the short URL pattern
-curl -O http://localhost:8080/project/my-project/releases/+/latest
+curl -OJ http://localhost:8080/project/my-project/releases/+/latest
+
+# Save to a specific filename
+curl -o myfile.tar.gz http://localhost:8080/api/v1/project/my-project/releases/+/v1.0.0
 
 # Partial download (range request)
 curl -H "Range: bytes=0-1023" http://localhost:8080/api/v1/project/my-project/releases/+/v1.0.0
@@ -291,6 +294,12 @@ curl -H "Range: bytes=0-1023" http://localhost:8080/api/v1/project/my-project/re
 curl -I http://localhost:8080/api/v1/project/my-project/releases/+/v1.0.0
 ```
 
+> **Note on curl flags:**
+> - `-O` saves the file using the URL path as the filename (e.g., `latest`, `v1.0.0`)
+> - `-J` tells curl to use the filename from the `Content-Disposition` header (e.g., `app-v1.0.0.tar.gz`)
+> - `-OJ` combines both: download to a file using the server-provided filename
+> - `-o <filename>` saves to a specific filename you choose
+
 ### Create a Tag
 
 ```bash
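
Because the API now reports the artifact's SHA256 explicitly (see the schema changes below), a `-OJ` download can be verified end to end. A hedged sketch; the filename `app-v1.0.0.tar.gz` is the illustrative one from the note above:

```bash
# Download using the server-provided filename from Content-Disposition
curl -OJ http://localhost:8080/api/v1/project/my-project/releases/+/v1.0.0

# Recompute the digest locally; it should equal the artifact id / sha256 field
sha256sum app-v1.0.0.tar.gz
```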


@@ -73,15 +73,33 @@ class Artifact(Base):
     size = Column(BigInteger, nullable=False)
     content_type = Column(String(255))
     original_name = Column(String(1024))
+    checksum_md5 = Column(String(32))  # MD5 hash for additional verification
+    checksum_sha1 = Column(String(40))  # SHA1 hash for compatibility
+    s3_etag = Column(String(64))  # S3 ETag for verification
+    artifact_metadata = Column("metadata", JSON, default=dict)  # Format-specific metadata (column name is 'metadata')
     created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
     created_by = Column(String(255), nullable=False)
     ref_count = Column(Integer, default=1)
     s3_key = Column(String(1024), nullable=False)
-    format_metadata = Column(JSON, default=dict)  # Format-specific metadata (version, etc.)
 
     tags = relationship("Tag", back_populates="artifact")
     uploads = relationship("Upload", back_populates="artifact")
 
+    @property
+    def sha256(self) -> str:
+        """Alias for id - the SHA256 hash of the artifact content"""
+        return self.id
+
+    @property
+    def format_metadata(self):
+        """Alias for artifact_metadata - backward compatibility"""
+        return self.artifact_metadata
+
+    @format_metadata.setter
+    def format_metadata(self, value):
+        """Alias setter for artifact_metadata - backward compatibility"""
+        self.artifact_metadata = value
+
     __table_args__ = (
         Index("idx_artifacts_created_at", "created_at"),
         Index("idx_artifacts_created_by", "created_by"),
@@ -99,6 +117,7 @@ class Tag(Base):
     name = Column(String(255), nullable=False)
     artifact_id = Column(String(64), ForeignKey("artifacts.id"), nullable=False)
     created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
+    updated_at = Column(DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow)
     created_by = Column(String(255), nullable=False)
 
     package = relationship("Package", back_populates="tags")
@@ -120,6 +139,7 @@ class TagHistory(Base):
     tag_id = Column(UUID(as_uuid=True), ForeignKey("tags.id", ondelete="CASCADE"), nullable=False)
     old_artifact_id = Column(String(64), ForeignKey("artifacts.id"))
     new_artifact_id = Column(String(64), ForeignKey("artifacts.id"), nullable=False)
+    change_type = Column(String(20), nullable=False, default="update")
     changed_at = Column(DateTime(timezone=True), default=datetime.utcnow)
     changed_by = Column(String(255), nullable=False)
@@ -127,6 +147,8 @@ class TagHistory(Base):
     __table_args__ = (
         Index("idx_tag_history_tag_id", "tag_id"),
+        Index("idx_tag_history_changed_at", "changed_at"),
+        CheckConstraint("change_type IN ('create', 'update', 'delete')", name="check_change_type"),
     )
@@ -137,6 +159,11 @@ class Upload(Base):
     artifact_id = Column(String(64), ForeignKey("artifacts.id"), nullable=False)
     package_id = Column(UUID(as_uuid=True), ForeignKey("packages.id"), nullable=False)
     original_name = Column(String(1024))
+    tag_name = Column(String(255))  # Tag assigned during upload
+    user_agent = Column(String(512))  # Client identification
+    duration_ms = Column(Integer)  # Upload timing in milliseconds
+    deduplicated = Column(Boolean, default=False)  # Whether artifact was deduplicated
+    checksum_verified = Column(Boolean, default=True)  # Whether checksum was verified
     uploaded_at = Column(DateTime(timezone=True), default=datetime.utcnow)
     uploaded_by = Column(String(255), nullable=False)
     source_ip = Column(String(45))
@@ -148,6 +175,8 @@ class Upload(Base):
         Index("idx_uploads_artifact_id", "artifact_id"),
         Index("idx_uploads_package_id", "package_id"),
         Index("idx_uploads_uploaded_at", "uploaded_at"),
+        Index("idx_uploads_package_uploaded_at", "package_id", "uploaded_at"),
+        Index("idx_uploads_uploaded_by_at", "uploaded_by", "uploaded_at"),
     )
@@ -220,4 +249,6 @@ class AuditLog(Base):
         Index("idx_audit_logs_resource", "resource"),
         Index("idx_audit_logs_user_id", "user_id"),
         Index("idx_audit_logs_timestamp", "timestamp"),
+        Index("idx_audit_logs_resource_timestamp", "resource", "timestamp"),
+        Index("idx_audit_logs_user_timestamp", "user_id", "timestamp"),
     )


@@ -520,40 +520,51 @@ def upload_artifact(
     )
 
     # Store file (uses multipart for large files)
-    sha256_hash, size, s3_key = storage.store(file.file, content_length)
+    storage_result = storage.store(file.file, content_length)
 
     # Check if this is a deduplicated upload
     deduplicated = False
 
     # Create or update artifact record
-    artifact = db.query(Artifact).filter(Artifact.id == sha256_hash).first()
+    artifact = db.query(Artifact).filter(Artifact.id == storage_result.sha256).first()
     if artifact:
         artifact.ref_count += 1
         deduplicated = True
         # Merge metadata if new metadata was extracted
-        if file_metadata and artifact.format_metadata:
-            artifact.format_metadata = {**artifact.format_metadata, **file_metadata}
+        if file_metadata and artifact.artifact_metadata:
+            artifact.artifact_metadata = {**artifact.artifact_metadata, **file_metadata}
         elif file_metadata:
-            artifact.format_metadata = file_metadata
+            artifact.artifact_metadata = file_metadata
+        # Update checksums if not already set
+        if not artifact.checksum_md5 and storage_result.md5:
+            artifact.checksum_md5 = storage_result.md5
+        if not artifact.checksum_sha1 and storage_result.sha1:
+            artifact.checksum_sha1 = storage_result.sha1
+        if not artifact.s3_etag and storage_result.s3_etag:
+            artifact.s3_etag = storage_result.s3_etag
     else:
         artifact = Artifact(
-            id=sha256_hash,
-            size=size,
+            id=storage_result.sha256,
+            size=storage_result.size,
             content_type=file.content_type,
            original_name=file.filename,
+            checksum_md5=storage_result.md5,
+            checksum_sha1=storage_result.sha1,
+            s3_etag=storage_result.s3_etag,
             created_by=user_id,
-            s3_key=s3_key,
-            format_metadata=file_metadata or {},
+            s3_key=storage_result.s3_key,
+            artifact_metadata=file_metadata or {},
         )
         db.add(artifact)
 
     # Record upload
     upload = Upload(
-        artifact_id=sha256_hash,
+        artifact_id=storage_result.sha256,
         package_id=package.id,
         original_name=file.filename,
         uploaded_by=user_id,
         source_ip=request.client.host if request.client else None,
+        deduplicated=deduplicated,
     )
     db.add(upload)
@@ -561,13 +572,13 @@ def upload_artifact(
     if tag:
         existing_tag = db.query(Tag).filter(Tag.package_id == package.id, Tag.name == tag).first()
         if existing_tag:
-            existing_tag.artifact_id = sha256_hash
+            existing_tag.artifact_id = storage_result.sha256
             existing_tag.created_by = user_id
         else:
            new_tag = Tag(
                 package_id=package.id,
                 name=tag,
-                artifact_id=sha256_hash,
+                artifact_id=storage_result.sha256,
                 created_by=user_id,
             )
             db.add(new_tag)
@@ -575,12 +586,16 @@ def upload_artifact(
     db.commit()
 
     return UploadResponse(
-        artifact_id=sha256_hash,
-        size=size,
+        artifact_id=storage_result.sha256,
+        sha256=storage_result.sha256,
+        size=storage_result.size,
         project=project_name,
         package=package_name,
         tag=tag,
-        format_metadata=artifact.format_metadata,
+        checksum_md5=storage_result.md5,
+        checksum_sha1=storage_result.sha1,
+        s3_etag=storage_result.s3_etag,
+        format_metadata=artifact.artifact_metadata,
         deduplicated=deduplicated,
     )
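
With `UploadResponse` now carrying every checksum from `StorageResult`, a client can record them at upload time. A hedged sketch; the upload URL is only a placeholder based on the truncated README example above (the real path is not completed here), and the `jq` filter simply picks the new response fields:

```bash
# Placeholder URL based on the README's upload example (path intentionally not completed)
UPLOAD_URL="http://localhost:8080/api/v1/project/my-project/releases/upload/..."

curl -s -X POST -F "file=@app-v1.0.0.tar.gz" "$UPLOAD_URL" \
  | jq '{artifact_id, sha256, checksum_md5, checksum_sha1, s3_etag, deduplicated}'
```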


@@ -99,9 +99,13 @@ class PackageDetailResponse(BaseModel):
 # Artifact schemas
 class ArtifactResponse(BaseModel):
     id: str
+    sha256: str  # Explicit SHA256 field (same as id)
     size: int
     content_type: Optional[str]
     original_name: Optional[str]
+    checksum_md5: Optional[str] = None
+    checksum_sha1: Optional[str] = None
+    s3_etag: Optional[str] = None
     created_at: datetime
     created_by: str
     ref_count: int
@@ -173,9 +177,13 @@ class ArtifactTagInfo(BaseModel):
 class ArtifactDetailResponse(BaseModel):
     """Artifact with list of tags/packages referencing it"""
     id: str
+    sha256: str  # Explicit SHA256 field (same as id)
     size: int
     content_type: Optional[str]
     original_name: Optional[str]
+    checksum_md5: Optional[str] = None
+    checksum_sha1: Optional[str] = None
+    s3_etag: Optional[str] = None
     created_at: datetime
     created_by: str
     ref_count: int
@@ -189,9 +197,13 @@ class ArtifactDetailResponse(BaseModel):
 class PackageArtifactResponse(BaseModel):
     """Artifact with tags for package artifact listing"""
     id: str
+    sha256: str  # Explicit SHA256 field (same as id)
     size: int
     content_type: Optional[str]
     original_name: Optional[str]
+    checksum_md5: Optional[str] = None
+    checksum_sha1: Optional[str] = None
+    s3_etag: Optional[str] = None
     created_at: datetime
     created_by: str
     format_metadata: Optional[Dict[str, Any]] = None
@@ -204,10 +216,14 @@ class PackageArtifactResponse(BaseModel):
 # Upload response
 class UploadResponse(BaseModel):
     artifact_id: str
+    sha256: str  # Explicit SHA256 field (same as artifact_id)
     size: int
     project: str
     package: str
     tag: Optional[str]
+    checksum_md5: Optional[str] = None
+    checksum_sha1: Optional[str] = None
+    s3_etag: Optional[str] = None
     format_metadata: Optional[Dict[str, Any]] = None
     deduplicated: bool = False


@@ -1,6 +1,6 @@
 import hashlib
 import logging
-from typing import BinaryIO, Tuple, Optional, Dict, Any, Generator
+from typing import BinaryIO, Tuple, Optional, Dict, Any, Generator, NamedTuple
 
 import boto3
 from botocore.config import Config
 from botocore.exceptions import ClientError
@@ -18,6 +18,16 @@ MULTIPART_CHUNK_SIZE = 10 * 1024 * 1024
 HASH_CHUNK_SIZE = 8 * 1024 * 1024
 
 
+class StorageResult(NamedTuple):
+    """Result of storing a file with all computed checksums"""
+    sha256: str
+    size: int
+    s3_key: str
+    md5: Optional[str] = None
+    sha1: Optional[str] = None
+    s3_etag: Optional[str] = None
+
+
 class S3Storage:
     def __init__(self):
         config = Config(s3={"addressing_style": "path"} if settings.s3_use_path_style else {})
@@ -34,9 +44,9 @@ class S3Storage:
         # Store active multipart uploads for resumable support
         self._active_uploads: Dict[str, Dict[str, Any]] = {}
 
-    def store(self, file: BinaryIO, content_length: Optional[int] = None) -> Tuple[str, int, str]:
+    def store(self, file: BinaryIO, content_length: Optional[int] = None) -> StorageResult:
         """
-        Store a file and return its SHA256 hash, size, and s3_key.
+        Store a file and return StorageResult with all checksums.
         Content-addressable: if the file already exists, just return the hash.
         Uses multipart upload for files larger than MULTIPART_THRESHOLD.
         """
@@ -46,45 +56,76 @@ class S3Storage:
         else:
             return self._store_multipart(file, content_length)
 
-    def _store_simple(self, file: BinaryIO) -> Tuple[str, int, str]:
+    def _store_simple(self, file: BinaryIO) -> StorageResult:
         """Store a small file using simple put_object"""
-        # Read file and compute hash
+        # Read file and compute all hashes
         content = file.read()
         sha256_hash = hashlib.sha256(content).hexdigest()
+        md5_hash = hashlib.md5(content).hexdigest()
+        sha1_hash = hashlib.sha1(content).hexdigest()
         size = len(content)
 
         # Check if already exists
         s3_key = f"fruits/{sha256_hash[:2]}/{sha256_hash[2:4]}/{sha256_hash}"
+        s3_etag = None
         if not self._exists(s3_key):
-            self.client.put_object(
+            response = self.client.put_object(
                 Bucket=self.bucket,
                 Key=s3_key,
                 Body=content,
             )
+            s3_etag = response.get("ETag", "").strip('"')
+        else:
+            # Get existing ETag
+            obj_info = self.get_object_info(s3_key)
+            if obj_info:
+                s3_etag = obj_info.get("etag", "").strip('"')
 
-        return sha256_hash, size, s3_key
+        return StorageResult(
+            sha256=sha256_hash,
+            size=size,
+            s3_key=s3_key,
+            md5=md5_hash,
+            sha1=sha1_hash,
+            s3_etag=s3_etag,
+        )
 
-    def _store_multipart(self, file: BinaryIO, content_length: int) -> Tuple[str, int, str]:
+    def _store_multipart(self, file: BinaryIO, content_length: int) -> StorageResult:
         """Store a large file using S3 multipart upload with streaming hash computation"""
-        # First pass: compute hash by streaming through file
-        hasher = hashlib.sha256()
+        # First pass: compute all hashes by streaming through file
+        sha256_hasher = hashlib.sha256()
+        md5_hasher = hashlib.md5()
+        sha1_hasher = hashlib.sha1()
        size = 0
 
-        # Read file in chunks to compute hash
+        # Read file in chunks to compute hashes
         while True:
             chunk = file.read(HASH_CHUNK_SIZE)
             if not chunk:
                 break
-            hasher.update(chunk)
+            sha256_hasher.update(chunk)
+            md5_hasher.update(chunk)
+            sha1_hasher.update(chunk)
             size += len(chunk)
 
-        sha256_hash = hasher.hexdigest()
+        sha256_hash = sha256_hasher.hexdigest()
+        md5_hash = md5_hasher.hexdigest()
+        sha1_hash = sha1_hasher.hexdigest()
         s3_key = f"fruits/{sha256_hash[:2]}/{sha256_hash[2:4]}/{sha256_hash}"
 
         # Check if already exists (deduplication)
         if self._exists(s3_key):
-            return sha256_hash, size, s3_key
+            obj_info = self.get_object_info(s3_key)
+            s3_etag = obj_info.get("etag", "").strip('"') if obj_info else None
+            return StorageResult(
+                sha256=sha256_hash,
+                size=size,
+                s3_key=s3_key,
+                md5=md5_hash,
+                sha1=sha1_hash,
+                s3_etag=s3_etag,
+            )
 
         # Seek back to start for upload
         file.seek(0)
@@ -116,14 +157,22 @@ class S3Storage:
                 part_number += 1
 
             # Complete multipart upload
-            self.client.complete_multipart_upload(
+            complete_response = self.client.complete_multipart_upload(
                 Bucket=self.bucket,
                 Key=s3_key,
                 UploadId=upload_id,
                 MultipartUpload={"Parts": parts},
             )
+            s3_etag = complete_response.get("ETag", "").strip('"')
 
-            return sha256_hash, size, s3_key
+            return StorageResult(
+                sha256=sha256_hash,
+                size=size,
+                s3_key=s3_key,
+                md5=md5_hash,
+                sha1=sha1_hash,
+                s3_etag=s3_etag,
+            )
 
         except Exception as e:
             # Abort multipart upload on failure
@@ -135,33 +184,50 @@ class S3Storage:
             )
             raise
 
-    def store_streaming(self, chunks: Generator[bytes, None, None]) -> Tuple[str, int, str]:
+    def store_streaming(self, chunks: Generator[bytes, None, None]) -> StorageResult:
         """
         Store a file from a stream of chunks.
         First accumulates to compute hash, then uploads.
         For truly large files, consider using initiate_resumable_upload instead.
         """
-        # Accumulate chunks and compute hash
-        hasher = hashlib.sha256()
+        # Accumulate chunks and compute all hashes
+        sha256_hasher = hashlib.sha256()
+        md5_hasher = hashlib.md5()
+        sha1_hasher = hashlib.sha1()
         all_chunks = []
         size = 0
 
         for chunk in chunks:
-            hasher.update(chunk)
+            sha256_hasher.update(chunk)
+            md5_hasher.update(chunk)
+            sha1_hasher.update(chunk)
             all_chunks.append(chunk)
             size += len(chunk)
 
-        sha256_hash = hasher.hexdigest()
+        sha256_hash = sha256_hasher.hexdigest()
+        md5_hash = md5_hasher.hexdigest()
+        sha1_hash = sha1_hasher.hexdigest()
         s3_key = f"fruits/{sha256_hash[:2]}/{sha256_hash[2:4]}/{sha256_hash}"
+        s3_etag = None
 
         # Check if already exists
         if self._exists(s3_key):
-            return sha256_hash, size, s3_key
+            obj_info = self.get_object_info(s3_key)
+            s3_etag = obj_info.get("etag", "").strip('"') if obj_info else None
+            return StorageResult(
+                sha256=sha256_hash,
+                size=size,
+                s3_key=s3_key,
+                md5=md5_hash,
+                sha1=sha1_hash,
+                s3_etag=s3_etag,
+            )
 
         # Upload based on size
         if size < MULTIPART_THRESHOLD:
             content = b"".join(all_chunks)
-            self.client.put_object(Bucket=self.bucket, Key=s3_key, Body=content)
+            response = self.client.put_object(Bucket=self.bucket, Key=s3_key, Body=content)
+            s3_etag = response.get("ETag", "").strip('"')
         else:
             # Use multipart for large files
             mpu = self.client.create_multipart_upload(Bucket=self.bucket, Key=s3_key)
@@ -205,12 +271,13 @@ class S3Storage:
                     "ETag": response["ETag"],
                 })
 
-                self.client.complete_multipart_upload(
+                complete_response = self.client.complete_multipart_upload(
                     Bucket=self.bucket,
                     Key=s3_key,
                     UploadId=upload_id,
                     MultipartUpload={"Parts": parts},
                 )
+                s3_etag = complete_response.get("ETag", "").strip('"')
 
             except Exception as e:
                 logger.error(f"Streaming multipart upload failed: {e}")
@@ -221,7 +288,14 @@ class S3Storage:
                 )
                 raise
 
-        return sha256_hash, size, s3_key
+        return StorageResult(
+            sha256=sha256_hash,
+            size=size,
+            s3_key=s3_key,
+            md5=md5_hash,
+            sha1=sha1_hash,
+            s3_etag=s3_etag,
+        )
 
     def initiate_resumable_upload(self, expected_hash: str) -> Dict[str, Any]:
         """

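`store()`, `_store_simple()`, `_store_multipart()`, and `store_streaming()` now all return a `StorageResult` with SHA256, MD5, SHA1, and the S3 ETag computed in a single pass over the data. The digests can be cross-checked against a local copy of the file with standard tools; note that an S3 ETag generally equals the MD5 only for single-part, unencrypted uploads (multipart ETags carry a `-N` suffix), which is why it is stored for reference rather than used as a content checksum:

```bash
# Each digest should match the corresponding StorageResult field for the same bytes
sha256sum app-v1.0.0.tar.gz   # -> StorageResult.sha256 (also the artifact id)
md5sum    app-v1.0.0.tar.gz   # -> StorageResult.md5
sha1sum   app-v1.0.0.tar.gz   # -> StorageResult.sha1
```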
container-test.sh (new executable file, 7 lines)

@@ -0,0 +1,7 @@
#!/bin/sh
echo "testing container"
# Without a sleep, local testing shows no output because attaching to the logs happens after the container is done executing
# this script.
sleep 1

docker-compose.local.yml (new file, 122 lines)

@@ -0,0 +1,122 @@
version: '3.8'

services:
  orchard-server:
    build:
      context: .
      dockerfile: Dockerfile.local
    ports:
      - "8080:8080"
    environment:
      - ORCHARD_SERVER_HOST=0.0.0.0
      - ORCHARD_SERVER_PORT=8080
      - ORCHARD_DATABASE_HOST=postgres
      - ORCHARD_DATABASE_PORT=5432
      - ORCHARD_DATABASE_USER=orchard
      - ORCHARD_DATABASE_PASSWORD=orchard_secret
      - ORCHARD_DATABASE_DBNAME=orchard
      - ORCHARD_DATABASE_SSLMODE=disable
      - ORCHARD_S3_ENDPOINT=http://minio:9000
      - ORCHARD_S3_REGION=us-east-1
      - ORCHARD_S3_BUCKET=orchard-artifacts
      - ORCHARD_S3_ACCESS_KEY_ID=minioadmin
      - ORCHARD_S3_SECRET_ACCESS_KEY=minioadmin
      - ORCHARD_S3_USE_PATH_STYLE=true
      - ORCHARD_REDIS_HOST=redis
      - ORCHARD_REDIS_PORT=6379
    depends_on:
      postgres:
        condition: service_healthy
      minio:
        condition: service_healthy
      redis:
        condition: service_healthy
    networks:
      - orchard-network
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
      interval: 30s
      timeout: 3s
      start_period: 10s
      retries: 3

  postgres:
    image: postgres:16-alpine
    environment:
      - POSTGRES_USER=orchard
      - POSTGRES_PASSWORD=orchard_secret
      - POSTGRES_DB=orchard
    volumes:
      - postgres-data-local:/var/lib/postgresql/data
      - ./migrations:/docker-entrypoint-initdb.d:ro
    ports:
      - "5432:5432"
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U orchard -d orchard"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - orchard-network
    restart: unless-stopped

  minio:
    image: minio/minio:latest
    command: server /data --console-address ":9001"
    environment:
      - MINIO_ROOT_USER=minioadmin
      - MINIO_ROOT_PASSWORD=minioadmin
    volumes:
      - minio-data-local:/data
    ports:
      - "9000:9000"
      - "9001:9001"
    healthcheck:
      test: ["CMD", "mc", "ready", "local"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - orchard-network
    restart: unless-stopped

  minio-init:
    image: minio/mc:latest
    depends_on:
      minio:
        condition: service_healthy
    entrypoint: >
      /bin/sh -c "
      mc alias set myminio http://minio:9000 minioadmin minioadmin;
      mc mb myminio/orchard-artifacts --ignore-existing;
      mc anonymous set download myminio/orchard-artifacts;
      exit 0;
      "
    networks:
      - orchard-network

  redis:
    image: redis:7-alpine
    command: redis-server --appendonly yes
    volumes:
      - redis-data-local:/data
    ports:
      - "6379:6379"
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - orchard-network
    restart: unless-stopped

volumes:
  postgres-data-local:
  minio-data-local:
  redis-data-local:

networks:
  orchard-network:
    driver: bridge
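
docker-compose.local.yml builds the application from Dockerfile.local and wires it to local Postgres, MinIO, and Redis containers using the credentials shown above. A typical bring-up might look like this (the exact `docker compose` invocation depends on your Docker version):

```bash
# Build and start the whole local stack in the background
docker compose -f docker-compose.local.yml up --build -d

# API health check once the containers report healthy
curl -f http://localhost:8080/health

# MinIO console is published on 9001 (minioadmin/minioadmin, per the compose file)
# http://localhost:9001
```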


@@ -36,7 +36,7 @@ services:
     restart: unless-stopped
 
   postgres:
-    image: postgres:16-alpine
+    image: containers.global.bsf.tools/postgres:16-alpine
     environment:
       - POSTGRES_USER=orchard
       - POSTGRES_PASSWORD=orchard_secret
@@ -56,7 +56,7 @@ services:
     restart: unless-stopped
 
   minio:
-    image: minio/minio:latest
+    image: containers.global.bsf.tools/minio/minio:latest
     command: server /data --console-address ":9001"
     environment:
       - MINIO_ROOT_USER=minioadmin
@@ -76,7 +76,7 @@ services:
     restart: unless-stopped
 
   minio-init:
-    image: minio/mc:latest
+    image: containers.global.bsf.tools/minio/mc:latest
     depends_on:
       minio:
         condition: service_healthy
@@ -91,7 +91,7 @@ services:
       - orchard-network
 
   redis:
-    image: redis:7-alpine
+    image: containers.global.bsf.tools/redis:7-alpine
     command: redis-server --appendonly yes
     volumes:
       - redis-data:/data


@@ -3,7 +3,6 @@ import { Link, useSearchParams } from 'react-router-dom';
 import { Project, PaginatedResponse } from '../types';
 import { listProjects, createProject } from '../api';
 import { Badge } from '../components/Badge';
-import { SearchInput } from '../components/SearchInput';
 import { SortDropdown, SortOption } from '../components/SortDropdown';
 import { FilterDropdown, FilterOption } from '../components/FilterDropdown';
 import { FilterChip, FilterChipGroup } from '../components/FilterChip';
@@ -34,7 +33,6 @@ function Home() {
   // Get params from URL
   const page = parseInt(searchParams.get('page') || '1', 10);
-  const search = searchParams.get('search') || '';
   const sort = searchParams.get('sort') || 'name';
   const order = (searchParams.get('order') || 'asc') as 'asc' | 'desc';
   const visibility = searchParams.get('visibility') || '';
@@ -59,7 +57,6 @@ function Home() {
       setLoading(true);
       const data = await listProjects({
         page,
-        search,
         sort,
         order,
         visibility: visibility as 'public' | 'private' | undefined || undefined,
@@ -71,7 +68,7 @@ function Home() {
     } finally {
       setLoading(false);
     }
-  }, [page, search, sort, order, visibility]);
+  }, [page, sort, order, visibility]);
 
   useEffect(() => {
     loadProjects();
@@ -92,10 +89,6 @@ function Home() {
     }
   }
 
-  const handleSearchChange = (value: string) => {
-    updateParams({ search: value, page: '1' });
-  };
-
   const handleSortChange = (newSort: string, newOrder: 'asc' | 'desc') => {
     updateParams({ sort: newSort, order: newOrder, page: '1' });
   };
@@ -112,7 +105,7 @@ function Home() {
     setSearchParams({});
   };
 
-  const hasActiveFilters = search !== '' || visibility !== '';
+  const hasActiveFilters = visibility !== '';
 
   const projects = projectsData?.items || [];
   const pagination = projectsData?.pagination;
@@ -172,12 +165,6 @@ function Home() {
       )}
 
       <div className="list-controls">
-        <SearchInput
-          value={search}
-          onChange={handleSearchChange}
-          placeholder="Search projects..."
-          className="list-controls__search"
-        />
         <FilterDropdown
           label="Visibility"
           options={VISIBILITY_OPTIONS}
@@ -189,7 +176,6 @@ function Home() {
       {hasActiveFilters && (
         <FilterChipGroup onClearAll={clearFilters}>
-          {search && <FilterChip label="Search" value={search} onRemove={() => handleSearchChange('')} />}
           {visibility && (
             <FilterChip
               label="Visibility"


@@ -325,7 +325,7 @@ function PackagePage() {
         <SearchInput
           value={search}
           onChange={handleSearchChange}
-          placeholder="Search tags..."
+          placeholder="Filter tags..."
           className="list-controls__search"
         />
         <SortDropdown options={SORT_OPTIONS} value={sort} order={order} onChange={handleSortChange} />
@@ -333,7 +333,7 @@ function PackagePage() {
       {hasActiveFilters && (
         <FilterChipGroup onClearAll={clearFilters}>
-          {search && <FilterChip label="Search" value={search} onRemove={() => handleSearchChange('')} />}
+          {search && <FilterChip label="Filter" value={search} onRemove={() => handleSearchChange('')} />}
         </FilterChipGroup>
       )}


@@ -226,7 +226,7 @@ function ProjectPage() {
         <SearchInput
           value={search}
           onChange={handleSearchChange}
-          placeholder="Search packages..."
+          placeholder="Filter packages..."
           className="list-controls__search"
         />
         <select
@@ -246,7 +246,7 @@ function ProjectPage() {
       {hasActiveFilters && (
         <FilterChipGroup onClearAll={clearFilters}>
-          {search && <FilterChip label="Search" value={search} onRemove={() => handleSearchChange('')} />}
+          {search && <FilterChip label="Filter" value={search} onRemove={() => handleSearchChange('')} />}
           {format && <FilterChip label="Format" value={format} onRemove={() => handleFormatChange('')} />}
         </FilterChipGroup>
       )}


@@ -14,6 +14,7 @@ CREATE TABLE IF NOT EXISTS projects (
 
 CREATE INDEX idx_projects_name ON projects(name);
 CREATE INDEX idx_projects_created_by ON projects(created_by);
+CREATE INDEX idx_projects_public ON projects(name) WHERE is_public = true;
 
 -- Packages (collections within projects)
 CREATE TABLE IF NOT EXISTS packages (
@@ -21,6 +22,8 @@ CREATE TABLE IF NOT EXISTS packages (
     project_id UUID NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
     name VARCHAR(255) NOT NULL,
     description TEXT,
+    format VARCHAR(50) DEFAULT 'generic',  -- package type: generic, npm, pypi, docker, etc.
+    platform VARCHAR(50) DEFAULT 'any',    -- target platform: any, linux, darwin, windows, etc.
     created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
     updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
     UNIQUE(project_id, name)
@@ -28,21 +31,30 @@ CREATE TABLE IF NOT EXISTS packages (
 
 CREATE INDEX idx_packages_project_id ON packages(project_id);
 CREATE INDEX idx_packages_name ON packages(name);
+CREATE INDEX idx_packages_format ON packages(format);
+CREATE INDEX idx_packages_platform ON packages(platform);
 
 -- Artifacts (Content-Addressable)
 CREATE TABLE IF NOT EXISTS artifacts (
     id VARCHAR(64) PRIMARY KEY,  -- SHA256 hash
-    size BIGINT NOT NULL,
+    size BIGINT NOT NULL CHECK (size > 0),
     content_type VARCHAR(255),
     original_name VARCHAR(1024),
+    checksum_md5 VARCHAR(32),   -- MD5 hash for additional verification
+    checksum_sha1 VARCHAR(40),  -- SHA1 hash for compatibility
+    s3_etag VARCHAR(64),        -- S3 ETag for verification
+    metadata JSONB,             -- format-specific metadata
     created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
     created_by VARCHAR(255) NOT NULL,
-    ref_count INTEGER DEFAULT 1,
+    ref_count INTEGER DEFAULT 1 CHECK (ref_count >= 0),
     s3_key VARCHAR(1024) NOT NULL
 );
 
 CREATE INDEX idx_artifacts_created_at ON artifacts(created_at);
 CREATE INDEX idx_artifacts_created_by ON artifacts(created_by);
+CREATE INDEX idx_artifacts_metadata ON artifacts USING GIN (metadata);
+CREATE INDEX idx_artifacts_checksum_md5 ON artifacts(checksum_md5) WHERE checksum_md5 IS NOT NULL;
+CREATE INDEX idx_artifacts_checksum_sha1 ON artifacts(checksum_sha1) WHERE checksum_sha1 IS NOT NULL;
 
 -- Tags (Aliases pointing to artifacts)
 CREATE TABLE IF NOT EXISTS tags (
@@ -51,12 +63,14 @@ CREATE TABLE IF NOT EXISTS tags (
     name VARCHAR(255) NOT NULL,
     artifact_id VARCHAR(64) NOT NULL REFERENCES artifacts(id),
     created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
+    updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
     created_by VARCHAR(255) NOT NULL,
     UNIQUE(package_id, name)
 );
 
 CREATE INDEX idx_tags_package_id ON tags(package_id);
 CREATE INDEX idx_tags_artifact_id ON tags(artifact_id);
+CREATE INDEX idx_tags_package_created_at ON tags(package_id, created_at DESC);
 
 -- Tag History (for rollback capability)
 CREATE TABLE IF NOT EXISTS tag_history (
@@ -64,11 +78,13 @@ CREATE TABLE IF NOT EXISTS tag_history (
     tag_id UUID NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
     old_artifact_id VARCHAR(64) REFERENCES artifacts(id),
     new_artifact_id VARCHAR(64) NOT NULL REFERENCES artifacts(id),
+    change_type VARCHAR(20) NOT NULL DEFAULT 'update' CHECK (change_type IN ('create', 'update', 'delete')),
     changed_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
     changed_by VARCHAR(255) NOT NULL
 );
 
 CREATE INDEX idx_tag_history_tag_id ON tag_history(tag_id);
+CREATE INDEX idx_tag_history_changed_at ON tag_history(changed_at);
 
 -- Uploads (upload event records)
 CREATE TABLE IF NOT EXISTS uploads (
@@ -76,6 +92,11 @@ CREATE TABLE IF NOT EXISTS uploads (
     artifact_id VARCHAR(64) NOT NULL REFERENCES artifacts(id),
     package_id UUID NOT NULL REFERENCES packages(id),
     original_name VARCHAR(1024),
+    tag_name VARCHAR(255),                   -- tag assigned during upload
+    user_agent VARCHAR(512),                 -- client identification
+    duration_ms INTEGER,                     -- upload timing in milliseconds
+    deduplicated BOOLEAN DEFAULT false,      -- whether artifact was deduplicated
+    checksum_verified BOOLEAN DEFAULT true,  -- whether checksum was verified
     uploaded_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
     uploaded_by VARCHAR(255) NOT NULL,
     source_ip VARCHAR(45)
@@ -84,6 +105,8 @@ CREATE TABLE IF NOT EXISTS uploads (
 CREATE INDEX idx_uploads_artifact_id ON uploads(artifact_id);
 CREATE INDEX idx_uploads_package_id ON uploads(package_id);
 CREATE INDEX idx_uploads_uploaded_at ON uploads(uploaded_at);
+CREATE INDEX idx_uploads_package_uploaded_at ON uploads(package_id, uploaded_at DESC);
+CREATE INDEX idx_uploads_uploaded_by_at ON uploads(uploaded_by, uploaded_at DESC);
 
 -- Consumers (Dependency tracking)
 CREATE TABLE IF NOT EXISTS consumers (
@@ -141,14 +164,17 @@ CREATE INDEX idx_audit_logs_action ON audit_logs(action);
 CREATE INDEX idx_audit_logs_resource ON audit_logs(resource);
 CREATE INDEX idx_audit_logs_user_id ON audit_logs(user_id);
 CREATE INDEX idx_audit_logs_timestamp ON audit_logs(timestamp);
+CREATE INDEX idx_audit_logs_resource_timestamp ON audit_logs(resource, timestamp DESC);
+CREATE INDEX idx_audit_logs_user_timestamp ON audit_logs(user_id, timestamp DESC);
+CREATE INDEX idx_audit_logs_details ON audit_logs USING GIN (details);
 
 -- Trigger to update tag history on changes
 CREATE OR REPLACE FUNCTION track_tag_changes()
 RETURNS TRIGGER AS $$
 BEGIN
     IF TG_OP = 'UPDATE' AND OLD.artifact_id != NEW.artifact_id THEN
-        INSERT INTO tag_history (id, tag_id, old_artifact_id, new_artifact_id, changed_at, changed_by)
-        VALUES (gen_random_uuid(), NEW.id, OLD.artifact_id, NEW.artifact_id, NOW(), NEW.created_by);
+        INSERT INTO tag_history (id, tag_id, old_artifact_id, new_artifact_id, change_type, changed_at, changed_by)
+        VALUES (gen_random_uuid(), NEW.id, OLD.artifact_id, NEW.artifact_id, 'update', NOW(), NEW.created_by);
     END IF;
     RETURN NEW;
 END;
@@ -158,3 +184,72 @@ CREATE TRIGGER tag_changes_trigger
 AFTER UPDATE ON tags
 FOR EACH ROW
 EXECUTE FUNCTION track_tag_changes();
+
+-- Trigger to auto-update updated_at timestamps
+CREATE OR REPLACE FUNCTION update_updated_at_column()
+RETURNS TRIGGER AS $$
+BEGIN
+    NEW.updated_at = NOW();
+    RETURN NEW;
+END;
+$$ LANGUAGE plpgsql;
+
+CREATE TRIGGER projects_updated_at_trigger
+BEFORE UPDATE ON projects
+FOR EACH ROW
+EXECUTE FUNCTION update_updated_at_column();
+
+CREATE TRIGGER packages_updated_at_trigger
+BEFORE UPDATE ON packages
+FOR EACH ROW
+EXECUTE FUNCTION update_updated_at_column();
+
+CREATE TRIGGER tags_updated_at_trigger
+BEFORE UPDATE ON tags
+FOR EACH ROW
+EXECUTE FUNCTION update_updated_at_column();
+
+-- Triggers for maintaining artifact ref_count accuracy
+CREATE OR REPLACE FUNCTION increment_artifact_ref_count()
+RETURNS TRIGGER AS $$
+BEGIN
+    UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id;
+    RETURN NEW;
+END;
+$$ LANGUAGE plpgsql;
+
+CREATE OR REPLACE FUNCTION decrement_artifact_ref_count()
+RETURNS TRIGGER AS $$
+BEGIN
+    UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id;
+    RETURN OLD;
+END;
+$$ LANGUAGE plpgsql;
+
+CREATE OR REPLACE FUNCTION update_artifact_ref_count()
+RETURNS TRIGGER AS $$
+BEGIN
+    IF OLD.artifact_id != NEW.artifact_id THEN
+        UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id;
+        UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id;
+    END IF;
+    RETURN NEW;
+END;
+$$ LANGUAGE plpgsql;
+
+-- Note: ref_count triggers on tags table
+-- These track how many tags reference each artifact
+CREATE TRIGGER tags_ref_count_insert_trigger
+AFTER INSERT ON tags
+FOR EACH ROW
+EXECUTE FUNCTION increment_artifact_ref_count();
+
+CREATE TRIGGER tags_ref_count_delete_trigger
+AFTER DELETE ON tags
+FOR EACH ROW
+EXECUTE FUNCTION decrement_artifact_ref_count();
+
+CREATE TRIGGER tags_ref_count_update_trigger
+AFTER UPDATE ON tags
+FOR EACH ROW
+EXECUTE FUNCTION update_artifact_ref_count();
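
The new triggers keep `artifacts.ref_count` in step with the tags that reference each artifact. A hedged psql spot-check that lists the trigger-maintained counter next to the actual number of referencing tags (the two can legitimately differ where uploads also bump the counter, so this is for inspection rather than a strict invariant; the query itself is illustrative):

```bash
# Connection details follow the docker-compose defaults (orchard/orchard_secret)
PGPASSWORD=orchard_secret psql -h localhost -U orchard -d orchard -c "
  SELECT a.id, a.ref_count, COUNT(t.id) AS tag_refs
  FROM artifacts a
  LEFT JOIN tags t ON t.artifact_id = a.id
  GROUP BY a.id, a.ref_count
  ORDER BY a.ref_count DESC
  LIMIT 10;"
```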


@@ -0,0 +1,170 @@
-- Migration 002: Schema Enhancements
-- Adds new fields, indexes, and triggers for improved functionality
-- ============================================
-- Packages: Add format and platform fields
-- ============================================
ALTER TABLE packages ADD COLUMN IF NOT EXISTS format VARCHAR(50) DEFAULT 'generic';
ALTER TABLE packages ADD COLUMN IF NOT EXISTS platform VARCHAR(50) DEFAULT 'any';
CREATE INDEX IF NOT EXISTS idx_packages_format ON packages(format);
CREATE INDEX IF NOT EXISTS idx_packages_platform ON packages(platform);
-- ============================================
-- Artifacts: Add checksum_md5, metadata, and CHECK constraints
-- ============================================
ALTER TABLE artifacts ADD COLUMN IF NOT EXISTS checksum_md5 VARCHAR(32);
ALTER TABLE artifacts ADD COLUMN IF NOT EXISTS metadata JSONB;
-- Add CHECK constraints (will fail if data violates them)
DO $$
BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_constraint WHERE conname = 'artifacts_ref_count_check') THEN
ALTER TABLE artifacts ADD CONSTRAINT artifacts_ref_count_check CHECK (ref_count >= 0);
END IF;
IF NOT EXISTS (SELECT 1 FROM pg_constraint WHERE conname = 'artifacts_size_check') THEN
ALTER TABLE artifacts ADD CONSTRAINT artifacts_size_check CHECK (size > 0);
END IF;
END $$;
CREATE INDEX IF NOT EXISTS idx_artifacts_metadata ON artifacts USING GIN (metadata);
-- ============================================
-- Tags: Add updated_at and composite index
-- ============================================
ALTER TABLE tags ADD COLUMN IF NOT EXISTS updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW();
CREATE INDEX IF NOT EXISTS idx_tags_package_created_at ON tags(package_id, created_at DESC);
-- ============================================
-- Tag History: Add change_type and index
-- ============================================
ALTER TABLE tag_history ADD COLUMN IF NOT EXISTS change_type VARCHAR(20) DEFAULT 'update';
DO $$
BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_constraint WHERE conname = 'tag_history_change_type_check') THEN
ALTER TABLE tag_history ADD CONSTRAINT tag_history_change_type_check
CHECK (change_type IN ('create', 'update', 'delete'));
END IF;
END $$;
CREATE INDEX IF NOT EXISTS idx_tag_history_changed_at ON tag_history(changed_at);
-- ============================================
-- Uploads: Add new fields and composite indexes
-- ============================================
ALTER TABLE uploads ADD COLUMN IF NOT EXISTS tag_name VARCHAR(255);
ALTER TABLE uploads ADD COLUMN IF NOT EXISTS user_agent VARCHAR(512);
ALTER TABLE uploads ADD COLUMN IF NOT EXISTS duration_ms INTEGER;
ALTER TABLE uploads ADD COLUMN IF NOT EXISTS deduplicated BOOLEAN DEFAULT false;
ALTER TABLE uploads ADD COLUMN IF NOT EXISTS checksum_verified BOOLEAN DEFAULT true;
CREATE INDEX IF NOT EXISTS idx_uploads_package_uploaded_at ON uploads(package_id, uploaded_at DESC);
CREATE INDEX IF NOT EXISTS idx_uploads_uploaded_by_at ON uploads(uploaded_by, uploaded_at DESC);
-- ============================================
-- Audit Logs: Add composite indexes and GIN index
-- ============================================
CREATE INDEX IF NOT EXISTS idx_audit_logs_resource_timestamp ON audit_logs(resource, timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_audit_logs_user_timestamp ON audit_logs(user_id, timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_audit_logs_details ON audit_logs USING GIN (details);
-- ============================================
-- Projects: Add partial index for public projects
-- ============================================
CREATE INDEX IF NOT EXISTS idx_projects_public ON projects(name) WHERE is_public = true;
-- ============================================
-- Triggers: Update tag_changes trigger for change_type
-- ============================================
CREATE OR REPLACE FUNCTION track_tag_changes()
RETURNS TRIGGER AS $$
BEGIN
IF TG_OP = 'UPDATE' AND OLD.artifact_id != NEW.artifact_id THEN
INSERT INTO tag_history (id, tag_id, old_artifact_id, new_artifact_id, change_type, changed_at, changed_by)
VALUES (gen_random_uuid(), NEW.id, OLD.artifact_id, NEW.artifact_id, 'update', NOW(), NEW.created_by);
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- ============================================
-- Triggers: Auto-update updated_at timestamps
-- ============================================
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Drop triggers if they exist, then recreate
DROP TRIGGER IF EXISTS projects_updated_at_trigger ON projects;
CREATE TRIGGER projects_updated_at_trigger
BEFORE UPDATE ON projects
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
DROP TRIGGER IF EXISTS packages_updated_at_trigger ON packages;
CREATE TRIGGER packages_updated_at_trigger
BEFORE UPDATE ON packages
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
DROP TRIGGER IF EXISTS tags_updated_at_trigger ON tags;
CREATE TRIGGER tags_updated_at_trigger
BEFORE UPDATE ON tags
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
-- ============================================
-- Triggers: Maintain artifact ref_count accuracy
-- ============================================
CREATE OR REPLACE FUNCTION increment_artifact_ref_count()
RETURNS TRIGGER AS $$
BEGIN
UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION decrement_artifact_ref_count()
RETURNS TRIGGER AS $$
BEGIN
UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id;
RETURN OLD;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION update_artifact_ref_count()
RETURNS TRIGGER AS $$
BEGIN
IF OLD.artifact_id != NEW.artifact_id THEN
UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id;
UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id;
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Note: ref_count triggers on tags table
-- These track how many tags reference each artifact
DROP TRIGGER IF EXISTS tags_ref_count_insert_trigger ON tags;
CREATE TRIGGER tags_ref_count_insert_trigger
AFTER INSERT ON tags
FOR EACH ROW
EXECUTE FUNCTION increment_artifact_ref_count();
DROP TRIGGER IF EXISTS tags_ref_count_delete_trigger ON tags;
CREATE TRIGGER tags_ref_count_delete_trigger
AFTER DELETE ON tags
FOR EACH ROW
EXECUTE FUNCTION decrement_artifact_ref_count();
DROP TRIGGER IF EXISTS tags_ref_count_update_trigger ON tags;
CREATE TRIGGER tags_ref_count_update_trigger
AFTER UPDATE ON tags
FOR EACH ROW
EXECUTE FUNCTION update_artifact_ref_count();


@@ -0,0 +1,12 @@
-- Migration 003: Additional Checksum Fields
-- Adds checksum_sha1 and s3_etag fields to artifacts table
-- ============================================
-- Artifacts: Add checksum_sha1 and s3_etag fields
-- ============================================
ALTER TABLE artifacts ADD COLUMN IF NOT EXISTS checksum_sha1 VARCHAR(40);
ALTER TABLE artifacts ADD COLUMN IF NOT EXISTS s3_etag VARCHAR(64);
-- Create indexes for checksum lookups (optional, for verification queries)
CREATE INDEX IF NOT EXISTS idx_artifacts_checksum_md5 ON artifacts(checksum_md5) WHERE checksum_md5 IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_artifacts_checksum_sha1 ON artifacts(checksum_sha1) WHERE checksum_sha1 IS NOT NULL;
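
Both migration scripts are written to be idempotent (`ADD COLUMN IF NOT EXISTS`, `CREATE INDEX IF NOT EXISTS`, `DROP TRIGGER IF EXISTS` before re-creating), so they can be re-run on an existing database. A hedged example of applying them in order; the `migrations/` path matches the directory mounted into Postgres in the compose files, and the connection details are the compose defaults:

```bash
# Fresh databases pick up the full schema from docker-entrypoint-initdb.d automatically;
# existing databases apply the incremental migrations by hand, in order.
export PGPASSWORD=orchard_secret
psql -h localhost -U orchard -d orchard -f migrations/002_schema_enhancements.sql
psql -h localhost -U orchard -d orchard -f migrations/003_checksum_fields.sql
```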

tasks.py (new file, 2 lines)

@@ -0,0 +1,2 @@
# Import docker project template tasks
from prosper.projects.docker import *