1 Commit

Author: Mondo Diaz
SHA1: ec6b3f0ed8
Date: 2025-12-12 12:18:01 -06:00

Implement database storage layer

- Add connection pool configuration (pool_size, max_overflow, timeout, recycle)
- Add transaction management utilities (transaction, savepoint, retry_on_deadlock)
- Create repository pattern classes for all entities (Project, Package, Artifact, Tag, Upload)
- Implement ref_count decrement and cleanup service
- Add query helper functions (search, filtering, pagination, stats)
- Add database constraints (check_ref_count_non_negative, check_size_positive)
- Add performance indexes (idx_artifacts_ref_count, composite indexes for packages/tags)
- Initialize Alembic migrations for future schema changes
34 changed files with 186 additions and 2471 deletions
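The transaction utilities named in the commit message (`transaction`, `savepoint`, `retry_on_deadlock`) do not appear in the diffs below; as a minimal illustration of the described API, a sketch of what such SQLAlchemy helpers might look like — the function bodies are assumptions, not the committed code:

```python
from contextlib import contextmanager
import time

from sqlalchemy.exc import OperationalError
from sqlalchemy.orm import Session


@contextmanager
def transaction(db: Session):
    """Commit on success, roll back on any exception (assumed semantics)."""
    try:
        yield db
        db.commit()
    except Exception:
        db.rollback()
        raise


def retry_on_deadlock(func, db: Session, max_retries: int = 3, delay: float = 0.1):
    """Re-run func(db) when PostgreSQL reports a deadlock (assumed semantics)."""
    for attempt in range(max_retries + 1):
        try:
            with transaction(db):
                return func(db)
        except OperationalError as exc:
            if "deadlock detected" not in str(exc) or attempt == max_retries:
                raise
            time.sleep(delay * (2 ** attempt))  # simple exponential backoff between retries
```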

View File

@@ -1,21 +1,26 @@
include:
- project: 'esv/bsf/pypi/prosper'
ref: v0.64.1
file: '/prosper/templates/projects/docker.yml'
stages:
- test
- build
- publish
# - deploy
variables:
# renovate: datasource=gitlab-tags depName=esv/bsf/pypi/prosper versioning=semver registryUrl=https://gitlab.global.bsf.tools
PROSPER_VERSION: v0.64.1
kics:
allow_failure: true
hadolint:
allow_failure: true
# Container registry settings
REGISTRY: ${CI_REGISTRY}
IMAGE_NAME: ${CI_REGISTRY_IMAGE}
# Buildah settings
STORAGE_DRIVER: vfs
BUILDAH_FORMAT: docker
BUILDAH_ISOLATION: chroot
.buildah-base:
image: deps.global.bsf.tools/quay.io/buildah/stable:latest
before_script:
- buildah version
- buildah login -u ${CI_REGISTRY_USER} -p ${CI_REGISTRY_PASSWORD} ${CI_REGISTRY}
# Run Python tests
python_tests:
test:
stage: test
image: deps.global.bsf.tools/docker/python:3.12-slim
before_script:
@@ -24,6 +29,47 @@ python_tests:
script:
- cd backend
- python -m pytest -v || echo "No tests yet"
rules:
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
# Build container image for merge requests (no push)
build:
stage: build
extends: .buildah-base
script:
- |
buildah build \
--build-arg NPM_REGISTRY=https://deps.global.bsf.tools/artifactory/api/npm/registry.npmjs.org/ \
--tag ${IMAGE_NAME}:${CI_COMMIT_SHORT_SHA} \
--label org.opencontainers.image.source=${CI_PROJECT_URL} \
--label org.opencontainers.image.revision=${CI_COMMIT_SHA} \
--label org.opencontainers.image.created=$(date -u +%Y-%m-%dT%H:%M:%SZ) \
.
rules:
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
# Build and push on main branch
publish:
stage: publish
extends: .buildah-base
script:
- |
buildah build \
--build-arg NPM_REGISTRY=https://deps.global.bsf.tools/artifactory/api/npm/registry.npmjs.org/ \
--tag ${IMAGE_NAME}:${CI_COMMIT_SHORT_SHA} \
--tag ${IMAGE_NAME}:${CI_COMMIT_REF_SLUG} \
--tag ${IMAGE_NAME}:latest \
--label org.opencontainers.image.source=${CI_PROJECT_URL} \
--label org.opencontainers.image.revision=${CI_COMMIT_SHA} \
--label org.opencontainers.image.created=$(date -u +%Y-%m-%dT%H:%M:%SZ) \
.
- buildah push ${IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}
- buildah push ${IMAGE_NAME}:${CI_COMMIT_REF_SLUG}
- buildah push ${IMAGE_NAME}:latest
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
# deploy_helm_charts:
# stage: deploy

View File

@@ -1,51 +0,0 @@
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Added
- Added presigned URL support for direct S3 downloads (#48)
- Added `ORCHARD_DOWNLOAD_MODE` config option (`presigned`, `redirect`, `proxy`) (#48)
- Added `ORCHARD_PRESIGNED_URL_EXPIRY` config option (default: 3600 seconds) (#48)
- Added `?mode=` query parameter to override download mode per-request (#48)
- Added `/api/v1/project/{project}/{package}/+/{ref}/url` endpoint for getting presigned URLs (#48)
- Added `PresignedUrlResponse` schema with URL, expiry, checksums, and artifact metadata (#48)
- Added MinIO ingress support in Helm chart for presigned URL access (#48)
- Added `orchard.download.mode` and `orchard.download.presignedUrlExpiry` Helm values (#48)
- Added integrity verification workflow design document (#24)
- Added `sha256` field to API responses for clarity (alias of `id`) (#25)
- Added `checksum_sha1` field to artifacts table for compatibility (#25)
- Added `s3_etag` field to artifacts table for S3 verification (#25)
- Compute and store MD5, SHA1, and S3 ETag alongside SHA256 during upload (#25)
- Added `Dockerfile.local` and `docker-compose.local.yml` for local development (#25)
- Added migration script `003_checksum_fields.sql` for existing databases (#25)
### Changed
- Changed default download mode from `proxy` to `presigned` for better performance (#48)
### Fixed
- Fixed Helm chart `minio.ingress` conflicting with Bitnami MinIO subchart by renaming to `minioIngress` (#48)
## [0.2.0] - 2025-12-15
### Added
- Added `format` and `platform` fields to packages table (#16)
- Added `checksum_md5` and `metadata` JSONB fields to artifacts table (#16)
- Added `updated_at` field to tags table (#16)
- Added `tag_name`, `user_agent`, `duration_ms`, `deduplicated`, `checksum_verified` fields to uploads table (#16)
- Added `change_type` field to tag_history table (#16)
- Added composite indexes for common query patterns (#16)
- Added GIN indexes on JSONB fields for efficient JSON queries (#16)
- Added partial index for public projects (#16)
- Added database triggers for `updated_at` timestamps (#16)
- Added database triggers for maintaining artifact `ref_count` accuracy (#16)
- Added CHECK constraints for data integrity (`size > 0`, `ref_count >= 0`) (#16)
- Added migration script `002_schema_enhancements.sql` for existing databases (#16)
### Changed
- Updated images to use internal container BSF proxy (#46)
## [0.1.0] - 2025-12-12
### Added
- Added Prosper docker template config (#45)
### Changed
- Changed the Dockerfile npm build arg to use the deps.global.bsf.tools URL as the default registry (#45)

View File

@@ -1,7 +1,7 @@
# Frontend build stage
FROM containers.global.bsf.tools/node:20-alpine AS frontend-builder
FROM node:20-alpine AS frontend-builder
ARG NPM_REGISTRY=https://deps.global.bsf.tools/artifactory/api/npm/registry.npmjs.org/
ARG NPM_REGISTRY
WORKDIR /app/frontend
@@ -19,10 +19,7 @@ COPY frontend/ ./
RUN npm run build
# Runtime stage
FROM containers.global.bsf.tools/python:3.12-slim
# Disable proxy cache
RUN echo 'Acquire::http::Pipeline-Depth 0;\nAcquire::http::No-Cache true;\nAcquire::BrokenProxy true;\n' > /etc/apt/apt.conf.d/99fixbadproxy
FROM python:3.12-slim
# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \

View File

@@ -1,50 +0,0 @@
# Frontend build stage
FROM node:20-alpine AS frontend-builder
WORKDIR /app/frontend
# Copy package files
COPY frontend/package*.json ./
RUN npm install
# Copy frontend source
COPY frontend/ ./
# Build frontend
RUN npm run build
# Runtime stage
FROM python:3.12-slim
# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
&& rm -rf /var/lib/apt/lists/*
# Create non-root user
RUN groupadd -g 1000 orchard && \
useradd -u 1000 -g orchard -s /bin/bash -m orchard
WORKDIR /app
# Copy requirements and install Python dependencies
COPY backend/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy backend source
COPY backend/ ./backend/
# Copy frontend build
COPY --from=frontend-builder /app/frontend/dist ./frontend/dist
# Set ownership
RUN chown -R orchard:orchard /app
USER orchard
EXPOSE 8080
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
CMD curl -f http://localhost:8080/health || exit 1
CMD ["uvicorn", "backend.app.main:app", "--host", "0.0.0.0", "--port", "8080"]

View File

@@ -60,8 +60,7 @@ Orchard is a centralized binary artifact storage system that provides content-ad
| `GET` | `/api/v1/project/:project/packages/:package` | Get single package with metadata |
| `POST` | `/api/v1/project/:project/packages` | Create a new package |
| `POST` | `/api/v1/project/:project/:package/upload` | Upload an artifact |
| `GET` | `/api/v1/project/:project/:package/+/:ref` | Download an artifact (supports Range header, mode param) |
| `GET` | `/api/v1/project/:project/:package/+/:ref/url` | Get presigned URL for direct S3 download |
| `GET` | `/api/v1/project/:project/:package/+/:ref` | Download an artifact (supports Range header) |
| `HEAD` | `/api/v1/project/:project/:package/+/:ref` | Get artifact metadata without downloading |
| `GET` | `/api/v1/project/:project/:package/tags` | List tags (with pagination, search, sorting, artifact metadata) |
| `POST` | `/api/v1/project/:project/:package/tags` | Create a tag |
@@ -276,64 +275,22 @@ curl -X POST http://localhost:8080/api/v1/project/my-project/releases/upload/abc
### Download an Artifact
```bash
# By tag (use -OJ to save with the correct filename from Content-Disposition header)
curl -OJ http://localhost:8080/api/v1/project/my-project/releases/+/v1.0.0
# By tag
curl -O http://localhost:8080/api/v1/project/my-project/releases/+/v1.0.0
# By artifact ID
curl -OJ http://localhost:8080/api/v1/project/my-project/releases/+/artifact:a3f5d8e12b4c6789...
curl -O http://localhost:8080/api/v1/project/my-project/releases/+/artifact:a3f5d8e12b4c6789...
# Using the short URL pattern
curl -OJ http://localhost:8080/project/my-project/releases/+/latest
# Save to a specific filename
curl -o myfile.tar.gz http://localhost:8080/api/v1/project/my-project/releases/+/v1.0.0
curl -O http://localhost:8080/project/my-project/releases/+/latest
# Partial download (range request)
curl -H "Range: bytes=0-1023" http://localhost:8080/api/v1/project/my-project/releases/+/v1.0.0
# Check file info without downloading (HEAD request)
curl -I http://localhost:8080/api/v1/project/my-project/releases/+/v1.0.0
# Download with specific mode (presigned, redirect, or proxy)
curl "http://localhost:8080/api/v1/project/my-project/releases/+/v1.0.0?mode=proxy"
# Get presigned URL for direct S3 download
curl http://localhost:8080/api/v1/project/my-project/releases/+/v1.0.0/url
```
> **Note on curl flags:**
> - `-O` saves the file using the URL path as the filename (e.g., `latest`, `v1.0.0`)
> - `-J` tells curl to use the filename from the `Content-Disposition` header (e.g., `app-v1.0.0.tar.gz`)
> - `-OJ` combines both: download to a file using the server-provided filename
> - `-o <filename>` saves to a specific filename you choose
#### Download Modes
Orchard supports three download modes, configurable via `ORCHARD_DOWNLOAD_MODE` or per-request with `?mode=`:
| Mode | Description | Use Case |
|------|-------------|----------|
| `presigned` (default) | Returns JSON with a presigned S3 URL | Clients that handle redirects themselves, web UIs |
| `redirect` | Returns HTTP 302 redirect to presigned S3 URL | Simple clients, browsers, wget |
| `proxy` | Streams content through the backend | When S3 isn't directly accessible to clients |
**Presigned URL Response:**
```json
{
"url": "https://minio.example.com/bucket/...",
"expires_at": "2025-01-01T01:00:00Z",
"method": "GET",
"artifact_id": "a3f5d8e...",
"size": 1048576,
"content_type": "application/gzip",
"original_name": "app-v1.0.0.tar.gz",
"checksum_sha256": "a3f5d8e...",
"checksum_md5": "d41d8cd..."
}
```
> **Note:** For presigned URLs to work, clients must be able to reach the S3 endpoint directly. In Kubernetes, this requires exposing MinIO via ingress (see Helm configuration below).
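In `presigned` mode the download endpoint returns the JSON document above instead of the file body, so a client makes two requests: one to Orchard for the URL, one to S3/MinIO for the content. A minimal Python sketch of that flow, assuming a local instance and the `requests` library (both are illustrative, not part of the project):

```python
import hashlib
import requests

BASE = "http://localhost:8080"  # assumed local Orchard instance

# 1. Ask Orchard for the presigned URL (default download mode is "presigned")
meta = requests.get(f"{BASE}/api/v1/project/my-project/releases/+/v1.0.0").json()

# 2. Download the content directly from S3/MinIO via the presigned URL
data = requests.get(meta["url"]).content

# 3. Optionally verify against the checksum returned in the response
assert hashlib.sha256(data).hexdigest() == meta["checksum_sha256"]

with open(meta["original_name"] or meta["artifact_id"], "wb") as f:
    f.write(data)
```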
### Create a Tag
```bash
@@ -519,8 +476,6 @@ Configuration is provided via environment variables prefixed with `ORCHARD_`:
| `ORCHARD_S3_BUCKET` | S3 bucket name | `orchard-artifacts` |
| `ORCHARD_S3_ACCESS_KEY_ID` | S3 access key | - |
| `ORCHARD_S3_SECRET_ACCESS_KEY` | S3 secret key | - |
| `ORCHARD_DOWNLOAD_MODE` | Download mode: `presigned`, `redirect`, or `proxy` | `presigned` |
| `ORCHARD_PRESIGNED_URL_EXPIRY` | Presigned URL expiry in seconds | `3600` |
## Kubernetes Deployment
@@ -541,31 +496,6 @@ helm install orchard ./helm/orchard -n orchard --create-namespace
helm install orchard ./helm/orchard -f my-values.yaml
```
### Helm Configuration
Key configuration options in `values.yaml`:
```yaml
orchard:
# Download configuration
download:
mode: "presigned" # presigned, redirect, or proxy
presignedUrlExpiry: 3600
# MinIO ingress (required for presigned URL downloads)
minioIngress:
enabled: true
className: "nginx"
annotations:
cert-manager.io/cluster-issuer: "letsencrypt"
host: "minio.your-domain.com"
tls:
enabled: true
secretName: minio-tls
```
When `minioIngress.enabled` is `true`, the S3 endpoint automatically uses the external URL (`https://minio.your-domain.com`), making presigned URLs accessible to external clients.
See `helm/orchard/values.yaml` for all configuration options.
## Database Schema

View File

@@ -32,10 +32,6 @@ class Settings(BaseSettings):
s3_secret_access_key: str = ""
s3_use_path_style: bool = True
# Download settings
download_mode: str = "presigned" # "presigned", "redirect", or "proxy"
presigned_url_expiry: int = 3600 # Presigned URL expiry in seconds (default: 1 hour)
@property
def database_url(self) -> str:
sslmode = f"?sslmode={self.database_sslmode}" if self.database_sslmode else ""

View File

@@ -73,33 +73,15 @@ class Artifact(Base):
size = Column(BigInteger, nullable=False)
content_type = Column(String(255))
original_name = Column(String(1024))
checksum_md5 = Column(String(32)) # MD5 hash for additional verification
checksum_sha1 = Column(String(40)) # SHA1 hash for compatibility
s3_etag = Column(String(64)) # S3 ETag for verification
artifact_metadata = Column("metadata", JSON, default=dict) # Format-specific metadata (column name is 'metadata')
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
created_by = Column(String(255), nullable=False)
ref_count = Column(Integer, default=1)
s3_key = Column(String(1024), nullable=False)
format_metadata = Column(JSON, default=dict) # Format-specific metadata (version, etc.)
tags = relationship("Tag", back_populates="artifact")
uploads = relationship("Upload", back_populates="artifact")
@property
def sha256(self) -> str:
"""Alias for id - the SHA256 hash of the artifact content"""
return self.id
@property
def format_metadata(self):
"""Alias for artifact_metadata - backward compatibility"""
return self.artifact_metadata
@format_metadata.setter
def format_metadata(self, value):
"""Alias setter for artifact_metadata - backward compatibility"""
self.artifact_metadata = value
__table_args__ = (
Index("idx_artifacts_created_at", "created_at"),
Index("idx_artifacts_created_by", "created_by"),
@@ -117,7 +99,6 @@ class Tag(Base):
name = Column(String(255), nullable=False)
artifact_id = Column(String(64), ForeignKey("artifacts.id"), nullable=False)
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
updated_at = Column(DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow)
created_by = Column(String(255), nullable=False)
package = relationship("Package", back_populates="tags")
@@ -139,7 +120,6 @@ class TagHistory(Base):
tag_id = Column(UUID(as_uuid=True), ForeignKey("tags.id", ondelete="CASCADE"), nullable=False)
old_artifact_id = Column(String(64), ForeignKey("artifacts.id"))
new_artifact_id = Column(String(64), ForeignKey("artifacts.id"), nullable=False)
change_type = Column(String(20), nullable=False, default="update")
changed_at = Column(DateTime(timezone=True), default=datetime.utcnow)
changed_by = Column(String(255), nullable=False)
@@ -147,8 +127,6 @@ class TagHistory(Base):
__table_args__ = (
Index("idx_tag_history_tag_id", "tag_id"),
Index("idx_tag_history_changed_at", "changed_at"),
CheckConstraint("change_type IN ('create', 'update', 'delete')", name="check_change_type"),
)
@@ -159,11 +137,6 @@ class Upload(Base):
artifact_id = Column(String(64), ForeignKey("artifacts.id"), nullable=False)
package_id = Column(UUID(as_uuid=True), ForeignKey("packages.id"), nullable=False)
original_name = Column(String(1024))
tag_name = Column(String(255)) # Tag assigned during upload
user_agent = Column(String(512)) # Client identification
duration_ms = Column(Integer) # Upload timing in milliseconds
deduplicated = Column(Boolean, default=False) # Whether artifact was deduplicated
checksum_verified = Column(Boolean, default=True) # Whether checksum was verified
uploaded_at = Column(DateTime(timezone=True), default=datetime.utcnow)
uploaded_by = Column(String(255), nullable=False)
source_ip = Column(String(45))
@@ -175,8 +148,6 @@ class Upload(Base):
Index("idx_uploads_artifact_id", "artifact_id"),
Index("idx_uploads_package_id", "package_id"),
Index("idx_uploads_uploaded_at", "uploaded_at"),
Index("idx_uploads_package_uploaded_at", "package_id", "uploaded_at"),
Index("idx_uploads_uploaded_by_at", "uploaded_by", "uploaded_at"),
)
@@ -249,6 +220,4 @@ class AuditLog(Base):
Index("idx_audit_logs_resource", "resource"),
Index("idx_audit_logs_user_id", "user_id"),
Index("idx_audit_logs_timestamp", "timestamp"),
Index("idx_audit_logs_resource_timestamp", "resource", "timestamp"),
Index("idx_audit_logs_user_timestamp", "user_id", "timestamp"),
)

View File

@@ -1,9 +1,9 @@
from datetime import datetime, timedelta, timezone
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form, Request, Query, Header, Response
from fastapi.responses import StreamingResponse, RedirectResponse
from fastapi.responses import StreamingResponse
from sqlalchemy.orm import Session
from sqlalchemy import or_, func
from typing import List, Optional, Literal
from typing import List, Optional
import math
import re
import io
@@ -28,11 +28,8 @@ from .schemas import (
ResumableUploadCompleteRequest,
ResumableUploadCompleteResponse,
ResumableUploadStatusResponse,
GlobalSearchResponse, SearchResultProject, SearchResultPackage, SearchResultArtifact,
PresignedUrlResponse,
)
from .metadata import extract_metadata
from .config import get_settings
router = APIRouter()
@@ -54,155 +51,32 @@ def health_check():
return HealthResponse(status="ok")
# Global search
@router.get("/api/v1/search", response_model=GlobalSearchResponse)
def global_search(
request: Request,
q: str = Query(..., min_length=1, description="Search query"),
limit: int = Query(default=5, ge=1, le=20, description="Results per type"),
db: Session = Depends(get_db),
):
"""
Search across all entity types (projects, packages, artifacts/tags).
Returns limited results for each type plus total counts.
"""
user_id = get_user_id(request)
search_lower = q.lower()
# Search projects (name and description)
project_query = db.query(Project).filter(
or_(Project.is_public == True, Project.created_by == user_id),
or_(
func.lower(Project.name).contains(search_lower),
func.lower(Project.description).contains(search_lower)
)
)
project_count = project_query.count()
projects = project_query.order_by(Project.name).limit(limit).all()
# Search packages (name and description) with project name
package_query = db.query(Package, Project.name.label("project_name")).join(
Project, Package.project_id == Project.id
).filter(
or_(Project.is_public == True, Project.created_by == user_id),
or_(
func.lower(Package.name).contains(search_lower),
func.lower(Package.description).contains(search_lower)
)
)
package_count = package_query.count()
package_results = package_query.order_by(Package.name).limit(limit).all()
# Search tags/artifacts (tag name and original filename)
artifact_query = db.query(
Tag, Artifact, Package.name.label("package_name"), Project.name.label("project_name")
).join(
Artifact, Tag.artifact_id == Artifact.id
).join(
Package, Tag.package_id == Package.id
).join(
Project, Package.project_id == Project.id
).filter(
or_(Project.is_public == True, Project.created_by == user_id),
or_(
func.lower(Tag.name).contains(search_lower),
func.lower(Artifact.original_name).contains(search_lower)
)
)
artifact_count = artifact_query.count()
artifact_results = artifact_query.order_by(Tag.name).limit(limit).all()
return GlobalSearchResponse(
query=q,
projects=[SearchResultProject(
id=p.id,
name=p.name,
description=p.description,
is_public=p.is_public
) for p in projects],
packages=[SearchResultPackage(
id=pkg.id,
project_id=pkg.project_id,
project_name=project_name,
name=pkg.name,
description=pkg.description,
format=pkg.format
) for pkg, project_name in package_results],
artifacts=[SearchResultArtifact(
tag_id=tag.id,
tag_name=tag.name,
artifact_id=artifact.id,
package_id=tag.package_id,
package_name=package_name,
project_name=project_name,
original_name=artifact.original_name
) for tag, artifact, package_name, project_name in artifact_results],
counts={
"projects": project_count,
"packages": package_count,
"artifacts": artifact_count,
"total": project_count + package_count + artifact_count
}
)
# Project routes
@router.get("/api/v1/projects", response_model=PaginatedResponse[ProjectResponse])
def list_projects(
request: Request,
page: int = Query(default=1, ge=1, description="Page number"),
limit: int = Query(default=20, ge=1, le=100, description="Items per page"),
search: Optional[str] = Query(default=None, description="Search by project name or description"),
visibility: Optional[str] = Query(default=None, description="Filter by visibility (public, private)"),
sort: str = Query(default="name", description="Sort field (name, created_at, updated_at)"),
order: str = Query(default="asc", description="Sort order (asc, desc)"),
search: Optional[str] = Query(default=None, description="Search by project name"),
db: Session = Depends(get_db),
):
user_id = get_user_id(request)
# Validate sort field
valid_sort_fields = {"name": Project.name, "created_at": Project.created_at, "updated_at": Project.updated_at}
if sort not in valid_sort_fields:
raise HTTPException(status_code=400, detail=f"Invalid sort field. Must be one of: {', '.join(valid_sort_fields.keys())}")
# Validate order
if order not in ("asc", "desc"):
raise HTTPException(status_code=400, detail="Invalid order. Must be 'asc' or 'desc'")
# Base query - filter by access
query = db.query(Project).filter(
or_(Project.is_public == True, Project.created_by == user_id)
)
# Apply visibility filter
if visibility == "public":
query = query.filter(Project.is_public == True)
elif visibility == "private":
query = query.filter(Project.is_public == False, Project.created_by == user_id)
# Apply search filter (case-insensitive on name and description)
# Apply search filter (case-insensitive)
if search:
search_lower = search.lower()
query = query.filter(
or_(
func.lower(Project.name).contains(search_lower),
func.lower(Project.description).contains(search_lower)
)
)
query = query.filter(func.lower(Project.name).contains(search.lower()))
# Get total count before pagination
total = query.count()
# Apply sorting
sort_column = valid_sort_fields[sort]
if order == "desc":
query = query.order_by(sort_column.desc())
else:
query = query.order_by(sort_column.asc())
# Apply pagination
offset = (page - 1) * limit
projects = query.offset(offset).limit(limit).all()
projects = query.order_by(Project.name).offset(offset).limit(limit).all()
# Calculate total pages
total_pages = math.ceil(total / limit) if total > 0 else 1
@@ -522,51 +396,40 @@ def upload_artifact(
)
# Store file (uses multipart for large files)
storage_result = storage.store(file.file, content_length)
sha256_hash, size, s3_key = storage.store(file.file, content_length)
# Check if this is a deduplicated upload
deduplicated = False
# Create or update artifact record
artifact = db.query(Artifact).filter(Artifact.id == storage_result.sha256).first()
artifact = db.query(Artifact).filter(Artifact.id == sha256_hash).first()
if artifact:
artifact.ref_count += 1
deduplicated = True
# Merge metadata if new metadata was extracted
if file_metadata and artifact.artifact_metadata:
artifact.artifact_metadata = {**artifact.artifact_metadata, **file_metadata}
if file_metadata and artifact.format_metadata:
artifact.format_metadata = {**artifact.format_metadata, **file_metadata}
elif file_metadata:
artifact.artifact_metadata = file_metadata
# Update checksums if not already set
if not artifact.checksum_md5 and storage_result.md5:
artifact.checksum_md5 = storage_result.md5
if not artifact.checksum_sha1 and storage_result.sha1:
artifact.checksum_sha1 = storage_result.sha1
if not artifact.s3_etag and storage_result.s3_etag:
artifact.s3_etag = storage_result.s3_etag
artifact.format_metadata = file_metadata
else:
artifact = Artifact(
id=storage_result.sha256,
size=storage_result.size,
id=sha256_hash,
size=size,
content_type=file.content_type,
original_name=file.filename,
checksum_md5=storage_result.md5,
checksum_sha1=storage_result.sha1,
s3_etag=storage_result.s3_etag,
created_by=user_id,
s3_key=storage_result.s3_key,
artifact_metadata=file_metadata or {},
s3_key=s3_key,
format_metadata=file_metadata or {},
)
db.add(artifact)
# Record upload
upload = Upload(
artifact_id=storage_result.sha256,
artifact_id=sha256_hash,
package_id=package.id,
original_name=file.filename,
uploaded_by=user_id,
source_ip=request.client.host if request.client else None,
deduplicated=deduplicated,
)
db.add(upload)
@@ -574,13 +437,13 @@ def upload_artifact(
if tag:
existing_tag = db.query(Tag).filter(Tag.package_id == package.id, Tag.name == tag).first()
if existing_tag:
existing_tag.artifact_id = storage_result.sha256
existing_tag.artifact_id = sha256_hash
existing_tag.created_by = user_id
else:
new_tag = Tag(
package_id=package.id,
name=tag,
artifact_id=storage_result.sha256,
artifact_id=sha256_hash,
created_by=user_id,
)
db.add(new_tag)
@@ -588,16 +451,12 @@ def upload_artifact(
db.commit()
return UploadResponse(
artifact_id=storage_result.sha256,
sha256=storage_result.sha256,
size=storage_result.size,
artifact_id=sha256_hash,
size=size,
project=project_name,
package=package_name,
tag=tag,
checksum_md5=storage_result.md5,
checksum_sha1=storage_result.sha1,
s3_etag=storage_result.s3_etag,
format_metadata=artifact.artifact_metadata,
format_metadata=artifact.format_metadata,
deduplicated=deduplicated,
)
@@ -846,13 +705,27 @@ def get_upload_status(
raise HTTPException(status_code=404, detail=str(e))
# Helper function to resolve artifact reference
def _resolve_artifact_ref(
# Download artifact with range request support
@router.get("/api/v1/project/{project_name}/{package_name}/+/{ref}")
def download_artifact(
project_name: str,
package_name: str,
ref: str,
package: Package,
db: Session,
) -> Optional[Artifact]:
"""Resolve a reference (tag name, artifact:hash, tag:name) to an artifact"""
request: Request,
db: Session = Depends(get_db),
storage: S3Storage = Depends(get_storage),
range: Optional[str] = Header(None),
):
# Get project and package
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise HTTPException(status_code=404, detail="Project not found")
package = db.query(Package).filter(Package.project_id == project.id, Package.name == package_name).first()
if not package:
raise HTTPException(status_code=404, detail="Package not found")
# Resolve reference to artifact
artifact = None
# Check for explicit prefixes
@@ -873,76 +746,11 @@ def _resolve_artifact_ref(
# Try as direct artifact ID
artifact = db.query(Artifact).filter(Artifact.id == ref).first()
return artifact
# Download artifact with range request support and download modes
@router.get("/api/v1/project/{project_name}/{package_name}/+/{ref}")
def download_artifact(
project_name: str,
package_name: str,
ref: str,
request: Request,
db: Session = Depends(get_db),
storage: S3Storage = Depends(get_storage),
range: Optional[str] = Header(None),
mode: Optional[Literal["proxy", "redirect", "presigned"]] = Query(
default=None,
description="Download mode: proxy (stream through backend), redirect (302 to presigned URL), presigned (return JSON with URL)"
),
):
settings = get_settings()
# Get project and package
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise HTTPException(status_code=404, detail="Project not found")
package = db.query(Package).filter(Package.project_id == project.id, Package.name == package_name).first()
if not package:
raise HTTPException(status_code=404, detail="Package not found")
# Resolve reference to artifact
artifact = _resolve_artifact_ref(ref, package, db)
if not artifact:
raise HTTPException(status_code=404, detail="Artifact not found")
filename = artifact.original_name or f"{artifact.id}"
# Determine download mode (query param overrides server default)
download_mode = mode or settings.download_mode
# Handle presigned mode - return JSON with presigned URL
if download_mode == "presigned":
presigned_url = storage.generate_presigned_url(
artifact.s3_key,
response_content_type=artifact.content_type,
response_content_disposition=f'attachment; filename="{filename}"',
)
expires_at = datetime.now(timezone.utc) + timedelta(seconds=settings.presigned_url_expiry)
return PresignedUrlResponse(
url=presigned_url,
expires_at=expires_at,
method="GET",
artifact_id=artifact.id,
size=artifact.size,
content_type=artifact.content_type,
original_name=artifact.original_name,
checksum_sha256=artifact.id,
checksum_md5=artifact.checksum_md5,
)
# Handle redirect mode - return 302 redirect to presigned URL
if download_mode == "redirect":
presigned_url = storage.generate_presigned_url(
artifact.s3_key,
response_content_type=artifact.content_type,
response_content_disposition=f'attachment; filename="{filename}"',
)
return RedirectResponse(url=presigned_url, status_code=302)
# Proxy mode (default fallback) - stream through backend
# Handle range requests
if range:
stream, content_length, content_range = storage.get_stream(artifact.s3_key, range)
@@ -976,63 +784,6 @@ def download_artifact(
)
# Get presigned URL endpoint (explicit endpoint for getting URL without redirect)
@router.get("/api/v1/project/{project_name}/{package_name}/+/{ref}/url", response_model=PresignedUrlResponse)
def get_artifact_url(
project_name: str,
package_name: str,
ref: str,
db: Session = Depends(get_db),
storage: S3Storage = Depends(get_storage),
expiry: Optional[int] = Query(
default=None,
description="Custom expiry time in seconds (defaults to server setting)"
),
):
"""
Get a presigned URL for direct S3 download.
This endpoint always returns a presigned URL regardless of server download mode.
"""
settings = get_settings()
# Get project and package
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise HTTPException(status_code=404, detail="Project not found")
package = db.query(Package).filter(Package.project_id == project.id, Package.name == package_name).first()
if not package:
raise HTTPException(status_code=404, detail="Package not found")
# Resolve reference to artifact
artifact = _resolve_artifact_ref(ref, package, db)
if not artifact:
raise HTTPException(status_code=404, detail="Artifact not found")
filename = artifact.original_name or f"{artifact.id}"
url_expiry = expiry or settings.presigned_url_expiry
presigned_url = storage.generate_presigned_url(
artifact.s3_key,
expiry=url_expiry,
response_content_type=artifact.content_type,
response_content_disposition=f'attachment; filename="{filename}"',
)
expires_at = datetime.now(timezone.utc) + timedelta(seconds=url_expiry)
return PresignedUrlResponse(
url=presigned_url,
expires_at=expires_at,
method="GET",
artifact_id=artifact.id,
size=artifact.size,
content_type=artifact.content_type,
original_name=artifact.original_name,
checksum_sha256=artifact.id,
checksum_md5=artifact.checksum_md5,
)
# HEAD request for download (to check file info without downloading)
@router.head("/api/v1/project/{project_name}/{package_name}/+/{ref}")
def head_artifact(
@@ -1051,8 +802,23 @@ def head_artifact(
if not package:
raise HTTPException(status_code=404, detail="Package not found")
# Resolve reference to artifact
artifact = _resolve_artifact_ref(ref, package, db)
# Resolve reference to artifact (same logic as download)
artifact = None
if ref.startswith("artifact:"):
artifact_id = ref[9:]
artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first()
elif ref.startswith("tag:") or ref.startswith("version:"):
tag_name = ref.split(":", 1)[1]
tag = db.query(Tag).filter(Tag.package_id == package.id, Tag.name == tag_name).first()
if tag:
artifact = db.query(Artifact).filter(Artifact.id == tag.artifact_id).first()
else:
tag = db.query(Tag).filter(Tag.package_id == package.id, Tag.name == ref).first()
if tag:
artifact = db.query(Artifact).filter(Artifact.id == tag.artifact_id).first()
else:
artifact = db.query(Artifact).filter(Artifact.id == ref).first()
if not artifact:
raise HTTPException(status_code=404, detail="Artifact not found")
@@ -1116,15 +882,9 @@ def list_tags(
# Base query with JOIN to artifact for metadata
query = db.query(Tag, Artifact).join(Artifact, Tag.artifact_id == Artifact.id).filter(Tag.package_id == package.id)
# Apply search filter (case-insensitive on tag name OR artifact original filename)
# Apply search filter (case-insensitive on tag name)
if search:
search_lower = search.lower()
query = query.filter(
or_(
func.lower(Tag.name).contains(search_lower),
func.lower(Artifact.original_name).contains(search_lower)
)
)
query = query.filter(func.lower(Tag.name).contains(search.lower()))
# Get total count before pagination
total = query.count()

View File

@@ -99,13 +99,9 @@ class PackageDetailResponse(BaseModel):
# Artifact schemas
class ArtifactResponse(BaseModel):
id: str
sha256: str # Explicit SHA256 field (same as id)
size: int
content_type: Optional[str]
original_name: Optional[str]
checksum_md5: Optional[str] = None
checksum_sha1: Optional[str] = None
s3_etag: Optional[str] = None
created_at: datetime
created_by: str
ref_count: int
@@ -177,13 +173,9 @@ class ArtifactTagInfo(BaseModel):
class ArtifactDetailResponse(BaseModel):
"""Artifact with list of tags/packages referencing it"""
id: str
sha256: str # Explicit SHA256 field (same as id)
size: int
content_type: Optional[str]
original_name: Optional[str]
checksum_md5: Optional[str] = None
checksum_sha1: Optional[str] = None
s3_etag: Optional[str] = None
created_at: datetime
created_by: str
ref_count: int
@@ -197,13 +189,9 @@ class ArtifactDetailResponse(BaseModel):
class PackageArtifactResponse(BaseModel):
"""Artifact with tags for package artifact listing"""
id: str
sha256: str # Explicit SHA256 field (same as id)
size: int
content_type: Optional[str]
original_name: Optional[str]
checksum_md5: Optional[str] = None
checksum_sha1: Optional[str] = None
s3_etag: Optional[str] = None
created_at: datetime
created_by: str
format_metadata: Optional[Dict[str, Any]] = None
@@ -216,14 +204,10 @@ class PackageArtifactResponse(BaseModel):
# Upload response
class UploadResponse(BaseModel):
artifact_id: str
sha256: str # Explicit SHA256 field (same as artifact_id)
size: int
project: str
package: str
tag: Optional[str]
checksum_md5: Optional[str] = None
checksum_sha1: Optional[str] = None
s3_etag: Optional[str] = None
format_metadata: Optional[Dict[str, Any]] = None
deduplicated: bool = False
@@ -285,65 +269,6 @@ class ConsumerResponse(BaseModel):
from_attributes = True
# Global search schemas
class SearchResultProject(BaseModel):
"""Project result for global search"""
id: UUID
name: str
description: Optional[str]
is_public: bool
class Config:
from_attributes = True
class SearchResultPackage(BaseModel):
"""Package result for global search"""
id: UUID
project_id: UUID
project_name: str
name: str
description: Optional[str]
format: str
class Config:
from_attributes = True
class SearchResultArtifact(BaseModel):
"""Artifact/tag result for global search"""
tag_id: UUID
tag_name: str
artifact_id: str
package_id: UUID
package_name: str
project_name: str
original_name: Optional[str]
class GlobalSearchResponse(BaseModel):
"""Combined search results across all entity types"""
query: str
projects: List[SearchResultProject]
packages: List[SearchResultPackage]
artifacts: List[SearchResultArtifact]
counts: Dict[str, int] # Total counts for each type
# Presigned URL response
class PresignedUrlResponse(BaseModel):
"""Response containing a presigned URL for direct S3 download"""
url: str
expires_at: datetime
method: str = "GET"
artifact_id: str
size: int
content_type: Optional[str] = None
original_name: Optional[str] = None
checksum_sha256: Optional[str] = None
checksum_md5: Optional[str] = None
# Health check
class HealthResponse(BaseModel):
status: str

View File

@@ -1,6 +1,6 @@
import hashlib
import logging
from typing import BinaryIO, Tuple, Optional, Dict, Any, Generator, NamedTuple
from typing import BinaryIO, Tuple, Optional, Dict, Any, Generator
import boto3
from botocore.config import Config
from botocore.exceptions import ClientError
@@ -18,16 +18,6 @@ MULTIPART_CHUNK_SIZE = 10 * 1024 * 1024
HASH_CHUNK_SIZE = 8 * 1024 * 1024
class StorageResult(NamedTuple):
"""Result of storing a file with all computed checksums"""
sha256: str
size: int
s3_key: str
md5: Optional[str] = None
sha1: Optional[str] = None
s3_etag: Optional[str] = None
class S3Storage:
def __init__(self):
config = Config(s3={"addressing_style": "path"} if settings.s3_use_path_style else {})
@@ -44,9 +34,9 @@ class S3Storage:
# Store active multipart uploads for resumable support
self._active_uploads: Dict[str, Dict[str, Any]] = {}
def store(self, file: BinaryIO, content_length: Optional[int] = None) -> StorageResult:
def store(self, file: BinaryIO, content_length: Optional[int] = None) -> Tuple[str, int, str]:
"""
Store a file and return StorageResult with all checksums.
Store a file and return its SHA256 hash, size, and s3_key.
Content-addressable: if the file already exists, just return the hash.
Uses multipart upload for files larger than MULTIPART_THRESHOLD.
"""
@@ -56,76 +46,45 @@ class S3Storage:
else:
return self._store_multipart(file, content_length)
def _store_simple(self, file: BinaryIO) -> StorageResult:
def _store_simple(self, file: BinaryIO) -> Tuple[str, int, str]:
"""Store a small file using simple put_object"""
# Read file and compute all hashes
# Read file and compute hash
content = file.read()
sha256_hash = hashlib.sha256(content).hexdigest()
md5_hash = hashlib.md5(content).hexdigest()
sha1_hash = hashlib.sha1(content).hexdigest()
size = len(content)
# Check if already exists
s3_key = f"fruits/{sha256_hash[:2]}/{sha256_hash[2:4]}/{sha256_hash}"
s3_etag = None
if not self._exists(s3_key):
response = self.client.put_object(
self.client.put_object(
Bucket=self.bucket,
Key=s3_key,
Body=content,
)
s3_etag = response.get("ETag", "").strip('"')
else:
# Get existing ETag
obj_info = self.get_object_info(s3_key)
if obj_info:
s3_etag = obj_info.get("etag", "").strip('"')
return StorageResult(
sha256=sha256_hash,
size=size,
s3_key=s3_key,
md5=md5_hash,
sha1=sha1_hash,
s3_etag=s3_etag,
)
return sha256_hash, size, s3_key
def _store_multipart(self, file: BinaryIO, content_length: int) -> StorageResult:
def _store_multipart(self, file: BinaryIO, content_length: int) -> Tuple[str, int, str]:
"""Store a large file using S3 multipart upload with streaming hash computation"""
# First pass: compute all hashes by streaming through file
sha256_hasher = hashlib.sha256()
md5_hasher = hashlib.md5()
sha1_hasher = hashlib.sha1()
# First pass: compute hash by streaming through file
hasher = hashlib.sha256()
size = 0
# Read file in chunks to compute hashes
# Read file in chunks to compute hash
while True:
chunk = file.read(HASH_CHUNK_SIZE)
if not chunk:
break
sha256_hasher.update(chunk)
md5_hasher.update(chunk)
sha1_hasher.update(chunk)
hasher.update(chunk)
size += len(chunk)
sha256_hash = sha256_hasher.hexdigest()
md5_hash = md5_hasher.hexdigest()
sha1_hash = sha1_hasher.hexdigest()
sha256_hash = hasher.hexdigest()
s3_key = f"fruits/{sha256_hash[:2]}/{sha256_hash[2:4]}/{sha256_hash}"
# Check if already exists (deduplication)
if self._exists(s3_key):
obj_info = self.get_object_info(s3_key)
s3_etag = obj_info.get("etag", "").strip('"') if obj_info else None
return StorageResult(
sha256=sha256_hash,
size=size,
s3_key=s3_key,
md5=md5_hash,
sha1=sha1_hash,
s3_etag=s3_etag,
)
return sha256_hash, size, s3_key
# Seek back to start for upload
file.seek(0)
@@ -157,22 +116,14 @@ class S3Storage:
part_number += 1
# Complete multipart upload
complete_response = self.client.complete_multipart_upload(
self.client.complete_multipart_upload(
Bucket=self.bucket,
Key=s3_key,
UploadId=upload_id,
MultipartUpload={"Parts": parts},
)
s3_etag = complete_response.get("ETag", "").strip('"')
return StorageResult(
sha256=sha256_hash,
size=size,
s3_key=s3_key,
md5=md5_hash,
sha1=sha1_hash,
s3_etag=s3_etag,
)
return sha256_hash, size, s3_key
except Exception as e:
# Abort multipart upload on failure
@@ -184,50 +135,33 @@ class S3Storage:
)
raise
def store_streaming(self, chunks: Generator[bytes, None, None]) -> StorageResult:
def store_streaming(self, chunks: Generator[bytes, None, None]) -> Tuple[str, int, str]:
"""
Store a file from a stream of chunks.
First accumulates to compute hash, then uploads.
For truly large files, consider using initiate_resumable_upload instead.
"""
# Accumulate chunks and compute all hashes
sha256_hasher = hashlib.sha256()
md5_hasher = hashlib.md5()
sha1_hasher = hashlib.sha1()
# Accumulate chunks and compute hash
hasher = hashlib.sha256()
all_chunks = []
size = 0
for chunk in chunks:
sha256_hasher.update(chunk)
md5_hasher.update(chunk)
sha1_hasher.update(chunk)
hasher.update(chunk)
all_chunks.append(chunk)
size += len(chunk)
sha256_hash = sha256_hasher.hexdigest()
md5_hash = md5_hasher.hexdigest()
sha1_hash = sha1_hasher.hexdigest()
sha256_hash = hasher.hexdigest()
s3_key = f"fruits/{sha256_hash[:2]}/{sha256_hash[2:4]}/{sha256_hash}"
s3_etag = None
# Check if already exists
if self._exists(s3_key):
obj_info = self.get_object_info(s3_key)
s3_etag = obj_info.get("etag", "").strip('"') if obj_info else None
return StorageResult(
sha256=sha256_hash,
size=size,
s3_key=s3_key,
md5=md5_hash,
sha1=sha1_hash,
s3_etag=s3_etag,
)
return sha256_hash, size, s3_key
# Upload based on size
if size < MULTIPART_THRESHOLD:
content = b"".join(all_chunks)
response = self.client.put_object(Bucket=self.bucket, Key=s3_key, Body=content)
s3_etag = response.get("ETag", "").strip('"')
self.client.put_object(Bucket=self.bucket, Key=s3_key, Body=content)
else:
# Use multipart for large files
mpu = self.client.create_multipart_upload(Bucket=self.bucket, Key=s3_key)
@@ -271,13 +205,12 @@ class S3Storage:
"ETag": response["ETag"],
})
complete_response = self.client.complete_multipart_upload(
self.client.complete_multipart_upload(
Bucket=self.bucket,
Key=s3_key,
UploadId=upload_id,
MultipartUpload={"Parts": parts},
)
s3_etag = complete_response.get("ETag", "").strip('"')
except Exception as e:
logger.error(f"Streaming multipart upload failed: {e}")
@@ -288,14 +221,7 @@ class S3Storage:
)
raise
return StorageResult(
sha256=sha256_hash,
size=size,
s3_key=s3_key,
md5=md5_hash,
sha1=sha1_hash,
s3_etag=s3_etag,
)
return sha256_hash, size, s3_key
def initiate_resumable_upload(self, expected_hash: str) -> Dict[str, Any]:
"""
@@ -450,46 +376,6 @@ class S3Storage:
except ClientError:
return False
def generate_presigned_url(
self,
s3_key: str,
expiry: Optional[int] = None,
response_content_type: Optional[str] = None,
response_content_disposition: Optional[str] = None,
) -> str:
"""
Generate a presigned URL for downloading an object.
Args:
s3_key: The S3 key of the object
expiry: URL expiry in seconds (defaults to settings.presigned_url_expiry)
response_content_type: Override Content-Type header in response
response_content_disposition: Override Content-Disposition header in response
Returns:
Presigned URL string
"""
if expiry is None:
expiry = settings.presigned_url_expiry
params = {
"Bucket": self.bucket,
"Key": s3_key,
}
# Add response header overrides if specified
if response_content_type:
params["ResponseContentType"] = response_content_type
if response_content_disposition:
params["ResponseContentDisposition"] = response_content_disposition
url = self.client.generate_presigned_url(
"get_object",
Params=params,
ExpiresIn=expiry,
)
return url
# Singleton instance
_storage = None

View File

@@ -1,7 +0,0 @@
#!/bin/sh
echo "testing container"
# Without a sleep, local testing shows no output because attaching to the logs happens after the container is done executing
# this script.
sleep 1

View File

@@ -1,122 +0,0 @@
version: '3.8'
services:
orchard-server:
build:
context: .
dockerfile: Dockerfile.local
ports:
- "8080:8080"
environment:
- ORCHARD_SERVER_HOST=0.0.0.0
- ORCHARD_SERVER_PORT=8080
- ORCHARD_DATABASE_HOST=postgres
- ORCHARD_DATABASE_PORT=5432
- ORCHARD_DATABASE_USER=orchard
- ORCHARD_DATABASE_PASSWORD=orchard_secret
- ORCHARD_DATABASE_DBNAME=orchard
- ORCHARD_DATABASE_SSLMODE=disable
- ORCHARD_S3_ENDPOINT=http://minio:9000
- ORCHARD_S3_REGION=us-east-1
- ORCHARD_S3_BUCKET=orchard-artifacts
- ORCHARD_S3_ACCESS_KEY_ID=minioadmin
- ORCHARD_S3_SECRET_ACCESS_KEY=minioadmin
- ORCHARD_S3_USE_PATH_STYLE=true
- ORCHARD_REDIS_HOST=redis
- ORCHARD_REDIS_PORT=6379
depends_on:
postgres:
condition: service_healthy
minio:
condition: service_healthy
redis:
condition: service_healthy
networks:
- orchard-network
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
interval: 30s
timeout: 3s
start_period: 10s
retries: 3
postgres:
image: postgres:16-alpine
environment:
- POSTGRES_USER=orchard
- POSTGRES_PASSWORD=orchard_secret
- POSTGRES_DB=orchard
volumes:
- postgres-data-local:/var/lib/postgresql/data
- ./migrations:/docker-entrypoint-initdb.d:ro
ports:
- "5432:5432"
healthcheck:
test: ["CMD-SHELL", "pg_isready -U orchard -d orchard"]
interval: 10s
timeout: 5s
retries: 5
networks:
- orchard-network
restart: unless-stopped
minio:
image: minio/minio:latest
command: server /data --console-address ":9001"
environment:
- MINIO_ROOT_USER=minioadmin
- MINIO_ROOT_PASSWORD=minioadmin
volumes:
- minio-data-local:/data
ports:
- "9000:9000"
- "9001:9001"
healthcheck:
test: ["CMD", "mc", "ready", "local"]
interval: 10s
timeout: 5s
retries: 5
networks:
- orchard-network
restart: unless-stopped
minio-init:
image: minio/mc:latest
depends_on:
minio:
condition: service_healthy
entrypoint: >
/bin/sh -c "
mc alias set myminio http://minio:9000 minioadmin minioadmin;
mc mb myminio/orchard-artifacts --ignore-existing;
mc anonymous set download myminio/orchard-artifacts;
exit 0;
"
networks:
- orchard-network
redis:
image: redis:7-alpine
command: redis-server --appendonly yes
volumes:
- redis-data-local:/data
ports:
- "6379:6379"
healthcheck:
test: ["CMD", "redis-cli", "ping"]
interval: 10s
timeout: 5s
retries: 5
networks:
- orchard-network
restart: unless-stopped
volumes:
postgres-data-local:
minio-data-local:
redis-data-local:
networks:
orchard-network:
driver: bridge

View File

@@ -36,7 +36,7 @@ services:
restart: unless-stopped
postgres:
image: containers.global.bsf.tools/postgres:16-alpine
image: postgres:16-alpine
environment:
- POSTGRES_USER=orchard
- POSTGRES_PASSWORD=orchard_secret
@@ -56,7 +56,7 @@ services:
restart: unless-stopped
minio:
image: containers.global.bsf.tools/minio/minio:latest
image: minio/minio:latest
command: server /data --console-address ":9001"
environment:
- MINIO_ROOT_USER=minioadmin
@@ -76,7 +76,7 @@ services:
restart: unless-stopped
minio-init:
image: containers.global.bsf.tools/minio/mc:latest
image: minio/mc:latest
depends_on:
minio:
condition: service_healthy
@@ -91,7 +91,7 @@ services:
- orchard-network
redis:
image: containers.global.bsf.tools/redis:7-alpine
image: redis:7-alpine
command: redis-server --appendonly yes
volumes:
- redis-data:/data

View File

@@ -1,504 +0,0 @@
# Integrity Verification Workflow Design
This document defines the process for SHA256 checksum verification on artifact downloads, including failure handling and retry mechanisms.
## Overview
Orchard uses content-addressable storage where the artifact ID is the SHA256 hash of the content. This design leverages that property to provide configurable integrity verification during downloads.
## Current State
| Aspect | Status |
|--------|--------|
| Download streams content directly from S3 | ✅ Implemented |
| Artifact ID is the SHA256 hash | ✅ Implemented |
| S3 key derived from SHA256 hash | ✅ Implemented |
| Verification during download | ❌ Not implemented |
| Checksum headers in response | ❌ Not implemented |
| Retry mechanism on failure | ❌ Not implemented |
| Failure handling beyond S3 errors | ❌ Not implemented |
## Verification Modes
The verification mode is selected per request via the `?verify=<mode>` query parameter, or set server-wide via `ORCHARD_VERIFY_MODE`.
| Mode | Performance | Integrity | Use Case |
|------|-------------|-----------|----------|
| `none` | ⚡ Fastest | Client-side | Trusted networks, high throughput |
| `header` | ⚡ Fast | Client-side | Standard downloads, client verification |
| `stream` | 🔄 Moderate | Post-hoc server | Logging/auditing, non-blocking |
| `pre` | 🐢 Slower | Guaranteed | Critical downloads, untrusted storage |
| `strict` | 🐢 Slower | Guaranteed + Alert | Security-sensitive, compliance |
### Mode: None (Default)
**Behavior:**
- Stream content directly from S3 with no server-side processing
- Maximum download performance
- Client is responsible for verification
**Headers Returned:**
```
X-Checksum-SHA256: <expected_hash>
Content-Length: <expected_size>
```
**Flow:**
```
Client Request → Lookup Artifact → Stream from S3 → Client
```
### Mode: Header
**Behavior:**
- Stream content directly from S3
- Include comprehensive checksum headers
- Client performs verification using headers
**Headers Returned:**
```
X-Checksum-SHA256: <expected_hash>
Content-Length: <expected_size>
Digest: sha-256=<base64_encoded_hash>
ETag: "<sha256_hash>"
X-Content-SHA256: <expected_hash>
```
**Flow:**
```
Client Request → Lookup Artifact → Add Headers → Stream from S3 → Client Verifies
```
**Client Verification Example:**
```bash
# Download and verify
curl -OJ https://orchard/project/foo/bar/+/v1.0.0
EXPECTED=$(curl -sI https://orchard/project/foo/bar/+/v1.0.0 | grep -i X-Checksum-SHA256 | tr -d '\r' | cut -d' ' -f2)
ACTUAL=$(sha256sum downloaded_file | cut -d' ' -f1)
[ "$EXPECTED" = "$ACTUAL" ] && echo "OK" || echo "MISMATCH"
```
### Mode: Stream (Post-Hoc Verification)
**Behavior:**
- Wrap S3 stream with `HashingStreamWrapper`
- Compute SHA256 incrementally while streaming to client
- Verify hash after stream completes
- Log verification result
- Cannot reject content (already sent to client)
**Headers Returned:**
```
X-Checksum-SHA256: <expected_hash>
Content-Length: <expected_size>
X-Verify-Mode: stream
Trailer: X-Verified
```
**Trailers (if client supports):**
```
X-Verified: true|false
X-Computed-SHA256: <computed_hash>
```
**Flow:**
```
Client Request → Lookup Artifact → Wrap Stream → Stream to Client
Compute Hash Incrementally
Verify After Complete → Log Result
```
**Implementation:**
```python
class HashingStreamWrapper:
def __init__(self, stream, expected_hash: str, on_complete: Callable):
self.stream = stream
self.hasher = hashlib.sha256()
self.expected_hash = expected_hash
self.on_complete = on_complete
def __iter__(self):
for chunk in self.stream:
self.hasher.update(chunk)
yield chunk
# Stream complete, verify
computed = self.hasher.hexdigest()
self.on_complete(computed == self.expected_hash, computed)
```
### Mode: Pre-Verify (Blocking)
**Behavior:**
- Download entire content from S3 to memory/temp file
- Compute SHA256 hash before sending to client
- On match: stream verified content to client
- On mismatch: retry from S3 (up to N times)
- If retries exhausted: return 500 error
**Headers Returned:**
```
X-Checksum-SHA256: <expected_hash>
Content-Length: <expected_size>
X-Verify-Mode: pre
X-Verified: true
```
**Flow:**
```
Client Request → Lookup Artifact → Download from S3 → Compute Hash
Hash Matches?
↓ ↓
Yes No
↓ ↓
Stream to Client Retry?
Yes → Loop
No → 500 Error
```
**Memory Considerations:**
- For files < `ORCHARD_VERIFY_MEMORY_LIMIT` (default 100MB): buffer in memory
- For larger files: use temporary file with streaming hash computation
- Clean up temp files after the response is sent (see the sketch below)
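A minimal sketch of the buffering strategy described above, assuming a memory-limit setting and a temporary-file fallback (the helper name and error handling are illustrative, not the actual implementation):

```python
import hashlib
import os
import tempfile

MEMORY_LIMIT = 100 * 1024 * 1024  # ORCHARD_VERIFY_MEMORY_LIMIT default


def pre_verify(s3_stream, expected_hash: str, expected_size: int):
    """Buffer the S3 stream, verify its SHA256, and return verified content or a file handle."""
    if expected_size < MEMORY_LIMIT:
        content = b"".join(s3_stream)                   # small file: buffer in memory
        if hashlib.sha256(content).hexdigest() != expected_hash:
            raise ValueError("integrity verification failed")
        return content
    hasher = hashlib.sha256()
    tmp = tempfile.NamedTemporaryFile(delete=False)      # large file: spill to a temp file
    for chunk in s3_stream:
        hasher.update(chunk)
        tmp.write(chunk)
    tmp.close()
    if hasher.hexdigest() != expected_hash:
        os.unlink(tmp.name)                              # clean up immediately on failure
        raise ValueError("integrity verification failed")
    return open(tmp.name, "rb")                          # caller removes the file after streaming
```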
### Mode: Strict
**Behavior:**
- Same as pre-verify but with no retries
- Fail immediately on any mismatch
- Quarantine artifact on failure (mark as potentially corrupted)
- Trigger alert/notification on failure
- For security-critical downloads
**Headers Returned (on success):**
```
X-Checksum-SHA256: <expected_hash>
Content-Length: <expected_size>
X-Verify-Mode: strict
X-Verified: true
```
**Error Response (on failure):**
```json
{
"error": "integrity_verification_failed",
"message": "Artifact content does not match expected checksum",
"expected_hash": "<expected>",
"computed_hash": "<computed>",
"artifact_id": "<id>",
"action_taken": "quarantined"
}
```
**Quarantine Process:**
1. Mark artifact `status = 'quarantined'` in database
2. Log security event to audit_logs
3. Optionally notify via webhook/email
4. Artifact becomes unavailable for download until resolved (see the sketch below)
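A sketch of the quarantine steps listed above, assuming a `status` column on the artifacts table (not present in the current schema) and plain logging in place of an audit_logs insert:

```python
import logging

from sqlalchemy.orm import Session

logger = logging.getLogger("orchard.verification")


def quarantine_artifact(db: Session, artifact, expected_hash: str, computed_hash: str) -> None:
    """Mark an artifact as quarantined and record the security event (steps 1-4 above)."""
    artifact.status = "quarantined"   # 1. assumed status column on the artifacts table
    db.commit()
    logger.error(                     # 2. security event; an audit_logs row could be written here too
        "verification.quarantine artifact=%s expected=%s computed=%s",
        artifact.id, expected_hash, computed_hash,
    )
    # 3. optional alert webhook (ORCHARD_VERIFY_ALERT_WEBHOOK) would be called here
    # 4. download endpoints must refuse to serve artifacts with status == "quarantined"
```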
## Failure Detection
### Failure Types
| Failure Type | Detection Method | Severity |
|--------------|------------------|----------|
| Hash mismatch | Computed SHA256 ≠ Expected | Critical |
| Size mismatch | Actual bytes ≠ `Content-Length` | High |
| S3 read error | boto3 exception | Medium |
| Truncated content | Stream ends early | High |
| S3 object missing | `NoSuchKey` error | Critical |
| ETag mismatch | S3 ETag ≠ expected | Medium |
### Detection Implementation
```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class VerificationResult:
success: bool
failure_type: Optional[str] # hash_mismatch, size_mismatch, etc.
expected_hash: str
computed_hash: Optional[str]
expected_size: int
actual_size: Optional[int]
error_message: Optional[str]
retry_count: int
```
## Retry Mechanism
### Configuration
| Environment Variable | Default | Description |
|---------------------|---------|-------------|
| `ORCHARD_VERIFY_MAX_RETRIES` | 3 | Maximum retry attempts |
| `ORCHARD_VERIFY_RETRY_DELAY_MS` | 100 | Base delay between retries |
| `ORCHARD_VERIFY_RETRY_BACKOFF` | 2.0 | Exponential backoff multiplier |
| `ORCHARD_VERIFY_RETRY_MAX_DELAY_MS` | 5000 | Maximum delay cap |
### Backoff Formula
```
delay = min(base_delay * (backoff ^ attempt), max_delay)
```
Example with defaults:
- Attempt 1: 100ms
- Attempt 2: 200ms
- Attempt 3: 400ms
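A one-function sketch of the formula above using the documented defaults (this helper is assumed by the retry flow below):

```python
def calculate_backoff(attempt: int, base_delay: float = 100,
                      backoff: float = 2.0, max_delay: float = 5000) -> float:
    """Delay in milliseconds before retrying attempt `attempt` (0-based), per the formula above."""
    return min(base_delay * (backoff ** attempt), max_delay)


# With the defaults: attempt 0 -> 100ms, attempt 1 -> 200ms, attempt 2 -> 400ms
```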
### Retry Flow
```python
async def download_with_retry(artifact, max_retries=3):
for attempt in range(max_retries + 1):
try:
content = await fetch_from_s3(artifact.s3_key)
computed_hash = compute_sha256(content)
if computed_hash == artifact.id:
return content # Success
# Hash mismatch
log.warning(f"Verification failed, attempt {attempt + 1}/{max_retries + 1}")
if attempt < max_retries:
delay = calculate_backoff(attempt)
await asyncio.sleep(delay / 1000)
else:
raise IntegrityError("Max retries exceeded")
except S3Error as e:
if attempt < max_retries:
delay = calculate_backoff(attempt)
await asyncio.sleep(delay / 1000)
else:
raise
```
### Retryable vs Non-Retryable Failures
**Retryable:**
- S3 read timeout
- S3 connection error
- Hash mismatch (may be transient S3 issue)
- Truncated content
**Non-Retryable:**
- S3 object not found (404)
- S3 access denied (403)
- Artifact not in database
- Strict mode failures
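A small sketch of how the split above might be encoded; the S3 error-code sets are assumptions, and the real code would map concrete boto3 `ClientError` codes:

```python
from botocore.exceptions import ClientError

RETRYABLE_S3_CODES = {"RequestTimeout", "SlowDown", "InternalError"}   # assumed mapping
NON_RETRYABLE_S3_CODES = {"NoSuchKey", "AccessDenied"}


def is_retryable(error: Exception, strict_mode: bool = False) -> bool:
    """Decide whether a verification failure should be retried."""
    if strict_mode:                      # strict mode never retries
        return False
    if isinstance(error, ClientError):
        code = error.response.get("Error", {}).get("Code", "")
        if code in NON_RETRYABLE_S3_CODES:
            return False
        return code in RETRYABLE_S3_CODES
    # hash mismatches and truncated reads are treated as possibly transient
    return True
```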
## Configuration Reference
### Environment Variables
```bash
# Verification mode (none, header, stream, pre, strict)
ORCHARD_VERIFY_MODE=none
# Retry settings
ORCHARD_VERIFY_MAX_RETRIES=3
ORCHARD_VERIFY_RETRY_DELAY_MS=100
ORCHARD_VERIFY_RETRY_BACKOFF=2.0
ORCHARD_VERIFY_RETRY_MAX_DELAY_MS=5000
# Memory limit for pre-verify buffering (bytes)
ORCHARD_VERIFY_MEMORY_LIMIT=104857600 # 100MB
# Strict mode settings
ORCHARD_VERIFY_QUARANTINE_ON_FAILURE=true
ORCHARD_VERIFY_ALERT_WEBHOOK=https://alerts.example.com/webhook
# Allow per-request mode override
ORCHARD_VERIFY_ALLOW_OVERRIDE=true
```
### Per-Request Override
When `ORCHARD_VERIFY_ALLOW_OVERRIDE=true`, clients can specify verification mode:
```
GET /api/v1/project/foo/bar/+/v1.0.0?verify=pre
GET /api/v1/project/foo/bar/+/v1.0.0?verify=none
```
## API Changes
### Download Endpoint
**Request:**
```
GET /api/v1/project/{project}/{package}/+/{ref}?verify={mode}
```
**New Query Parameters:**
| Parameter | Type | Default | Description |
|-----------|------|---------|-------------|
| `verify` | string | from config | Verification mode |
**New Response Headers:**
| Header | Description |
|--------|-------------|
| `X-Checksum-SHA256` | Expected SHA256 hash |
| `X-Verify-Mode` | Active verification mode |
| `X-Verified` | `true` if server verified content |
| `Digest` | RFC 3230 digest header |
### New Endpoint: Verify Artifact
**Request:**
```
POST /api/v1/project/{project}/{package}/+/{ref}/verify
```
**Response:**
```json
{
  "artifact_id": "abc123...",
  "verified": true,
  "expected_hash": "abc123...",
  "computed_hash": "abc123...",
  "size_match": true,
  "expected_size": 1048576,
  "actual_size": 1048576,
  "verification_time_ms": 45
}
```
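A minimal client-side call to this endpoint with `requests` (the host is the same illustrative one used in the client examples below):

```python
import requests

resp = requests.post(
    "https://orchard.example.com/api/v1/project/myproject/mypackage/+/v1.0.0/verify",
    timeout=30,
)
resp.raise_for_status()
result = resp.json()
if not (result["verified"] and result["size_match"]):
    raise RuntimeError(f"Server-side verification failed: {result}")
```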
## Logging and Monitoring
### Log Events
| Event | Level | When |
|-------|-------|------|
| `verification.success` | INFO | Hash verified successfully |
| `verification.failure` | ERROR | Hash mismatch detected |
| `verification.retry` | WARN | Retry attempt initiated |
| `verification.quarantine` | ERROR | Artifact quarantined |
| `verification.skip` | DEBUG | Verification skipped (mode=none) |
### Metrics
| Metric | Type | Description |
|--------|------|-------------|
| `orchard_verification_total` | Counter | Total verification attempts |
| `orchard_verification_failures` | Counter | Failed verifications |
| `orchard_verification_retries` | Counter | Retry attempts |
| `orchard_verification_duration_ms` | Histogram | Verification time |
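A sketch of registering these metrics with `prometheus_client`; the library choice is an assumption, and the metric names follow the table above.

```python
from prometheus_client import Counter, Histogram

verification_total = Counter(
    "orchard_verification_total", "Total verification attempts"
)
verification_failures = Counter(
    "orchard_verification_failures", "Failed verifications"
)
verification_retries = Counter(
    "orchard_verification_retries", "Retry attempts"
)
verification_duration_ms = Histogram(
    "orchard_verification_duration_ms", "Verification time in milliseconds"
)
```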
### Audit Log Entry
```json
{
  "action": "artifact.download.verified",
  "resource": "project/foo/package/bar/artifact/abc123",
  "user_id": "user@example.com",
  "details": {
    "verification_mode": "pre",
    "verified": true,
    "retry_count": 0,
    "duration_ms": 45
  }
}
```
## Security Considerations
1. **Strict Mode for Sensitive Data**: Use strict mode for artifacts containing credentials, certificates, or security-critical code.
2. **Quarantine Isolation**: Quarantined artifacts should be moved to a separate S3 prefix or bucket for forensic analysis.
3. **Alert on Repeated Failures**: Multiple verification failures for the same artifact may indicate storage corruption or tampering.
4. **Audit Trail**: All verification events should be logged for compliance and forensic purposes.
5. **Client Trust**: In `none` and `header` modes, clients must implement their own verification for security guarantees.
## Implementation Phases
### Phase 1: Headers Only
- Add `X-Checksum-SHA256` header to all downloads
- Add `verify=header` mode support
- Add configuration options
### Phase 2: Stream Verification
- Implement `HashingStreamWrapper`
- Add `verify=stream` mode
- Add verification logging
### Phase 3: Pre-Verification
- Implement buffered verification
- Add retry mechanism
- Add `verify=pre` mode
### Phase 4: Strict Mode
- Implement quarantine mechanism
- Add alerting integration
- Add `verify=strict` mode
## Client Integration Examples
### curl with Verification
```bash
#!/bin/bash
URL="https://orchard.example.com/api/v1/project/myproject/mypackage/+/v1.0.0"
# Get expected hash from headers
EXPECTED=$(curl -sI "$URL" | grep -i "X-Checksum-SHA256" | tr -d '\r' | cut -d' ' -f2)
# Download file
curl -sO "$URL"
FILENAME=$(basename "$URL")
# Verify
ACTUAL=$(sha256sum "$FILENAME" | cut -d' ' -f1)
if [ "$EXPECTED" = "$ACTUAL" ]; then
echo "✓ Verification passed"
else
echo "✗ Verification FAILED"
echo " Expected: $EXPECTED"
echo " Actual: $ACTUAL"
exit 1
fi
```
### Python Client
```python
import hashlib
import requests
def download_verified(url: str) -> bytes:
    # Get headers first
    head = requests.head(url)
    expected_hash = head.headers.get('X-Checksum-SHA256')
    expected_size = int(head.headers.get('Content-Length', 0))
    # Download content
    response = requests.get(url)
    content = response.content
    # Verify size
    if len(content) != expected_size:
        raise ValueError(f"Size mismatch: {len(content)} != {expected_size}")
    # Verify hash
    actual_hash = hashlib.sha256(content).hexdigest()
    if actual_hash != expected_hash:
        raise ValueError(f"Hash mismatch: {actual_hash} != {expected_hash}")
    return content
```
### Server-Side Verification
```bash
# Force server to verify before sending
curl -O "https://orchard.example.com/api/v1/project/myproject/mypackage/+/v1.0.0?verify=pre"
# Check if verification was performed
curl -I "https://orchard.example.com/api/v1/project/myproject/mypackage/+/v1.0.0?verify=pre" | grep X-Verified
# X-Verified: true
```

View File

@@ -11,8 +11,6 @@ import {
TagListParams,
PackageListParams,
ArtifactListParams,
ProjectListParams,
GlobalSearchResponse,
} from './types';
const API_BASE = '/api/v1';
@@ -36,15 +34,8 @@ function buildQueryString(params: Record<string, unknown>): string {
return query ? `?${query}` : '';
}
// Global Search API
export async function globalSearch(query: string, limit: number = 5): Promise<GlobalSearchResponse> {
const params = buildQueryString({ q: query, limit });
const response = await fetch(`${API_BASE}/search${params}`);
return handleResponse<GlobalSearchResponse>(response);
}
// Project API
export async function listProjects(params: ProjectListParams = {}): Promise<PaginatedResponse<Project>> {
export async function listProjects(params: ListParams = {}): Promise<PaginatedResponse<Project>> {
const query = buildQueryString(params as Record<string, unknown>);
const response = await fetch(`${API_BASE}/projects${query}`);
return handleResponse<PaginatedResponse<Project>>(response);

View File

@@ -1,75 +0,0 @@
.filter-dropdown {
position: relative;
}
.filter-dropdown__trigger {
display: flex;
align-items: center;
gap: 8px;
padding: 8px 12px;
background: var(--bg-tertiary);
border: 1px solid var(--border-primary);
border-radius: var(--radius-md);
color: var(--text-secondary);
font-size: 0.875rem;
cursor: pointer;
transition: all var(--transition-fast);
}
.filter-dropdown__trigger:hover {
background: var(--bg-hover);
color: var(--text-primary);
}
.filter-dropdown__trigger--active {
border-color: var(--accent-primary);
color: var(--text-primary);
}
.filter-dropdown__chevron {
transition: transform var(--transition-fast);
}
.filter-dropdown__chevron--open {
transform: rotate(180deg);
}
.filter-dropdown__menu {
position: absolute;
top: calc(100% + 4px);
left: 0;
min-width: 150px;
background: var(--bg-secondary);
border: 1px solid var(--border-primary);
border-radius: var(--radius-md);
box-shadow: var(--shadow-lg);
z-index: 50;
overflow: hidden;
}
.filter-dropdown__option {
display: flex;
align-items: center;
justify-content: space-between;
width: 100%;
padding: 8px 12px;
background: transparent;
border: none;
color: var(--text-primary);
font-size: 0.875rem;
text-align: left;
cursor: pointer;
transition: background var(--transition-fast);
}
.filter-dropdown__option:hover {
background: var(--bg-hover);
}
.filter-dropdown__option--selected {
color: var(--accent-primary);
}
.filter-dropdown__option svg {
color: var(--accent-primary);
}

View File

@@ -1,80 +0,0 @@
import { useState, useRef, useEffect } from 'react';
import './FilterDropdown.css';
export interface FilterOption {
value: string;
label: string;
}
interface FilterDropdownProps {
label: string;
options: FilterOption[];
value: string;
onChange: (value: string) => void;
className?: string;
}
export function FilterDropdown({ label, options, value, onChange, className = '' }: FilterDropdownProps) {
const [isOpen, setIsOpen] = useState(false);
const dropdownRef = useRef<HTMLDivElement>(null);
const selectedOption = options.find((o) => o.value === value);
useEffect(() => {
function handleClickOutside(event: MouseEvent) {
if (dropdownRef.current && !dropdownRef.current.contains(event.target as Node)) {
setIsOpen(false);
}
}
document.addEventListener('mousedown', handleClickOutside);
return () => document.removeEventListener('mousedown', handleClickOutside);
}, []);
return (
<div className={`filter-dropdown ${className}`.trim()} ref={dropdownRef}>
<button
type="button"
className={`filter-dropdown__trigger ${value ? 'filter-dropdown__trigger--active' : ''}`}
onClick={() => setIsOpen(!isOpen)}
aria-expanded={isOpen}
>
<span>{selectedOption ? selectedOption.label : label}</span>
<svg
className={`filter-dropdown__chevron ${isOpen ? 'filter-dropdown__chevron--open' : ''}`}
width="14"
height="14"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
strokeWidth="2"
>
<polyline points="6 9 12 15 18 9" />
</svg>
</button>
{isOpen && (
<div className="filter-dropdown__menu">
{options.map((option) => (
<button
key={option.value}
type="button"
className={`filter-dropdown__option ${option.value === value ? 'filter-dropdown__option--selected' : ''}`}
onClick={() => {
onChange(option.value);
setIsOpen(false);
}}
>
{option.label}
{option.value === value && (
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<polyline points="20 6 9 17 4 12" />
</svg>
)}
</button>
))}
</div>
)}
</div>
);
}

View File

@@ -1,216 +0,0 @@
.global-search {
position: relative;
flex: 1;
max-width: 400px;
margin: 0 24px;
}
.global-search__input-wrapper {
position: relative;
display: flex;
align-items: center;
}
.global-search__icon {
position: absolute;
left: 12px;
color: var(--text-secondary);
pointer-events: none;
}
.global-search__input {
width: 100%;
padding: 8px 40px 8px 36px;
background: var(--bg-tertiary);
border: 1px solid var(--border-primary);
border-radius: var(--radius-md);
color: var(--text-primary);
font-size: 0.875rem;
transition: all var(--transition-fast);
}
.global-search__input:focus {
outline: none;
border-color: var(--accent-primary);
box-shadow: 0 0 0 3px rgba(16, 185, 129, 0.15);
}
.global-search__input::placeholder {
color: var(--text-muted);
}
.global-search__shortcut {
position: absolute;
right: 8px;
padding: 2px 6px;
background: var(--bg-secondary);
border: 1px solid var(--border-primary);
border-radius: var(--radius-sm);
color: var(--text-muted);
font-family: inherit;
font-size: 0.75rem;
pointer-events: none;
}
.global-search__spinner {
position: absolute;
right: 36px;
width: 14px;
height: 14px;
border: 2px solid var(--border-primary);
border-top-color: var(--accent-primary);
border-radius: 50%;
animation: spin 0.6s linear infinite;
}
@keyframes spin {
to {
transform: rotate(360deg);
}
}
/* Dropdown */
.global-search__dropdown {
position: absolute;
top: calc(100% + 8px);
left: 0;
right: 0;
background: var(--bg-secondary);
border: 1px solid var(--border-primary);
border-radius: var(--radius-lg);
box-shadow: var(--shadow-lg);
max-height: 400px;
overflow-y: auto;
z-index: 1000;
}
.global-search__empty {
padding: 24px;
text-align: center;
color: var(--text-secondary);
font-size: 0.875rem;
}
/* Sections */
.global-search__section {
padding: 8px 0;
border-bottom: 1px solid var(--border-primary);
}
.global-search__section:last-child {
border-bottom: none;
}
.global-search__section-header {
display: flex;
align-items: center;
justify-content: space-between;
padding: 4px 12px 8px;
color: var(--text-secondary);
font-size: 0.75rem;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.05em;
}
.global-search__count {
background: var(--bg-tertiary);
padding: 2px 6px;
border-radius: var(--radius-sm);
font-size: 0.7rem;
}
/* Results */
.global-search__result {
display: flex;
align-items: flex-start;
gap: 12px;
width: 100%;
padding: 8px 12px;
background: transparent;
border: none;
text-align: left;
color: var(--text-primary);
cursor: pointer;
transition: background var(--transition-fast);
}
.global-search__result:hover,
.global-search__result.selected {
background: var(--bg-hover);
}
.global-search__result svg {
flex-shrink: 0;
margin-top: 2px;
color: var(--text-secondary);
}
.global-search__result-content {
flex: 1;
min-width: 0;
display: flex;
flex-direction: column;
gap: 2px;
}
.global-search__result-name {
font-weight: 500;
color: var(--text-primary);
}
.global-search__result-path {
font-size: 0.75rem;
color: var(--text-secondary);
}
.global-search__result-desc {
font-size: 0.75rem;
color: var(--text-muted);
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
/* Badges */
.global-search__badge {
flex-shrink: 0;
padding: 2px 8px;
border-radius: var(--radius-sm);
font-size: 0.7rem;
font-weight: 500;
text-transform: uppercase;
}
.global-search__badge.public {
background: rgba(16, 185, 129, 0.15);
color: var(--accent-primary);
}
.global-search__badge.private {
background: rgba(234, 179, 8, 0.15);
color: #eab308;
}
.global-search__badge.format {
background: var(--bg-tertiary);
color: var(--text-secondary);
}
/* Responsive */
@media (max-width: 768px) {
.global-search {
max-width: none;
margin: 0 12px;
}
.global-search__shortcut {
display: none;
}
}
@media (max-width: 640px) {
.global-search {
display: none;
}
}

View File

@@ -1,265 +0,0 @@
import { useState, useEffect, useRef, useCallback } from 'react';
import { useNavigate } from 'react-router-dom';
import { globalSearch } from '../api';
import { GlobalSearchResponse } from '../types';
import './GlobalSearch.css';
export function GlobalSearch() {
const navigate = useNavigate();
const [query, setQuery] = useState('');
const [results, setResults] = useState<GlobalSearchResponse | null>(null);
const [loading, setLoading] = useState(false);
const [isOpen, setIsOpen] = useState(false);
const [selectedIndex, setSelectedIndex] = useState(-1);
const inputRef = useRef<HTMLInputElement>(null);
const containerRef = useRef<HTMLDivElement>(null);
// Build flat list of results for keyboard navigation
const flatResults = results
? [
...results.projects.map((p) => ({ type: 'project' as const, item: p })),
...results.packages.map((p) => ({ type: 'package' as const, item: p })),
...results.artifacts.map((a) => ({ type: 'artifact' as const, item: a })),
]
: [];
const handleSearch = useCallback(async (searchQuery: string) => {
if (!searchQuery.trim()) {
setResults(null);
setIsOpen(false);
return;
}
setLoading(true);
try {
const data = await globalSearch(searchQuery);
setResults(data);
setIsOpen(true);
setSelectedIndex(-1);
} catch (err) {
console.error('Search failed:', err);
setResults(null);
} finally {
setLoading(false);
}
}, []);
// Debounced search
useEffect(() => {
const timer = setTimeout(() => {
handleSearch(query);
}, 300);
return () => clearTimeout(timer);
}, [query, handleSearch]);
// Close on click outside
useEffect(() => {
function handleClickOutside(event: MouseEvent) {
if (containerRef.current && !containerRef.current.contains(event.target as Node)) {
setIsOpen(false);
}
}
document.addEventListener('mousedown', handleClickOutside);
return () => document.removeEventListener('mousedown', handleClickOutside);
}, []);
// Keyboard navigation
useEffect(() => {
function handleKeyDown(event: KeyboardEvent) {
if (event.key === '/' && !['INPUT', 'TEXTAREA'].includes((event.target as HTMLElement).tagName)) {
event.preventDefault();
inputRef.current?.focus();
}
if (!isOpen) return;
switch (event.key) {
case 'ArrowDown':
event.preventDefault();
setSelectedIndex((prev) => Math.min(prev + 1, flatResults.length - 1));
break;
case 'ArrowUp':
event.preventDefault();
setSelectedIndex((prev) => Math.max(prev - 1, -1));
break;
case 'Enter':
if (selectedIndex >= 0 && flatResults[selectedIndex]) {
event.preventDefault();
navigateToResult(flatResults[selectedIndex]);
}
break;
case 'Escape':
setIsOpen(false);
inputRef.current?.blur();
break;
}
}
document.addEventListener('keydown', handleKeyDown);
return () => document.removeEventListener('keydown', handleKeyDown);
}, [isOpen, selectedIndex, flatResults]);
function navigateToResult(result: (typeof flatResults)[0]) {
setIsOpen(false);
setQuery('');
switch (result.type) {
case 'project':
navigate(`/project/${result.item.name}`);
break;
case 'package':
navigate(`/project/${result.item.project_name}/${result.item.name}`);
break;
case 'artifact':
navigate(`/project/${result.item.project_name}/${result.item.package_name}`);
break;
}
}
const hasResults = results && results.counts.total > 0;
return (
<div className="global-search" ref={containerRef}>
<div className="global-search__input-wrapper">
<svg
className="global-search__icon"
width="16"
height="16"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
strokeWidth="2"
>
<circle cx="11" cy="11" r="8" />
<line x1="21" y1="21" x2="16.65" y2="16.65" />
</svg>
<input
ref={inputRef}
type="text"
value={query}
onChange={(e) => setQuery(e.target.value)}
onFocus={() => query && results && setIsOpen(true)}
placeholder="Search projects, packages, artifacts..."
className="global-search__input"
/>
<kbd className="global-search__shortcut">/</kbd>
{loading && <span className="global-search__spinner" />}
</div>
{isOpen && (
<div className="global-search__dropdown">
{!hasResults && query && (
<div className="global-search__empty">No results found for "{query}"</div>
)}
{hasResults && (
<>
{results.projects.length > 0 && (
<div className="global-search__section">
<div className="global-search__section-header">
Projects
<span className="global-search__count">{results.counts.projects}</span>
</div>
{results.projects.map((project, index) => {
const flatIndex = index;
return (
<button
key={project.id}
className={`global-search__result ${selectedIndex === flatIndex ? 'selected' : ''}`}
onClick={() => navigateToResult({ type: 'project', item: project })}
onMouseEnter={() => setSelectedIndex(flatIndex)}
>
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<path d="M22 19a2 2 0 0 1-2 2H4a2 2 0 0 1-2-2V5a2 2 0 0 1 2-2h5l2 3h9a2 2 0 0 1 2 2z" />
</svg>
<div className="global-search__result-content">
<span className="global-search__result-name">{project.name}</span>
{project.description && (
<span className="global-search__result-desc">{project.description}</span>
)}
</div>
<span className={`global-search__badge ${project.is_public ? 'public' : 'private'}`}>
{project.is_public ? 'Public' : 'Private'}
</span>
</button>
);
})}
</div>
)}
{results.packages.length > 0 && (
<div className="global-search__section">
<div className="global-search__section-header">
Packages
<span className="global-search__count">{results.counts.packages}</span>
</div>
{results.packages.map((pkg, index) => {
const flatIndex = results.projects.length + index;
return (
<button
key={pkg.id}
className={`global-search__result ${selectedIndex === flatIndex ? 'selected' : ''}`}
onClick={() => navigateToResult({ type: 'package', item: pkg })}
onMouseEnter={() => setSelectedIndex(flatIndex)}
>
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<path d="M16.5 9.4l-9-5.19M21 16V8a2 2 0 0 0-1-1.73l-7-4a2 2 0 0 0-2 0l-7 4A2 2 0 0 0 3 8v8a2 2 0 0 0 1 1.73l7 4a2 2 0 0 0 2 0l7-4A2 2 0 0 0 21 16z" />
<polyline points="3.27 6.96 12 12.01 20.73 6.96" />
<line x1="12" y1="22.08" x2="12" y2="12" />
</svg>
<div className="global-search__result-content">
<span className="global-search__result-name">{pkg.name}</span>
<span className="global-search__result-path">{pkg.project_name}</span>
{pkg.description && (
<span className="global-search__result-desc">{pkg.description}</span>
)}
</div>
<span className="global-search__badge format">{pkg.format}</span>
</button>
);
})}
</div>
)}
{results.artifacts.length > 0 && (
<div className="global-search__section">
<div className="global-search__section-header">
Artifacts / Tags
<span className="global-search__count">{results.counts.artifacts}</span>
</div>
{results.artifacts.map((artifact, index) => {
const flatIndex = results.projects.length + results.packages.length + index;
return (
<button
key={artifact.tag_id}
className={`global-search__result ${selectedIndex === flatIndex ? 'selected' : ''}`}
onClick={() => navigateToResult({ type: 'artifact', item: artifact })}
onMouseEnter={() => setSelectedIndex(flatIndex)}
>
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<path d="M20.59 13.41l-7.17 7.17a2 2 0 0 1-2.83 0L2 12V2h10l8.59 8.59a2 2 0 0 1 0 2.82z" />
<line x1="7" y1="7" x2="7.01" y2="7" />
</svg>
<div className="global-search__result-content">
<span className="global-search__result-name">{artifact.tag_name}</span>
<span className="global-search__result-path">
{artifact.project_name} / {artifact.package_name}
</span>
{artifact.original_name && (
<span className="global-search__result-desc">{artifact.original_name}</span>
)}
</div>
</button>
);
})}
</div>
)}
</>
)}
</div>
)}
</div>
);
}

View File

@@ -1,6 +1,5 @@
import { ReactNode } from 'react';
import { Link, useLocation } from 'react-router-dom';
import { GlobalSearch } from './GlobalSearch';
import './Layout.css';
interface LayoutProps {
@@ -33,7 +32,6 @@ function Layout({ children }: LayoutProps) {
</div>
<span className="logo-text">Orchard</span>
</Link>
<GlobalSearch />
<nav className="nav">
<Link to="/" className={location.pathname === '/' ? 'active' : ''}>
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">

View File

@@ -4,9 +4,6 @@ export { Breadcrumb } from './Breadcrumb';
export { SearchInput } from './SearchInput';
export { SortDropdown } from './SortDropdown';
export type { SortOption } from './SortDropdown';
export { FilterDropdown } from './FilterDropdown';
export type { FilterOption } from './FilterDropdown';
export { FilterChip, FilterChipGroup } from './FilterChip';
export { DataTable } from './DataTable';
export { Pagination } from './Pagination';
export { GlobalSearch } from './GlobalSearch';

View File

@@ -3,8 +3,8 @@ import { Link, useSearchParams } from 'react-router-dom';
import { Project, PaginatedResponse } from '../types';
import { listProjects, createProject } from '../api';
import { Badge } from '../components/Badge';
import { SearchInput } from '../components/SearchInput';
import { SortDropdown, SortOption } from '../components/SortDropdown';
import { FilterDropdown, FilterOption } from '../components/FilterDropdown';
import { FilterChip, FilterChipGroup } from '../components/FilterChip';
import { Pagination } from '../components/Pagination';
import './Home.css';
@@ -15,12 +15,6 @@ const SORT_OPTIONS: SortOption[] = [
{ value: 'updated_at', label: 'Updated' },
];
const VISIBILITY_OPTIONS: FilterOption[] = [
{ value: '', label: 'All Projects' },
{ value: 'public', label: 'Public Only' },
{ value: 'private', label: 'Private Only' },
];
function Home() {
const [searchParams, setSearchParams] = useSearchParams();
@@ -33,9 +27,9 @@ function Home() {
// Get params from URL
const page = parseInt(searchParams.get('page') || '1', 10);
const search = searchParams.get('search') || '';
const sort = searchParams.get('sort') || 'name';
const order = (searchParams.get('order') || 'asc') as 'asc' | 'desc';
const visibility = searchParams.get('visibility') || '';
const updateParams = useCallback(
(updates: Record<string, string | undefined>) => {
@@ -55,12 +49,7 @@ function Home() {
const loadProjects = useCallback(async () => {
try {
setLoading(true);
const data = await listProjects({
page,
sort,
order,
visibility: visibility as 'public' | 'private' | undefined || undefined,
});
const data = await listProjects({ page, search, sort, order });
setProjectsData(data);
setError(null);
} catch (err) {
@@ -68,7 +57,7 @@ function Home() {
} finally {
setLoading(false);
}
}, [page, sort, order, visibility]);
}, [page, search, sort, order]);
useEffect(() => {
loadProjects();
@@ -89,12 +78,12 @@ function Home() {
}
}
const handleSortChange = (newSort: string, newOrder: 'asc' | 'desc') => {
updateParams({ sort: newSort, order: newOrder, page: '1' });
const handleSearchChange = (value: string) => {
updateParams({ search: value, page: '1' });
};
const handleVisibilityChange = (value: string) => {
updateParams({ visibility: value, page: '1' });
const handleSortChange = (newSort: string, newOrder: 'asc' | 'desc') => {
updateParams({ sort: newSort, order: newOrder, page: '1' });
};
const handlePageChange = (newPage: number) => {
@@ -105,7 +94,7 @@ function Home() {
setSearchParams({});
};
const hasActiveFilters = visibility !== '';
const hasActiveFilters = search !== '';
const projects = projectsData?.items || [];
const pagination = projectsData?.pagination;
@@ -165,24 +154,18 @@ function Home() {
)}
<div className="list-controls">
<FilterDropdown
label="Visibility"
options={VISIBILITY_OPTIONS}
value={visibility}
onChange={handleVisibilityChange}
<SearchInput
value={search}
onChange={handleSearchChange}
placeholder="Search projects..."
className="list-controls__search"
/>
<SortDropdown options={SORT_OPTIONS} value={sort} order={order} onChange={handleSortChange} />
</div>
{hasActiveFilters && (
<FilterChipGroup onClearAll={clearFilters}>
{visibility && (
<FilterChip
label="Visibility"
value={visibility === 'public' ? 'Public' : 'Private'}
onRemove={() => handleVisibilityChange('')}
/>
)}
{search && <FilterChip label="Search" value={search} onRemove={() => handleSearchChange('')} />}
</FilterChipGroup>
)}

View File

@@ -325,7 +325,7 @@ function PackagePage() {
<SearchInput
value={search}
onChange={handleSearchChange}
placeholder="Filter tags..."
placeholder="Search tags..."
className="list-controls__search"
/>
<SortDropdown options={SORT_OPTIONS} value={sort} order={order} onChange={handleSortChange} />
@@ -333,7 +333,7 @@ function PackagePage() {
{hasActiveFilters && (
<FilterChipGroup onClearAll={clearFilters}>
{search && <FilterChip label="Filter" value={search} onRemove={() => handleSearchChange('')} />}
{search && <FilterChip label="Search" value={search} onRemove={() => handleSearchChange('')} />}
</FilterChipGroup>
)}

View File

@@ -226,7 +226,7 @@ function ProjectPage() {
<SearchInput
value={search}
onChange={handleSearchChange}
placeholder="Filter packages..."
placeholder="Search packages..."
className="list-controls__search"
/>
<select
@@ -246,7 +246,7 @@ function ProjectPage() {
{hasActiveFilters && (
<FilterChipGroup onClearAll={clearFilters}>
{search && <FilterChip label="Filter" value={search} onRemove={() => handleSearchChange('')} />}
{search && <FilterChip label="Search" value={search} onRemove={() => handleSearchChange('')} />}
{format && <FilterChip label="Format" value={format} onRemove={() => handleFormatChange('')} />}
</FilterChipGroup>
)}

View File

@@ -117,47 +117,3 @@ export interface UploadResponse {
package: string;
tag: string | null;
}
// Global search types
export interface SearchResultProject {
id: string;
name: string;
description: string | null;
is_public: boolean;
}
export interface SearchResultPackage {
id: string;
project_id: string;
project_name: string;
name: string;
description: string | null;
format: string;
}
export interface SearchResultArtifact {
tag_id: string;
tag_name: string;
artifact_id: string;
package_id: string;
package_name: string;
project_name: string;
original_name: string | null;
}
export interface GlobalSearchResponse {
query: string;
projects: SearchResultProject[];
packages: SearchResultPackage[];
artifacts: SearchResultArtifact[];
counts: {
projects: number;
packages: number;
artifacts: number;
total: number;
};
}
export interface ProjectListParams extends ListParams {
visibility?: 'public' | 'private';
}

View File

@@ -62,3 +62,5 @@ Orchard has been installed!
Endpoint: {{ include "orchard.minio.host" . }}
Bucket: {{ .Values.orchard.s3.bucket }}
{{- end }}
For more information, visit: https://git.bitstorm.ca/bitforge/orchard

View File

@@ -97,27 +97,10 @@ password
{{- end }}
{{/*
MinIO internal host (for server-side operations)
*/}}
{{- define "orchard.minio.internalHost" -}}
{{- if .Values.minio.enabled }}
{{- printf "http://%s-minio:9000" .Release.Name }}
{{- else }}
{{- .Values.orchard.s3.endpoint }}
{{- end }}
{{- end }}
{{/*
MinIO host (uses external URL if ingress enabled, for presigned URLs)
MinIO host
*/}}
{{- define "orchard.minio.host" -}}
{{- if and .Values.minio.enabled .Values.minioIngress.enabled .Values.minioIngress.host }}
{{- if .Values.minioIngress.tls.enabled }}
{{- printf "https://%s" .Values.minioIngress.host }}
{{- else }}
{{- printf "http://%s" .Values.minioIngress.host }}
{{- end }}
{{- else if .Values.minio.enabled }}
{{- if .Values.minio.enabled }}
{{- printf "http://%s-minio:9000" .Release.Name }}
{{- else }}
{{- .Values.orchard.s3.endpoint }}

View File

@@ -92,10 +92,6 @@ spec:
secretKeyRef:
name: {{ include "orchard.minio.secretName" . }}
key: {{ if .Values.minio.enabled }}root-password{{ else }}{{ .Values.orchard.s3.existingSecretSecretKeyKey }}{{ end }}
- name: ORCHARD_DOWNLOAD_MODE
value: {{ .Values.orchard.download.mode | quote }}
- name: ORCHARD_PRESIGNED_URL_EXPIRY
value: {{ .Values.orchard.download.presignedUrlExpiry | quote }}
livenessProbe:
{{- toYaml .Values.livenessProbe | nindent 12 }}
readinessProbe:

View File

@@ -1,34 +0,0 @@
{{- if and .Values.minio.enabled .Values.minioIngress.enabled -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: {{ include "orchard.fullname" . }}-minio
labels:
{{- include "orchard.labels" . | nindent 4 }}
app.kubernetes.io/component: minio
{{- with .Values.minioIngress.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
{{- if .Values.minioIngress.className }}
ingressClassName: {{ .Values.minioIngress.className }}
{{- end }}
{{- if .Values.minioIngress.tls.enabled }}
tls:
- hosts:
- {{ .Values.minioIngress.host | quote }}
secretName: {{ .Values.minioIngress.tls.secretName }}
{{- end }}
rules:
- host: {{ .Values.minioIngress.host | quote }}
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: {{ .Release.Name }}-minio
port:
number: 9000
{{- end }}

View File

@@ -115,11 +115,6 @@ orchard:
existingSecretAccessKeyKey: "access-key-id"
existingSecretSecretKeyKey: "secret-access-key"
# Download configuration
download:
mode: "presigned" # presigned, redirect, or proxy
presignedUrlExpiry: 3600 # Presigned URL expiry in seconds
# PostgreSQL subchart configuration
postgresql:
enabled: true
@@ -153,18 +148,6 @@ minio:
enabled: false
size: 50Gi
# MinIO external ingress for presigned URL access (separate from subchart ingress)
minioIngress:
enabled: true
className: "nginx"
annotations:
cert-manager.io/cluster-issuer: "letsencrypt"
nginx.ingress.kubernetes.io/proxy-body-size: "0" # Disable body size limit for uploads
host: "minio-orch-dev.common.global.bsf.tools"
tls:
enabled: true
secretName: minio-tls
# Redis subchart configuration (for future caching)
redis:
enabled: false

View File

@@ -14,7 +14,6 @@ CREATE TABLE IF NOT EXISTS projects (
CREATE INDEX idx_projects_name ON projects(name);
CREATE INDEX idx_projects_created_by ON projects(created_by);
CREATE INDEX idx_projects_public ON projects(name) WHERE is_public = true;
-- Packages (collections within projects)
CREATE TABLE IF NOT EXISTS packages (
@@ -22,8 +21,6 @@ CREATE TABLE IF NOT EXISTS packages (
project_id UUID NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
name VARCHAR(255) NOT NULL,
description TEXT,
format VARCHAR(50) DEFAULT 'generic', -- package type: generic, npm, pypi, docker, etc.
platform VARCHAR(50) DEFAULT 'any', -- target platform: any, linux, darwin, windows, etc.
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
UNIQUE(project_id, name)
@@ -31,30 +28,21 @@ CREATE TABLE IF NOT EXISTS packages (
CREATE INDEX idx_packages_project_id ON packages(project_id);
CREATE INDEX idx_packages_name ON packages(name);
CREATE INDEX idx_packages_format ON packages(format);
CREATE INDEX idx_packages_platform ON packages(platform);
-- Artifacts (Content-Addressable)
CREATE TABLE IF NOT EXISTS artifacts (
id VARCHAR(64) PRIMARY KEY, -- SHA256 hash
size BIGINT NOT NULL CHECK (size > 0),
size BIGINT NOT NULL,
content_type VARCHAR(255),
original_name VARCHAR(1024),
checksum_md5 VARCHAR(32), -- MD5 hash for additional verification
checksum_sha1 VARCHAR(40), -- SHA1 hash for compatibility
s3_etag VARCHAR(64), -- S3 ETag for verification
metadata JSONB, -- format-specific metadata
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
created_by VARCHAR(255) NOT NULL,
ref_count INTEGER DEFAULT 1 CHECK (ref_count >= 0),
ref_count INTEGER DEFAULT 1,
s3_key VARCHAR(1024) NOT NULL
);
CREATE INDEX idx_artifacts_created_at ON artifacts(created_at);
CREATE INDEX idx_artifacts_created_by ON artifacts(created_by);
CREATE INDEX idx_artifacts_metadata ON artifacts USING GIN (metadata);
CREATE INDEX idx_artifacts_checksum_md5 ON artifacts(checksum_md5) WHERE checksum_md5 IS NOT NULL;
CREATE INDEX idx_artifacts_checksum_sha1 ON artifacts(checksum_sha1) WHERE checksum_sha1 IS NOT NULL;
-- Tags (Aliases pointing to artifacts)
CREATE TABLE IF NOT EXISTS tags (
@@ -63,14 +51,12 @@ CREATE TABLE IF NOT EXISTS tags (
name VARCHAR(255) NOT NULL,
artifact_id VARCHAR(64) NOT NULL REFERENCES artifacts(id),
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
created_by VARCHAR(255) NOT NULL,
UNIQUE(package_id, name)
);
CREATE INDEX idx_tags_package_id ON tags(package_id);
CREATE INDEX idx_tags_artifact_id ON tags(artifact_id);
CREATE INDEX idx_tags_package_created_at ON tags(package_id, created_at DESC);
-- Tag History (for rollback capability)
CREATE TABLE IF NOT EXISTS tag_history (
@@ -78,13 +64,11 @@ CREATE TABLE IF NOT EXISTS tag_history (
tag_id UUID NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
old_artifact_id VARCHAR(64) REFERENCES artifacts(id),
new_artifact_id VARCHAR(64) NOT NULL REFERENCES artifacts(id),
change_type VARCHAR(20) NOT NULL DEFAULT 'update' CHECK (change_type IN ('create', 'update', 'delete')),
changed_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
changed_by VARCHAR(255) NOT NULL
);
CREATE INDEX idx_tag_history_tag_id ON tag_history(tag_id);
CREATE INDEX idx_tag_history_changed_at ON tag_history(changed_at);
-- Uploads (upload event records)
CREATE TABLE IF NOT EXISTS uploads (
@@ -92,11 +76,6 @@ CREATE TABLE IF NOT EXISTS uploads (
artifact_id VARCHAR(64) NOT NULL REFERENCES artifacts(id),
package_id UUID NOT NULL REFERENCES packages(id),
original_name VARCHAR(1024),
tag_name VARCHAR(255), -- tag assigned during upload
user_agent VARCHAR(512), -- client identification
duration_ms INTEGER, -- upload timing in milliseconds
deduplicated BOOLEAN DEFAULT false, -- whether artifact was deduplicated
checksum_verified BOOLEAN DEFAULT true, -- whether checksum was verified
uploaded_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
uploaded_by VARCHAR(255) NOT NULL,
source_ip VARCHAR(45)
@@ -105,8 +84,6 @@ CREATE TABLE IF NOT EXISTS uploads (
CREATE INDEX idx_uploads_artifact_id ON uploads(artifact_id);
CREATE INDEX idx_uploads_package_id ON uploads(package_id);
CREATE INDEX idx_uploads_uploaded_at ON uploads(uploaded_at);
CREATE INDEX idx_uploads_package_uploaded_at ON uploads(package_id, uploaded_at DESC);
CREATE INDEX idx_uploads_uploaded_by_at ON uploads(uploaded_by, uploaded_at DESC);
-- Consumers (Dependency tracking)
CREATE TABLE IF NOT EXISTS consumers (
@@ -164,17 +141,14 @@ CREATE INDEX idx_audit_logs_action ON audit_logs(action);
CREATE INDEX idx_audit_logs_resource ON audit_logs(resource);
CREATE INDEX idx_audit_logs_user_id ON audit_logs(user_id);
CREATE INDEX idx_audit_logs_timestamp ON audit_logs(timestamp);
CREATE INDEX idx_audit_logs_resource_timestamp ON audit_logs(resource, timestamp DESC);
CREATE INDEX idx_audit_logs_user_timestamp ON audit_logs(user_id, timestamp DESC);
CREATE INDEX idx_audit_logs_details ON audit_logs USING GIN (details);
-- Trigger to update tag history on changes
CREATE OR REPLACE FUNCTION track_tag_changes()
RETURNS TRIGGER AS $$
BEGIN
IF TG_OP = 'UPDATE' AND OLD.artifact_id != NEW.artifact_id THEN
INSERT INTO tag_history (id, tag_id, old_artifact_id, new_artifact_id, change_type, changed_at, changed_by)
VALUES (gen_random_uuid(), NEW.id, OLD.artifact_id, NEW.artifact_id, 'update', NOW(), NEW.created_by);
INSERT INTO tag_history (id, tag_id, old_artifact_id, new_artifact_id, changed_at, changed_by)
VALUES (gen_random_uuid(), NEW.id, OLD.artifact_id, NEW.artifact_id, NOW(), NEW.created_by);
END IF;
RETURN NEW;
END;
@@ -184,72 +158,3 @@ CREATE TRIGGER tag_changes_trigger
AFTER UPDATE ON tags
FOR EACH ROW
EXECUTE FUNCTION track_tag_changes();
-- Trigger to auto-update updated_at timestamps
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER projects_updated_at_trigger
BEFORE UPDATE ON projects
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
CREATE TRIGGER packages_updated_at_trigger
BEFORE UPDATE ON packages
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
CREATE TRIGGER tags_updated_at_trigger
BEFORE UPDATE ON tags
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
-- Triggers for maintaining artifact ref_count accuracy
CREATE OR REPLACE FUNCTION increment_artifact_ref_count()
RETURNS TRIGGER AS $$
BEGIN
UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION decrement_artifact_ref_count()
RETURNS TRIGGER AS $$
BEGIN
UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id;
RETURN OLD;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION update_artifact_ref_count()
RETURNS TRIGGER AS $$
BEGIN
IF OLD.artifact_id != NEW.artifact_id THEN
UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id;
UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id;
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Note: ref_count triggers on tags table
-- These track how many tags reference each artifact
CREATE TRIGGER tags_ref_count_insert_trigger
AFTER INSERT ON tags
FOR EACH ROW
EXECUTE FUNCTION increment_artifact_ref_count();
CREATE TRIGGER tags_ref_count_delete_trigger
AFTER DELETE ON tags
FOR EACH ROW
EXECUTE FUNCTION decrement_artifact_ref_count();
CREATE TRIGGER tags_ref_count_update_trigger
AFTER UPDATE ON tags
FOR EACH ROW
EXECUTE FUNCTION update_artifact_ref_count();

View File

@@ -1,170 +0,0 @@
-- Migration 002: Schema Enhancements
-- Adds new fields, indexes, and triggers for improved functionality
-- ============================================
-- Packages: Add format and platform fields
-- ============================================
ALTER TABLE packages ADD COLUMN IF NOT EXISTS format VARCHAR(50) DEFAULT 'generic';
ALTER TABLE packages ADD COLUMN IF NOT EXISTS platform VARCHAR(50) DEFAULT 'any';
CREATE INDEX IF NOT EXISTS idx_packages_format ON packages(format);
CREATE INDEX IF NOT EXISTS idx_packages_platform ON packages(platform);
-- ============================================
-- Artifacts: Add checksum_md5, metadata, and CHECK constraints
-- ============================================
ALTER TABLE artifacts ADD COLUMN IF NOT EXISTS checksum_md5 VARCHAR(32);
ALTER TABLE artifacts ADD COLUMN IF NOT EXISTS metadata JSONB;
-- Add CHECK constraints (will fail if data violates them)
DO $$
BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_constraint WHERE conname = 'artifacts_ref_count_check') THEN
ALTER TABLE artifacts ADD CONSTRAINT artifacts_ref_count_check CHECK (ref_count >= 0);
END IF;
IF NOT EXISTS (SELECT 1 FROM pg_constraint WHERE conname = 'artifacts_size_check') THEN
ALTER TABLE artifacts ADD CONSTRAINT artifacts_size_check CHECK (size > 0);
END IF;
END $$;
CREATE INDEX IF NOT EXISTS idx_artifacts_metadata ON artifacts USING GIN (metadata);
-- ============================================
-- Tags: Add updated_at and composite index
-- ============================================
ALTER TABLE tags ADD COLUMN IF NOT EXISTS updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW();
CREATE INDEX IF NOT EXISTS idx_tags_package_created_at ON tags(package_id, created_at DESC);
-- ============================================
-- Tag History: Add change_type and index
-- ============================================
ALTER TABLE tag_history ADD COLUMN IF NOT EXISTS change_type VARCHAR(20) DEFAULT 'update';
DO $$
BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_constraint WHERE conname = 'tag_history_change_type_check') THEN
ALTER TABLE tag_history ADD CONSTRAINT tag_history_change_type_check
CHECK (change_type IN ('create', 'update', 'delete'));
END IF;
END $$;
CREATE INDEX IF NOT EXISTS idx_tag_history_changed_at ON tag_history(changed_at);
-- ============================================
-- Uploads: Add new fields and composite indexes
-- ============================================
ALTER TABLE uploads ADD COLUMN IF NOT EXISTS tag_name VARCHAR(255);
ALTER TABLE uploads ADD COLUMN IF NOT EXISTS user_agent VARCHAR(512);
ALTER TABLE uploads ADD COLUMN IF NOT EXISTS duration_ms INTEGER;
ALTER TABLE uploads ADD COLUMN IF NOT EXISTS deduplicated BOOLEAN DEFAULT false;
ALTER TABLE uploads ADD COLUMN IF NOT EXISTS checksum_verified BOOLEAN DEFAULT true;
CREATE INDEX IF NOT EXISTS idx_uploads_package_uploaded_at ON uploads(package_id, uploaded_at DESC);
CREATE INDEX IF NOT EXISTS idx_uploads_uploaded_by_at ON uploads(uploaded_by, uploaded_at DESC);
-- ============================================
-- Audit Logs: Add composite indexes and GIN index
-- ============================================
CREATE INDEX IF NOT EXISTS idx_audit_logs_resource_timestamp ON audit_logs(resource, timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_audit_logs_user_timestamp ON audit_logs(user_id, timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_audit_logs_details ON audit_logs USING GIN (details);
-- ============================================
-- Projects: Add partial index for public projects
-- ============================================
CREATE INDEX IF NOT EXISTS idx_projects_public ON projects(name) WHERE is_public = true;
-- ============================================
-- Triggers: Update tag_changes trigger for change_type
-- ============================================
CREATE OR REPLACE FUNCTION track_tag_changes()
RETURNS TRIGGER AS $$
BEGIN
IF TG_OP = 'UPDATE' AND OLD.artifact_id != NEW.artifact_id THEN
INSERT INTO tag_history (id, tag_id, old_artifact_id, new_artifact_id, change_type, changed_at, changed_by)
VALUES (gen_random_uuid(), NEW.id, OLD.artifact_id, NEW.artifact_id, 'update', NOW(), NEW.created_by);
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- ============================================
-- Triggers: Auto-update updated_at timestamps
-- ============================================
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Drop triggers if they exist, then recreate
DROP TRIGGER IF EXISTS projects_updated_at_trigger ON projects;
CREATE TRIGGER projects_updated_at_trigger
BEFORE UPDATE ON projects
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
DROP TRIGGER IF EXISTS packages_updated_at_trigger ON packages;
CREATE TRIGGER packages_updated_at_trigger
BEFORE UPDATE ON packages
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
DROP TRIGGER IF EXISTS tags_updated_at_trigger ON tags;
CREATE TRIGGER tags_updated_at_trigger
BEFORE UPDATE ON tags
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
-- ============================================
-- Triggers: Maintain artifact ref_count accuracy
-- ============================================
CREATE OR REPLACE FUNCTION increment_artifact_ref_count()
RETURNS TRIGGER AS $$
BEGIN
UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION decrement_artifact_ref_count()
RETURNS TRIGGER AS $$
BEGIN
UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id;
RETURN OLD;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION update_artifact_ref_count()
RETURNS TRIGGER AS $$
BEGIN
IF OLD.artifact_id != NEW.artifact_id THEN
UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id;
UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id;
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Note: ref_count triggers on tags table
-- These track how many tags reference each artifact
DROP TRIGGER IF EXISTS tags_ref_count_insert_trigger ON tags;
CREATE TRIGGER tags_ref_count_insert_trigger
AFTER INSERT ON tags
FOR EACH ROW
EXECUTE FUNCTION increment_artifact_ref_count();
DROP TRIGGER IF EXISTS tags_ref_count_delete_trigger ON tags;
CREATE TRIGGER tags_ref_count_delete_trigger
AFTER DELETE ON tags
FOR EACH ROW
EXECUTE FUNCTION decrement_artifact_ref_count();
DROP TRIGGER IF EXISTS tags_ref_count_update_trigger ON tags;
CREATE TRIGGER tags_ref_count_update_trigger
AFTER UPDATE ON tags
FOR EACH ROW
EXECUTE FUNCTION update_artifact_ref_count();

View File

@@ -1,12 +0,0 @@
-- Migration 003: Additional Checksum Fields
-- Adds checksum_sha1 and s3_etag fields to artifacts table
-- ============================================
-- Artifacts: Add checksum_sha1 and s3_etag fields
-- ============================================
ALTER TABLE artifacts ADD COLUMN IF NOT EXISTS checksum_sha1 VARCHAR(40);
ALTER TABLE artifacts ADD COLUMN IF NOT EXISTS s3_etag VARCHAR(64);
-- Create indexes for checksum lookups (optional, for verification queries)
CREATE INDEX IF NOT EXISTS idx_artifacts_checksum_md5 ON artifacts(checksum_md5) WHERE checksum_md5 IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_artifacts_checksum_sha1 ON artifacts(checksum_sha1) WHERE checksum_sha1 IS NOT NULL;

View File

@@ -1,2 +0,0 @@
# Import docker project template tasks
from prosper.projects.docker import *