orchard/backend/app/routes.py
Commit b124b94b56 by Mondo Diaz: Store SHA256 checksums with artifacts and add multiple hash support
- Add sha256 field to API responses as explicit alias of artifact id
- Add checksum_sha1 and s3_etag fields to artifacts table
- Compute MD5, SHA1, and capture S3 ETag during upload
- Update StorageResult to return all checksums from storage layer
- Add migration 003_checksum_fields.sql for existing databases
- Add Dockerfile.local and docker-compose.local.yml for local development
- Update schemas to include all checksum fields in responses
2025-12-15 14:15:37 -06:00


from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form, Request, Query, Header, Response
from fastapi.responses import StreamingResponse
from sqlalchemy.orm import Session
from sqlalchemy import or_, func
from typing import List, Optional
import math
import re
import io
import hashlib
from .database import get_db
from .storage import get_storage, S3Storage, MULTIPART_CHUNK_SIZE
from .models import Project, Package, Artifact, Tag, TagHistory, Upload, Consumer
from .schemas import (
ProjectCreate, ProjectResponse,
PackageCreate, PackageResponse, PackageDetailResponse, TagSummary,
PACKAGE_FORMATS, PACKAGE_PLATFORMS,
ArtifactResponse, ArtifactDetailResponse, ArtifactTagInfo, PackageArtifactResponse,
TagCreate, TagResponse, TagDetailResponse, TagHistoryResponse,
UploadResponse,
ConsumerResponse,
HealthResponse,
PaginatedResponse, PaginationMeta,
ResumableUploadInitRequest,
ResumableUploadInitResponse,
ResumableUploadPartResponse,
ResumableUploadCompleteRequest,
ResumableUploadCompleteResponse,
ResumableUploadStatusResponse,
GlobalSearchResponse, SearchResultProject, SearchResultPackage, SearchResultArtifact,
)
from .metadata import extract_metadata
router = APIRouter()
def get_user_id(request: Request) -> str:
"""Extract user ID from request (simplified for now)"""
api_key = request.headers.get("X-Orchard-API-Key")
if api_key:
return "api-user"
auth = request.headers.get("Authorization")
if auth:
return "bearer-user"
return "anonymous"
# Health check
@router.get("/health", response_model=HealthResponse)
def health_check():
return HealthResponse(status="ok")
# Global search
@router.get("/api/v1/search", response_model=GlobalSearchResponse)
def global_search(
request: Request,
q: str = Query(..., min_length=1, description="Search query"),
limit: int = Query(default=5, ge=1, le=20, description="Results per type"),
db: Session = Depends(get_db),
):
"""
Search across all entity types (projects, packages, artifacts/tags).
Returns limited results for each type plus total counts.
"""
user_id = get_user_id(request)
search_lower = q.lower()
# Search projects (name and description)
project_query = db.query(Project).filter(
or_(Project.is_public == True, Project.created_by == user_id),
or_(
func.lower(Project.name).contains(search_lower),
func.lower(Project.description).contains(search_lower)
)
)
project_count = project_query.count()
projects = project_query.order_by(Project.name).limit(limit).all()
# Search packages (name and description) with project name
package_query = db.query(Package, Project.name.label("project_name")).join(
Project, Package.project_id == Project.id
).filter(
or_(Project.is_public == True, Project.created_by == user_id),
or_(
func.lower(Package.name).contains(search_lower),
func.lower(Package.description).contains(search_lower)
)
)
package_count = package_query.count()
package_results = package_query.order_by(Package.name).limit(limit).all()
# Search tags/artifacts (tag name and original filename)
artifact_query = db.query(
Tag, Artifact, Package.name.label("package_name"), Project.name.label("project_name")
).join(
Artifact, Tag.artifact_id == Artifact.id
).join(
Package, Tag.package_id == Package.id
).join(
Project, Package.project_id == Project.id
).filter(
or_(Project.is_public == True, Project.created_by == user_id),
or_(
func.lower(Tag.name).contains(search_lower),
func.lower(Artifact.original_name).contains(search_lower)
)
)
artifact_count = artifact_query.count()
artifact_results = artifact_query.order_by(Tag.name).limit(limit).all()
return GlobalSearchResponse(
query=q,
projects=[SearchResultProject(
id=p.id,
name=p.name,
description=p.description,
is_public=p.is_public
) for p in projects],
packages=[SearchResultPackage(
id=pkg.id,
project_id=pkg.project_id,
project_name=project_name,
name=pkg.name,
description=pkg.description,
format=pkg.format
) for pkg, project_name in package_results],
artifacts=[SearchResultArtifact(
tag_id=tag.id,
tag_name=tag.name,
artifact_id=artifact.id,
package_id=tag.package_id,
package_name=package_name,
project_name=project_name,
original_name=artifact.original_name
) for tag, artifact, package_name, project_name in artifact_results],
counts={
"projects": project_count,
"packages": package_count,
"artifacts": artifact_count,
"total": project_count + package_count + artifact_count
}
)
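# Usage sketch (illustration only, never called by this module): querying the
# global search endpoint from a client. The base_url value and the `requests`
# dependency are assumptions of the example, not requirements of this API.
def _example_global_search(base_url: str, query: str, limit: int = 5) -> dict:
    import requests  # assumed third-party dependency for the sketch
    resp = requests.get(f"{base_url}/api/v1/search", params={"q": query, "limit": limit})
    resp.raise_for_status()
    body = resp.json()
    # body["counts"]["total"] combines the project, package, and artifact/tag match counts.
    return body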
# Project routes
@router.get("/api/v1/projects", response_model=PaginatedResponse[ProjectResponse])
def list_projects(
request: Request,
page: int = Query(default=1, ge=1, description="Page number"),
limit: int = Query(default=20, ge=1, le=100, description="Items per page"),
search: Optional[str] = Query(default=None, description="Search by project name or description"),
visibility: Optional[str] = Query(default=None, description="Filter by visibility (public, private)"),
sort: str = Query(default="name", description="Sort field (name, created_at, updated_at)"),
order: str = Query(default="asc", description="Sort order (asc, desc)"),
db: Session = Depends(get_db),
):
user_id = get_user_id(request)
# Validate sort field
valid_sort_fields = {"name": Project.name, "created_at": Project.created_at, "updated_at": Project.updated_at}
if sort not in valid_sort_fields:
raise HTTPException(status_code=400, detail=f"Invalid sort field. Must be one of: {', '.join(valid_sort_fields.keys())}")
# Validate order
if order not in ("asc", "desc"):
raise HTTPException(status_code=400, detail="Invalid order. Must be 'asc' or 'desc'")
# Base query - filter by access
query = db.query(Project).filter(
or_(Project.is_public == True, Project.created_by == user_id)
)
# Apply visibility filter
if visibility == "public":
query = query.filter(Project.is_public == True)
elif visibility == "private":
query = query.filter(Project.is_public == False, Project.created_by == user_id)
# Apply search filter (case-insensitive on name and description)
if search:
search_lower = search.lower()
query = query.filter(
or_(
func.lower(Project.name).contains(search_lower),
func.lower(Project.description).contains(search_lower)
)
)
# Get total count before pagination
total = query.count()
# Apply sorting
sort_column = valid_sort_fields[sort]
if order == "desc":
query = query.order_by(sort_column.desc())
else:
query = query.order_by(sort_column.asc())
# Apply pagination
offset = (page - 1) * limit
projects = query.offset(offset).limit(limit).all()
# Calculate total pages
total_pages = math.ceil(total / limit) if total > 0 else 1
return PaginatedResponse(
items=projects,
pagination=PaginationMeta(
page=page,
limit=limit,
total=total,
total_pages=total_pages,
),
)
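# Usage sketch (illustration only, never called by this module): walking the
# paginated project listing page by page. base_url and the `requests` package
# are assumptions of the example.
def _example_iter_projects(base_url: str, search: Optional[str] = None):
    import requests  # assumed third-party dependency for the sketch
    page = 1
    while True:
        params = {"page": page, "limit": 50, "sort": "created_at", "order": "desc"}
        if search:
            params["search"] = search
        resp = requests.get(f"{base_url}/api/v1/projects", params=params)
        resp.raise_for_status()
        body = resp.json()
        yield from body["items"]
        if page >= body["pagination"]["total_pages"]:
            break
        page += 1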
@router.post("/api/v1/projects", response_model=ProjectResponse)
def create_project(project: ProjectCreate, request: Request, db: Session = Depends(get_db)):
user_id = get_user_id(request)
existing = db.query(Project).filter(Project.name == project.name).first()
if existing:
raise HTTPException(status_code=400, detail="Project already exists")
db_project = Project(
name=project.name,
description=project.description,
is_public=project.is_public,
created_by=user_id,
)
db.add(db_project)
db.commit()
db.refresh(db_project)
return db_project
@router.get("/api/v1/projects/{project_name}", response_model=ProjectResponse)
def get_project(project_name: str, db: Session = Depends(get_db)):
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise HTTPException(status_code=404, detail="Project not found")
return project
# Package routes
@router.get("/api/v1/project/{project_name}/packages", response_model=PaginatedResponse[PackageDetailResponse])
def list_packages(
project_name: str,
page: int = Query(default=1, ge=1, description="Page number"),
limit: int = Query(default=20, ge=1, le=100, description="Items per page"),
search: Optional[str] = Query(default=None, description="Search by name or description"),
sort: str = Query(default="name", description="Sort field (name, created_at, updated_at)"),
order: str = Query(default="asc", description="Sort order (asc, desc)"),
format: Optional[str] = Query(default=None, description="Filter by package format"),
platform: Optional[str] = Query(default=None, description="Filter by platform"),
db: Session = Depends(get_db),
):
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise HTTPException(status_code=404, detail="Project not found")
# Validate sort field
valid_sort_fields = {"name": Package.name, "created_at": Package.created_at, "updated_at": Package.updated_at}
if sort not in valid_sort_fields:
raise HTTPException(status_code=400, detail=f"Invalid sort field. Must be one of: {', '.join(valid_sort_fields.keys())}")
# Validate order
if order not in ("asc", "desc"):
raise HTTPException(status_code=400, detail="Invalid order. Must be 'asc' or 'desc'")
# Validate format filter
if format and format not in PACKAGE_FORMATS:
raise HTTPException(status_code=400, detail=f"Invalid format. Must be one of: {', '.join(PACKAGE_FORMATS)}")
# Validate platform filter
if platform and platform not in PACKAGE_PLATFORMS:
raise HTTPException(status_code=400, detail=f"Invalid platform. Must be one of: {', '.join(PACKAGE_PLATFORMS)}")
# Base query
query = db.query(Package).filter(Package.project_id == project.id)
# Apply search filter (case-insensitive on name and description)
if search:
search_lower = search.lower()
query = query.filter(
or_(
func.lower(Package.name).contains(search_lower),
func.lower(Package.description).contains(search_lower)
)
)
# Apply format filter
if format:
query = query.filter(Package.format == format)
# Apply platform filter
if platform:
query = query.filter(Package.platform == platform)
# Get total count before pagination
total = query.count()
# Apply sorting
sort_column = valid_sort_fields[sort]
if order == "desc":
query = query.order_by(sort_column.desc())
else:
query = query.order_by(sort_column.asc())
# Apply pagination
offset = (page - 1) * limit
packages = query.offset(offset).limit(limit).all()
# Calculate total pages
total_pages = math.ceil(total / limit) if total > 0 else 1
# Build detailed responses with aggregated data
detailed_packages = []
for pkg in packages:
# Get tag count
tag_count = db.query(func.count(Tag.id)).filter(Tag.package_id == pkg.id).scalar() or 0
# Get unique artifact count and total size via uploads
artifact_stats = db.query(
func.count(func.distinct(Upload.artifact_id)),
func.coalesce(func.sum(Artifact.size), 0)
).join(Artifact, Upload.artifact_id == Artifact.id).filter(
Upload.package_id == pkg.id
).first()
artifact_count = artifact_stats[0] if artifact_stats else 0
total_size = artifact_stats[1] if artifact_stats else 0
# Get latest tag
latest_tag_obj = db.query(Tag).filter(
Tag.package_id == pkg.id
).order_by(Tag.created_at.desc()).first()
latest_tag = latest_tag_obj.name if latest_tag_obj else None
# Get latest upload timestamp
latest_upload = db.query(func.max(Upload.uploaded_at)).filter(
Upload.package_id == pkg.id
).scalar()
# Get recent tags (limit 5)
recent_tags_objs = db.query(Tag).filter(
Tag.package_id == pkg.id
).order_by(Tag.created_at.desc()).limit(5).all()
recent_tags = [
TagSummary(name=t.name, artifact_id=t.artifact_id, created_at=t.created_at)
for t in recent_tags_objs
]
detailed_packages.append(PackageDetailResponse(
id=pkg.id,
project_id=pkg.project_id,
name=pkg.name,
description=pkg.description,
format=pkg.format,
platform=pkg.platform,
created_at=pkg.created_at,
updated_at=pkg.updated_at,
tag_count=tag_count,
artifact_count=artifact_count,
total_size=total_size,
latest_tag=latest_tag,
latest_upload_at=latest_upload,
recent_tags=recent_tags,
))
return PaginatedResponse(
items=detailed_packages,
pagination=PaginationMeta(
page=page,
limit=limit,
total=total,
total_pages=total_pages,
),
)
@router.get("/api/v1/project/{project_name}/packages/{package_name}", response_model=PackageDetailResponse)
def get_package(
project_name: str,
package_name: str,
include_tags: bool = Query(default=False, description="Include all tags (not just recent 5)"),
db: Session = Depends(get_db),
):
"""Get a single package with full metadata"""
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise HTTPException(status_code=404, detail="Project not found")
pkg = db.query(Package).filter(
Package.project_id == project.id,
Package.name == package_name
).first()
if not pkg:
raise HTTPException(status_code=404, detail="Package not found")
# Get tag count
tag_count = db.query(func.count(Tag.id)).filter(Tag.package_id == pkg.id).scalar() or 0
# Get unique artifact count and total size via uploads
artifact_stats = db.query(
func.count(func.distinct(Upload.artifact_id)),
func.coalesce(func.sum(Artifact.size), 0)
).join(Artifact, Upload.artifact_id == Artifact.id).filter(
Upload.package_id == pkg.id
).first()
artifact_count = artifact_stats[0] if artifact_stats else 0
total_size = artifact_stats[1] if artifact_stats else 0
# Get latest tag
latest_tag_obj = db.query(Tag).filter(
Tag.package_id == pkg.id
).order_by(Tag.created_at.desc()).first()
latest_tag = latest_tag_obj.name if latest_tag_obj else None
# Get latest upload timestamp
latest_upload = db.query(func.max(Upload.uploaded_at)).filter(
Upload.package_id == pkg.id
).scalar()
# Get tags (all if include_tags=true, else limit 5)
tags_query = db.query(Tag).filter(Tag.package_id == pkg.id).order_by(Tag.created_at.desc())
if not include_tags:
tags_query = tags_query.limit(5)
tags_objs = tags_query.all()
recent_tags = [
TagSummary(name=t.name, artifact_id=t.artifact_id, created_at=t.created_at)
for t in tags_objs
]
return PackageDetailResponse(
id=pkg.id,
project_id=pkg.project_id,
name=pkg.name,
description=pkg.description,
format=pkg.format,
platform=pkg.platform,
created_at=pkg.created_at,
updated_at=pkg.updated_at,
tag_count=tag_count,
artifact_count=artifact_count,
total_size=total_size,
latest_tag=latest_tag,
latest_upload_at=latest_upload,
recent_tags=recent_tags,
)
@router.post("/api/v1/project/{project_name}/packages", response_model=PackageResponse)
def create_package(project_name: str, package: PackageCreate, db: Session = Depends(get_db)):
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise HTTPException(status_code=404, detail="Project not found")
# Validate format
if package.format not in PACKAGE_FORMATS:
raise HTTPException(status_code=400, detail=f"Invalid format. Must be one of: {', '.join(PACKAGE_FORMATS)}")
# Validate platform
if package.platform not in PACKAGE_PLATFORMS:
raise HTTPException(status_code=400, detail=f"Invalid platform. Must be one of: {', '.join(PACKAGE_PLATFORMS)}")
existing = db.query(Package).filter(Package.project_id == project.id, Package.name == package.name).first()
if existing:
raise HTTPException(status_code=400, detail="Package already exists in this project")
db_package = Package(
project_id=project.id,
name=package.name,
description=package.description,
format=package.format,
platform=package.platform,
)
db.add(db_package)
db.commit()
db.refresh(db_package)
return db_package
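# Usage sketch (illustration only, never called by this module): creating a
# project and then a package inside it. The payload keys mirror ProjectCreate
# and PackageCreate; the format and platform values shown are assumed to be
# members of PACKAGE_FORMATS / PACKAGE_PLATFORMS, otherwise the API returns
# a 400. base_url and `requests` are assumptions of the example.
def _example_create_project_and_package(base_url: str) -> None:
    import requests  # assumed third-party dependency for the sketch
    resp = requests.post(
        f"{base_url}/api/v1/projects",
        json={"name": "demo", "description": "Example project", "is_public": True},
    )
    resp.raise_for_status()
    resp = requests.post(
        f"{base_url}/api/v1/project/demo/packages",
        json={
            "name": "app",
            "description": "Example package",
            "format": "generic",   # assumed valid PACKAGE_FORMATS value
            "platform": "linux",   # assumed valid PACKAGE_PLATFORMS value
        },
    )
    resp.raise_for_status()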
# Upload artifact
@router.post("/api/v1/project/{project_name}/{package_name}/upload", response_model=UploadResponse)
def upload_artifact(
project_name: str,
package_name: str,
request: Request,
file: UploadFile = File(...),
tag: Optional[str] = Form(None),
db: Session = Depends(get_db),
storage: S3Storage = Depends(get_storage),
content_length: Optional[int] = Header(None, alias="Content-Length"),
):
user_id = get_user_id(request)
# Get project and package
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise HTTPException(status_code=404, detail="Project not found")
package = db.query(Package).filter(Package.project_id == project.id, Package.name == package_name).first()
if not package:
raise HTTPException(status_code=404, detail="Package not found")
# Extract format-specific metadata before storing
file_metadata = {}
if file.filename:
# Read file into memory for metadata extraction
file_content = file.file.read()
file.file.seek(0)
# Extract metadata
file_metadata = extract_metadata(
io.BytesIO(file_content),
file.filename,
file.content_type
)
# Store file (uses multipart for large files)
storage_result = storage.store(file.file, content_length)
# Check if this is a deduplicated upload
deduplicated = False
# Create or update artifact record
artifact = db.query(Artifact).filter(Artifact.id == storage_result.sha256).first()
if artifact:
artifact.ref_count += 1
deduplicated = True
# Merge metadata if new metadata was extracted
if file_metadata and artifact.artifact_metadata:
artifact.artifact_metadata = {**artifact.artifact_metadata, **file_metadata}
elif file_metadata:
artifact.artifact_metadata = file_metadata
# Update checksums if not already set
if not artifact.checksum_md5 and storage_result.md5:
artifact.checksum_md5 = storage_result.md5
if not artifact.checksum_sha1 and storage_result.sha1:
artifact.checksum_sha1 = storage_result.sha1
if not artifact.s3_etag and storage_result.s3_etag:
artifact.s3_etag = storage_result.s3_etag
else:
artifact = Artifact(
id=storage_result.sha256,
size=storage_result.size,
content_type=file.content_type,
original_name=file.filename,
checksum_md5=storage_result.md5,
checksum_sha1=storage_result.sha1,
s3_etag=storage_result.s3_etag,
created_by=user_id,
s3_key=storage_result.s3_key,
artifact_metadata=file_metadata or {},
)
db.add(artifact)
# Record upload
upload = Upload(
artifact_id=storage_result.sha256,
package_id=package.id,
original_name=file.filename,
uploaded_by=user_id,
source_ip=request.client.host if request.client else None,
deduplicated=deduplicated,
)
db.add(upload)
# Create tag if provided
if tag:
existing_tag = db.query(Tag).filter(Tag.package_id == package.id, Tag.name == tag).first()
if existing_tag:
existing_tag.artifact_id = storage_result.sha256
existing_tag.created_by = user_id
else:
new_tag = Tag(
package_id=package.id,
name=tag,
artifact_id=storage_result.sha256,
created_by=user_id,
)
db.add(new_tag)
db.commit()
return UploadResponse(
artifact_id=storage_result.sha256,
sha256=storage_result.sha256,
size=storage_result.size,
project=project_name,
package=package_name,
tag=tag,
checksum_md5=storage_result.md5,
checksum_sha1=storage_result.sha1,
s3_etag=storage_result.s3_etag,
format_metadata=artifact.artifact_metadata,
deduplicated=deduplicated,
)
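# Usage sketch (illustration only, never called by this module): uploading a
# file with a tag and checking that the returned artifact id matches a locally
# computed SHA256, since artifacts are content-addressed by that digest.
# base_url and the `requests` package are assumptions of the example.
def _example_upload_and_verify(base_url: str, project: str, package: str, path: str, tag: str) -> str:
    import requests  # assumed third-party dependency for the sketch
    digest = hashlib.sha256()
    with open(path, "rb") as fh:
        for chunk in iter(lambda: fh.read(1024 * 1024), b""):
            digest.update(chunk)
    with open(path, "rb") as fh:
        resp = requests.post(
            f"{base_url}/api/v1/project/{project}/{package}/upload",
            files={"file": fh},
            data={"tag": tag},
        )
    resp.raise_for_status()
    body = resp.json()
    # The response carries the id both as artifact_id and as the sha256 alias.
    assert body["sha256"] == digest.hexdigest()
    return body["artifact_id"]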
# Resumable upload endpoints
@router.post("/api/v1/project/{project_name}/{package_name}/upload/init", response_model=ResumableUploadInitResponse)
def init_resumable_upload(
project_name: str,
package_name: str,
init_request: ResumableUploadInitRequest,
request: Request,
db: Session = Depends(get_db),
storage: S3Storage = Depends(get_storage),
):
"""
Initialize a resumable upload session.
Client must provide the SHA256 hash of the file in advance.
"""
user_id = get_user_id(request)
# Validate project and package
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise HTTPException(status_code=404, detail="Project not found")
package = db.query(Package).filter(Package.project_id == project.id, Package.name == package_name).first()
if not package:
raise HTTPException(status_code=404, detail="Package not found")
# Check if artifact already exists (deduplication)
existing_artifact = db.query(Artifact).filter(Artifact.id == init_request.expected_hash).first()
if existing_artifact:
# File already exists - increment ref count and return immediately
existing_artifact.ref_count += 1
# Record the upload
upload = Upload(
artifact_id=init_request.expected_hash,
package_id=package.id,
original_name=init_request.filename,
uploaded_by=user_id,
source_ip=request.client.host if request.client else None,
)
db.add(upload)
# Create tag if provided
if init_request.tag:
existing_tag = db.query(Tag).filter(
Tag.package_id == package.id, Tag.name == init_request.tag
).first()
if existing_tag:
existing_tag.artifact_id = init_request.expected_hash
existing_tag.created_by = user_id
else:
new_tag = Tag(
package_id=package.id,
name=init_request.tag,
artifact_id=init_request.expected_hash,
created_by=user_id,
)
db.add(new_tag)
db.commit()
return ResumableUploadInitResponse(
upload_id=None,
already_exists=True,
artifact_id=init_request.expected_hash,
chunk_size=MULTIPART_CHUNK_SIZE,
)
# Initialize resumable upload
session = storage.initiate_resumable_upload(init_request.expected_hash)
return ResumableUploadInitResponse(
upload_id=session["upload_id"],
already_exists=False,
artifact_id=None,
chunk_size=MULTIPART_CHUNK_SIZE,
)
@router.put("/api/v1/project/{project_name}/{package_name}/upload/{upload_id}/part/{part_number}")
async def upload_part(
project_name: str,
package_name: str,
upload_id: str,
part_number: int,
request: Request,
db: Session = Depends(get_db),
storage: S3Storage = Depends(get_storage),
):
"""
Upload a part of a resumable upload.
Part numbers start at 1.
"""
# Validate project and package exist
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise HTTPException(status_code=404, detail="Project not found")
package = db.query(Package).filter(Package.project_id == project.id, Package.name == package_name).first()
if not package:
raise HTTPException(status_code=404, detail="Package not found")
if part_number < 1:
raise HTTPException(status_code=400, detail="Part number must be >= 1")
    # Read the part data from the raw request body. The route is async so the
    # body can be awaited directly; spinning up a private event loop inside a
    # sync route is unreliable because the request stream belongs to the
    # server's own event loop.
    data = await request.body()
if not data:
raise HTTPException(status_code=400, detail="No data in request body")
try:
part_info = storage.upload_part(upload_id, part_number, data)
return ResumableUploadPartResponse(
part_number=part_info["PartNumber"],
etag=part_info["ETag"],
)
except ValueError as e:
raise HTTPException(status_code=404, detail=str(e))
@router.post("/api/v1/project/{project_name}/{package_name}/upload/{upload_id}/complete")
def complete_resumable_upload(
project_name: str,
package_name: str,
upload_id: str,
complete_request: ResumableUploadCompleteRequest,
request: Request,
db: Session = Depends(get_db),
storage: S3Storage = Depends(get_storage),
):
"""Complete a resumable upload"""
user_id = get_user_id(request)
# Validate project and package
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise HTTPException(status_code=404, detail="Project not found")
package = db.query(Package).filter(Package.project_id == project.id, Package.name == package_name).first()
if not package:
raise HTTPException(status_code=404, detail="Package not found")
try:
sha256_hash, s3_key = storage.complete_resumable_upload(upload_id)
except ValueError as e:
raise HTTPException(status_code=404, detail=str(e))
# Get file size from S3
obj_info = storage.get_object_info(s3_key)
size = obj_info["size"] if obj_info else 0
# Create artifact record
artifact = Artifact(
id=sha256_hash,
size=size,
s3_key=s3_key,
created_by=user_id,
format_metadata={},
)
db.add(artifact)
# Record upload
upload = Upload(
artifact_id=sha256_hash,
package_id=package.id,
uploaded_by=user_id,
source_ip=request.client.host if request.client else None,
)
db.add(upload)
# Create tag if provided
if complete_request.tag:
existing_tag = db.query(Tag).filter(
Tag.package_id == package.id, Tag.name == complete_request.tag
).first()
if existing_tag:
existing_tag.artifact_id = sha256_hash
existing_tag.created_by = user_id
else:
new_tag = Tag(
package_id=package.id,
name=complete_request.tag,
artifact_id=sha256_hash,
created_by=user_id,
)
db.add(new_tag)
db.commit()
return ResumableUploadCompleteResponse(
artifact_id=sha256_hash,
size=size,
project=project_name,
package=package_name,
tag=complete_request.tag,
)
@router.delete("/api/v1/project/{project_name}/{package_name}/upload/{upload_id}")
def abort_resumable_upload(
project_name: str,
package_name: str,
upload_id: str,
storage: S3Storage = Depends(get_storage),
):
"""Abort a resumable upload"""
try:
storage.abort_resumable_upload(upload_id)
return {"status": "aborted"}
except ValueError as e:
raise HTTPException(status_code=404, detail=str(e))
@router.get("/api/v1/project/{project_name}/{package_name}/upload/{upload_id}/status")
def get_upload_status(
project_name: str,
package_name: str,
upload_id: str,
storage: S3Storage = Depends(get_storage),
):
"""Get status of a resumable upload"""
try:
parts = storage.list_upload_parts(upload_id)
uploaded_parts = [p["PartNumber"] for p in parts]
total_bytes = sum(p.get("Size", 0) for p in parts)
return ResumableUploadStatusResponse(
upload_id=upload_id,
uploaded_parts=uploaded_parts,
total_uploaded_bytes=total_bytes,
)
except ValueError as e:
raise HTTPException(status_code=404, detail=str(e))
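# Usage sketch (illustration only, never called by this module): driving the
# resumable-upload endpoints above from a client. The file is hashed up front
# because init requires the expected SHA256; if the server already stores that
# content, init answers with already_exists and no parts need to be sent.
# base_url and the `requests` package are assumptions of the example.
def _example_resumable_upload(base_url: str, project: str, package: str, path: str, tag: Optional[str] = None) -> str:
    import os
    import requests  # assumed third-party dependency for the sketch
    digest = hashlib.sha256()
    with open(path, "rb") as fh:
        for chunk in iter(lambda: fh.read(1024 * 1024), b""):
            digest.update(chunk)
    prefix = f"{base_url}/api/v1/project/{project}/{package}/upload"
    init = requests.post(
        f"{prefix}/init",
        json={"expected_hash": digest.hexdigest(), "filename": os.path.basename(path), "tag": tag},
    )
    init.raise_for_status()
    session = init.json()
    if session["already_exists"]:
        return session["artifact_id"]
    upload_id, chunk_size = session["upload_id"], session["chunk_size"]
    part_number = 1
    with open(path, "rb") as fh:
        while True:
            data = fh.read(chunk_size)
            if not data:
                break
            part = requests.put(f"{prefix}/{upload_id}/part/{part_number}", data=data)
            part.raise_for_status()
            part_number += 1
    done = requests.post(f"{prefix}/{upload_id}/complete", json={"tag": tag})
    done.raise_for_status()
    return done.json()["artifact_id"]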
# Download artifact with range request support
@router.get("/api/v1/project/{project_name}/{package_name}/+/{ref}")
def download_artifact(
project_name: str,
package_name: str,
ref: str,
request: Request,
db: Session = Depends(get_db),
storage: S3Storage = Depends(get_storage),
range: Optional[str] = Header(None),
):
# Get project and package
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise HTTPException(status_code=404, detail="Project not found")
package = db.query(Package).filter(Package.project_id == project.id, Package.name == package_name).first()
if not package:
raise HTTPException(status_code=404, detail="Package not found")
# Resolve reference to artifact
artifact = None
# Check for explicit prefixes
if ref.startswith("artifact:"):
artifact_id = ref[9:]
artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first()
elif ref.startswith("tag:") or ref.startswith("version:"):
tag_name = ref.split(":", 1)[1]
tag = db.query(Tag).filter(Tag.package_id == package.id, Tag.name == tag_name).first()
if tag:
artifact = db.query(Artifact).filter(Artifact.id == tag.artifact_id).first()
else:
# Try as tag name first
tag = db.query(Tag).filter(Tag.package_id == package.id, Tag.name == ref).first()
if tag:
artifact = db.query(Artifact).filter(Artifact.id == tag.artifact_id).first()
else:
# Try as direct artifact ID
artifact = db.query(Artifact).filter(Artifact.id == ref).first()
if not artifact:
raise HTTPException(status_code=404, detail="Artifact not found")
    filename = artifact.original_name or artifact.id
# Handle range requests
if range:
stream, content_length, content_range = storage.get_stream(artifact.s3_key, range)
headers = {
"Content-Disposition": f'attachment; filename="{filename}"',
"Accept-Ranges": "bytes",
"Content-Length": str(content_length),
}
if content_range:
headers["Content-Range"] = content_range
return StreamingResponse(
stream,
status_code=206, # Partial Content
media_type=artifact.content_type or "application/octet-stream",
headers=headers,
)
# Full download
stream, content_length, _ = storage.get_stream(artifact.s3_key)
return StreamingResponse(
stream,
media_type=artifact.content_type or "application/octet-stream",
headers={
"Content-Disposition": f'attachment; filename="{filename}"',
"Accept-Ranges": "bytes",
"Content-Length": str(content_length),
},
)
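# Usage sketch (illustration only, never called by this module): downloading
# an artifact by tag, version, or id and resuming an interrupted transfer via
# a Range header, which the endpoint above answers with 206 Partial Content.
# base_url and the `requests` package are assumptions of the example.
def _example_download(base_url: str, project: str, package: str, ref: str, dest: str) -> None:
    import os
    import requests  # assumed third-party dependency for the sketch
    url = f"{base_url}/api/v1/project/{project}/{package}/+/{ref}"
    headers = {}
    if os.path.exists(dest):
        # Ask only for the bytes that are not on disk yet.
        headers["Range"] = f"bytes={os.path.getsize(dest)}-"
    with requests.get(url, headers=headers, stream=True) as resp:
        resp.raise_for_status()
        mode = "ab" if resp.status_code == 206 else "wb"
        with open(dest, mode) as fh:
            for chunk in resp.iter_content(chunk_size=1024 * 1024):
                fh.write(chunk)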
# HEAD request for download (to check file info without downloading)
@router.head("/api/v1/project/{project_name}/{package_name}/+/{ref}")
def head_artifact(
project_name: str,
package_name: str,
ref: str,
db: Session = Depends(get_db),
storage: S3Storage = Depends(get_storage),
):
# Get project and package
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise HTTPException(status_code=404, detail="Project not found")
package = db.query(Package).filter(Package.project_id == project.id, Package.name == package_name).first()
if not package:
raise HTTPException(status_code=404, detail="Package not found")
# Resolve reference to artifact (same logic as download)
artifact = None
if ref.startswith("artifact:"):
artifact_id = ref[9:]
artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first()
elif ref.startswith("tag:") or ref.startswith("version:"):
tag_name = ref.split(":", 1)[1]
tag = db.query(Tag).filter(Tag.package_id == package.id, Tag.name == tag_name).first()
if tag:
artifact = db.query(Artifact).filter(Artifact.id == tag.artifact_id).first()
else:
tag = db.query(Tag).filter(Tag.package_id == package.id, Tag.name == ref).first()
if tag:
artifact = db.query(Artifact).filter(Artifact.id == tag.artifact_id).first()
else:
artifact = db.query(Artifact).filter(Artifact.id == ref).first()
if not artifact:
raise HTTPException(status_code=404, detail="Artifact not found")
    filename = artifact.original_name or artifact.id
return Response(
content=b"",
media_type=artifact.content_type or "application/octet-stream",
headers={
"Content-Disposition": f'attachment; filename="{filename}"',
"Accept-Ranges": "bytes",
"Content-Length": str(artifact.size),
"X-Artifact-Id": artifact.id,
},
)
# Compatibility route
@router.get("/project/{project_name}/{package_name}/+/{ref}")
def download_artifact_compat(
project_name: str,
package_name: str,
ref: str,
request: Request,
db: Session = Depends(get_db),
storage: S3Storage = Depends(get_storage),
range: Optional[str] = Header(None),
):
return download_artifact(project_name, package_name, ref, request, db, storage, range)
# Tag routes
@router.get("/api/v1/project/{project_name}/{package_name}/tags", response_model=PaginatedResponse[TagDetailResponse])
def list_tags(
project_name: str,
package_name: str,
page: int = Query(default=1, ge=1, description="Page number"),
limit: int = Query(default=20, ge=1, le=100, description="Items per page"),
search: Optional[str] = Query(default=None, description="Search by tag name"),
sort: str = Query(default="name", description="Sort field (name, created_at)"),
order: str = Query(default="asc", description="Sort order (asc, desc)"),
db: Session = Depends(get_db),
):
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise HTTPException(status_code=404, detail="Project not found")
package = db.query(Package).filter(Package.project_id == project.id, Package.name == package_name).first()
if not package:
raise HTTPException(status_code=404, detail="Package not found")
# Validate sort field
valid_sort_fields = {"name": Tag.name, "created_at": Tag.created_at}
if sort not in valid_sort_fields:
raise HTTPException(status_code=400, detail=f"Invalid sort field. Must be one of: {', '.join(valid_sort_fields.keys())}")
# Validate order
if order not in ("asc", "desc"):
raise HTTPException(status_code=400, detail="Invalid order. Must be 'asc' or 'desc'")
# Base query with JOIN to artifact for metadata
query = db.query(Tag, Artifact).join(Artifact, Tag.artifact_id == Artifact.id).filter(Tag.package_id == package.id)
# Apply search filter (case-insensitive on tag name OR artifact original filename)
if search:
search_lower = search.lower()
query = query.filter(
or_(
func.lower(Tag.name).contains(search_lower),
func.lower(Artifact.original_name).contains(search_lower)
)
)
# Get total count before pagination
total = query.count()
# Apply sorting
sort_column = valid_sort_fields[sort]
if order == "desc":
query = query.order_by(sort_column.desc())
else:
query = query.order_by(sort_column.asc())
# Apply pagination
offset = (page - 1) * limit
results = query.offset(offset).limit(limit).all()
# Calculate total pages
total_pages = math.ceil(total / limit) if total > 0 else 1
# Build detailed responses with artifact metadata
detailed_tags = []
for tag, artifact in results:
detailed_tags.append(TagDetailResponse(
id=tag.id,
package_id=tag.package_id,
name=tag.name,
artifact_id=tag.artifact_id,
created_at=tag.created_at,
created_by=tag.created_by,
artifact_size=artifact.size,
artifact_content_type=artifact.content_type,
artifact_original_name=artifact.original_name,
artifact_created_at=artifact.created_at,
artifact_format_metadata=artifact.format_metadata,
))
return PaginatedResponse(
items=detailed_tags,
pagination=PaginationMeta(
page=page,
limit=limit,
total=total,
total_pages=total_pages,
),
)
@router.post("/api/v1/project/{project_name}/{package_name}/tags", response_model=TagResponse)
def create_tag(
project_name: str,
package_name: str,
tag: TagCreate,
request: Request,
db: Session = Depends(get_db),
):
user_id = get_user_id(request)
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise HTTPException(status_code=404, detail="Project not found")
package = db.query(Package).filter(Package.project_id == project.id, Package.name == package_name).first()
if not package:
raise HTTPException(status_code=404, detail="Package not found")
# Verify artifact exists
artifact = db.query(Artifact).filter(Artifact.id == tag.artifact_id).first()
if not artifact:
raise HTTPException(status_code=404, detail="Artifact not found")
# Create or update tag
existing = db.query(Tag).filter(Tag.package_id == package.id, Tag.name == tag.name).first()
if existing:
existing.artifact_id = tag.artifact_id
existing.created_by = user_id
db.commit()
db.refresh(existing)
return existing
db_tag = Tag(
package_id=package.id,
name=tag.name,
artifact_id=tag.artifact_id,
created_by=user_id,
)
db.add(db_tag)
db.commit()
db.refresh(db_tag)
return db_tag
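# Usage sketch (illustration only, never called by this module): tags behave
# as mutable pointers, so posting an existing tag name with a different
# artifact_id re-points it instead of failing. The tag name "stable", base_url,
# and the `requests` package are assumptions of the example.
def _example_promote_tag(base_url: str, project: str, package: str, artifact_id: str) -> None:
    import requests  # assumed third-party dependency for the sketch
    resp = requests.post(
        f"{base_url}/api/v1/project/{project}/{package}/tags",
        json={"name": "stable", "artifact_id": artifact_id},
    )
    resp.raise_for_status()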
@router.get("/api/v1/project/{project_name}/{package_name}/tags/{tag_name}", response_model=TagDetailResponse)
def get_tag(
project_name: str,
package_name: str,
tag_name: str,
db: Session = Depends(get_db),
):
"""Get a single tag with full artifact metadata"""
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise HTTPException(status_code=404, detail="Project not found")
package = db.query(Package).filter(Package.project_id == project.id, Package.name == package_name).first()
if not package:
raise HTTPException(status_code=404, detail="Package not found")
result = db.query(Tag, Artifact).join(Artifact, Tag.artifact_id == Artifact.id).filter(
Tag.package_id == package.id,
Tag.name == tag_name
).first()
if not result:
raise HTTPException(status_code=404, detail="Tag not found")
tag, artifact = result
return TagDetailResponse(
id=tag.id,
package_id=tag.package_id,
name=tag.name,
artifact_id=tag.artifact_id,
created_at=tag.created_at,
created_by=tag.created_by,
artifact_size=artifact.size,
artifact_content_type=artifact.content_type,
artifact_original_name=artifact.original_name,
artifact_created_at=artifact.created_at,
artifact_format_metadata=artifact.format_metadata,
)
@router.get("/api/v1/project/{project_name}/{package_name}/tags/{tag_name}/history", response_model=List[TagHistoryResponse])
def get_tag_history(
project_name: str,
package_name: str,
tag_name: str,
db: Session = Depends(get_db),
):
"""Get the history of artifact assignments for a tag"""
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise HTTPException(status_code=404, detail="Project not found")
package = db.query(Package).filter(Package.project_id == project.id, Package.name == package_name).first()
if not package:
raise HTTPException(status_code=404, detail="Package not found")
tag = db.query(Tag).filter(Tag.package_id == package.id, Tag.name == tag_name).first()
if not tag:
raise HTTPException(status_code=404, detail="Tag not found")
history = db.query(TagHistory).filter(TagHistory.tag_id == tag.id).order_by(TagHistory.changed_at.desc()).all()
return history
# Consumer routes
@router.get("/api/v1/project/{project_name}/{package_name}/consumers", response_model=List[ConsumerResponse])
def get_consumers(project_name: str, package_name: str, db: Session = Depends(get_db)):
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise HTTPException(status_code=404, detail="Project not found")
package = db.query(Package).filter(Package.project_id == project.id, Package.name == package_name).first()
if not package:
raise HTTPException(status_code=404, detail="Package not found")
consumers = db.query(Consumer).filter(Consumer.package_id == package.id).order_by(Consumer.last_access.desc()).all()
return consumers
# Package artifacts
@router.get("/api/v1/project/{project_name}/{package_name}/artifacts", response_model=PaginatedResponse[PackageArtifactResponse])
def list_package_artifacts(
project_name: str,
package_name: str,
page: int = Query(default=1, ge=1, description="Page number"),
limit: int = Query(default=20, ge=1, le=100, description="Items per page"),
content_type: Optional[str] = Query(default=None, description="Filter by content type"),
created_after: Optional[datetime] = Query(default=None, description="Filter artifacts created after this date"),
created_before: Optional[datetime] = Query(default=None, description="Filter artifacts created before this date"),
db: Session = Depends(get_db),
):
"""List all unique artifacts uploaded to a package"""
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise HTTPException(status_code=404, detail="Project not found")
package = db.query(Package).filter(Package.project_id == project.id, Package.name == package_name).first()
if not package:
raise HTTPException(status_code=404, detail="Package not found")
# Get distinct artifacts uploaded to this package via uploads table
artifact_ids_subquery = db.query(func.distinct(Upload.artifact_id)).filter(
Upload.package_id == package.id
).subquery()
query = db.query(Artifact).filter(Artifact.id.in_(artifact_ids_subquery))
# Apply content_type filter
if content_type:
query = query.filter(Artifact.content_type == content_type)
# Apply date range filters
if created_after:
query = query.filter(Artifact.created_at >= created_after)
if created_before:
query = query.filter(Artifact.created_at <= created_before)
# Get total count before pagination
total = query.count()
# Apply pagination
offset = (page - 1) * limit
artifacts = query.order_by(Artifact.created_at.desc()).offset(offset).limit(limit).all()
# Calculate total pages
total_pages = math.ceil(total / limit) if total > 0 else 1
# Build responses with tag info
artifact_responses = []
for artifact in artifacts:
# Get tags pointing to this artifact in this package
tags = db.query(Tag.name).filter(
Tag.package_id == package.id,
Tag.artifact_id == artifact.id
).all()
tag_names = [t.name for t in tags]
artifact_responses.append(PackageArtifactResponse(
id=artifact.id,
size=artifact.size,
content_type=artifact.content_type,
original_name=artifact.original_name,
created_at=artifact.created_at,
created_by=artifact.created_by,
format_metadata=artifact.format_metadata,
tags=tag_names,
))
return PaginatedResponse(
items=artifact_responses,
pagination=PaginationMeta(
page=page,
limit=limit,
total=total,
total_pages=total_pages,
),
)
# Artifact by ID
@router.get("/api/v1/artifact/{artifact_id}", response_model=ArtifactDetailResponse)
def get_artifact(artifact_id: str, db: Session = Depends(get_db)):
"""Get artifact metadata including list of packages/tags referencing it"""
artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first()
if not artifact:
raise HTTPException(status_code=404, detail="Artifact not found")
# Get all tags referencing this artifact with package and project info
tags_with_context = db.query(Tag, Package, Project).join(
Package, Tag.package_id == Package.id
).join(
Project, Package.project_id == Project.id
).filter(
Tag.artifact_id == artifact_id
).all()
tag_infos = [
ArtifactTagInfo(
id=tag.id,
name=tag.name,
package_id=package.id,
package_name=package.name,
project_name=project.name,
)
for tag, package, project in tags_with_context
]
return ArtifactDetailResponse(
id=artifact.id,
size=artifact.size,
content_type=artifact.content_type,
original_name=artifact.original_name,
created_at=artifact.created_at,
created_by=artifact.created_by,
ref_count=artifact.ref_count,
format_metadata=artifact.format_metadata,
tags=tag_infos,
)
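# Usage sketch (illustration only, never called by this module): resolving
# which projects, packages, and tags currently reference a content-addressed
# artifact. base_url and the `requests` package are assumptions of the example.
def _example_artifact_references(base_url: str, artifact_id: str) -> List[str]:
    import requests  # assumed third-party dependency for the sketch
    resp = requests.get(f"{base_url}/api/v1/artifact/{artifact_id}")
    resp.raise_for_status()
    return [
        f"{t['project_name']}/{t['package_name']}:{t['name']}"
        for t in resp.json()["tags"]
    ]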