Files
orchard/backend/app/routes.py
2025-12-11 18:05:08 -06:00

726 lines
24 KiB
Python

from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form, Request, Query, Header, Response
from fastapi.responses import StreamingResponse
from sqlalchemy.orm import Session
from sqlalchemy import or_, func
from typing import List, Optional
import math
import re
import io
import hashlib
from .database import get_db
from .storage import get_storage, S3Storage, MULTIPART_CHUNK_SIZE
from .models import Project, Package, Artifact, Tag, Upload, Consumer
from .schemas import (
ProjectCreate, ProjectResponse,
PackageCreate, PackageResponse,
ArtifactResponse,
TagCreate, TagResponse,
UploadResponse,
ConsumerResponse,
HealthResponse,
PaginatedResponse, PaginationMeta,
ResumableUploadInitRequest,
ResumableUploadInitResponse,
ResumableUploadPartResponse,
ResumableUploadCompleteRequest,
ResumableUploadCompleteResponse,
ResumableUploadStatusResponse,
)
from .metadata import extract_metadata
# Router on which every endpoint in this module registers itself via the
# @router.<method>(...) decorators below.
router = APIRouter()
def get_user_id(request: Request) -> str:
    """Derive a coarse caller identity from the request headers.

    This is a placeholder scheme: the header values themselves are never
    validated, only their presence (a non-empty value) is checked.

    Returns:
        "api-user" when an ``X-Orchard-API-Key`` header is present,
        otherwise "bearer-user" when an ``Authorization`` header is present,
        otherwise "anonymous".
    """
    headers = request.headers
    # Order matters: the API key header wins over the Authorization header.
    identity_by_header = (
        ("X-Orchard-API-Key", "api-user"),
        ("Authorization", "bearer-user"),
    )
    for header_name, identity in identity_by_header:
        if headers.get(header_name):
            return identity
    return "anonymous"
# Health check
@router.get("/health", response_model=HealthResponse)
def health_check():
    """Liveness probe: unconditionally reports status "ok"."""
    payload = HealthResponse(status="ok")
    return payload
# Project routes
@router.get("/api/v1/projects", response_model=PaginatedResponse[ProjectResponse])
def list_projects(
    request: Request,
    page: int = Query(default=1, ge=1, description="Page number"),
    limit: int = Query(default=20, ge=1, le=100, description="Items per page"),
    search: Optional[str] = Query(default=None, description="Search by project name"),
    db: Session = Depends(get_db),
):
    """Return one page of the projects visible to the caller.

    A project is visible when it is public or when its ``created_by`` equals
    the identity that ``get_user_id`` derives from the request.

    Args:
        request: Incoming request; only used to derive the caller identity.
        page: 1-based page number.
        limit: Page size (1..100).
        search: Optional case-insensitive substring to match against the
            project name. The text is matched literally.
        db: Database session.

    Returns:
        A ``PaginatedResponse`` with the projects of the requested page
        (ordered by name) and the pagination metadata. ``total_pages`` is 1
        when there are no matching projects.
    """
    user_id = get_user_id(request)

    # Base query - restrict to projects the caller may see.
    query = db.query(Project).filter(
        or_(Project.is_public == True, Project.created_by == user_id)  # noqa: E712
    )

    if search:
        # autoescape=True escapes "%" and "_" in the user's text so they are
        # matched literally instead of acting as LIKE wildcards (without it
        # a search for "_" would match every project).
        query = query.filter(
            func.lower(Project.name).contains(search.lower(), autoescape=True)
        )

    # Count before pagination so the total reflects the whole result set.
    total = query.count()

    offset = (page - 1) * limit
    projects = query.order_by(Project.name).offset(offset).limit(limit).all()

    total_pages = math.ceil(total / limit) if total > 0 else 1

    return PaginatedResponse(
        items=projects,
        pagination=PaginationMeta(
            page=page,
            limit=limit,
            total=total,
            total_pages=total_pages,
        ),
    )
@router.post("/api/v1/projects", response_model=ProjectResponse)
def create_project(project: ProjectCreate, request: Request, db: Session = Depends(get_db)):
    """Create a project owned by the calling identity.

    Raises:
        HTTPException: 400 when a project with the same name already exists.

    Returns:
        The newly persisted ``Project`` row (refreshed after commit).
    """
    owner = get_user_id(request)

    name_clash = db.query(Project).filter(Project.name == project.name).first()
    if name_clash:
        raise HTTPException(status_code=400, detail="Project already exists")

    new_project = Project(
        name=project.name,
        description=project.description,
        is_public=project.is_public,
        created_by=owner,
    )
    db.add(new_project)
    db.commit()
    # Reload so database-generated columns are populated in the response.
    db.refresh(new_project)
    return new_project
@router.get("/api/v1/projects/{project_name}", response_model=ProjectResponse)
def get_project(project_name: str, db: Session = Depends(get_db)):
    """Fetch a single project by its name.

    NOTE(review): unlike the project listing, no visibility filter is
    applied here - confirm that is intended.

    Raises:
        HTTPException: 404 when no project has that name.
    """
    found = db.query(Project).filter(Project.name == project_name).first()
    if found:
        return found
    raise HTTPException(status_code=404, detail="Project not found")
# Package routes
@router.get("/api/v1/project/{project_name}/packages", response_model=List[PackageResponse])
def list_packages(project_name: str, db: Session = Depends(get_db)):
    """List every package of a project, ordered by package name.

    Raises:
        HTTPException: 404 when the project does not exist.
    """
    parent = db.query(Project).filter(Project.name == project_name).first()
    if not parent:
        raise HTTPException(status_code=404, detail="Project not found")

    package_query = db.query(Package).filter(Package.project_id == parent.id)
    return package_query.order_by(Package.name).all()
@router.post("/api/v1/project/{project_name}/packages", response_model=PackageResponse)
def create_package(project_name: str, package: PackageCreate, db: Session = Depends(get_db)):
    """Create a package inside an existing project.

    Raises:
        HTTPException: 404 when the project does not exist; 400 when the
            project already has a package with the same name.

    Returns:
        The newly persisted ``Package`` row (refreshed after commit).
    """
    parent = db.query(Project).filter(Project.name == project_name).first()
    if not parent:
        raise HTTPException(status_code=404, detail="Project not found")

    duplicate = (
        db.query(Package)
        .filter(Package.project_id == parent.id, Package.name == package.name)
        .first()
    )
    if duplicate:
        raise HTTPException(status_code=400, detail="Package already exists in this project")

    created = Package(
        project_id=parent.id,
        name=package.name,
        description=package.description,
    )
    db.add(created)
    db.commit()
    db.refresh(created)
    return created
# Upload artifact
@router.post("/api/v1/project/{project_name}/{package_name}/upload", response_model=UploadResponse)
def upload_artifact(
    project_name: str,
    package_name: str,
    request: Request,
    file: UploadFile = File(...),
    tag: Optional[str] = Form(None),
    db: Session = Depends(get_db),
    storage: S3Storage = Depends(get_storage),
    content_length: Optional[int] = Header(None, alias="Content-Length"),
):
    """Store an uploaded file as an artifact of a package.

    The artifact is keyed by the hash returned from ``storage.store``. When
    an artifact with that hash already exists, its ``ref_count`` is bumped
    and any freshly extracted metadata is merged in; otherwise a new
    artifact row is created. An ``Upload`` row is always recorded, and the
    optional ``tag`` is created or re-pointed to this artifact.

    Raises:
        HTTPException: 404 when the project or the package does not exist.

    Returns:
        ``UploadResponse`` describing the stored artifact and whether the
        upload was deduplicated.
    """
    uploader = get_user_id(request)

    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")
    package = (
        db.query(Package)
        .filter(Package.project_id == project.id, Package.name == package_name)
        .first()
    )
    if not package:
        raise HTTPException(status_code=404, detail="Package not found")

    # Metadata extraction needs the file name, so it is skipped without one.
    extracted = {}
    if file.filename:
        # The whole payload is buffered for extraction; the upload stream is
        # rewound afterwards so the storage layer sees it from the start.
        raw_bytes = file.file.read()
        file.file.seek(0)
        extracted = extract_metadata(
            io.BytesIO(raw_bytes),
            file.filename,
            file.content_type,
        )

    digest, size, s3_key = storage.store(file.file, content_length)

    artifact = db.query(Artifact).filter(Artifact.id == digest).first()
    deduplicated = bool(artifact)
    if deduplicated:
        artifact.ref_count += 1
        if extracted:
            # New keys override existing ones when both sides have metadata.
            current = artifact.format_metadata
            artifact.format_metadata = {**current, **extracted} if current else extracted
    else:
        artifact = Artifact(
            id=digest,
            size=size,
            content_type=file.content_type,
            original_name=file.filename,
            created_by=uploader,
            s3_key=s3_key,
            format_metadata=extracted or {},
        )
        db.add(artifact)

    db.add(
        Upload(
            artifact_id=digest,
            package_id=package.id,
            original_name=file.filename,
            uploaded_by=uploader,
            source_ip=request.client.host if request.client else None,
        )
    )

    if tag:
        tag_row = db.query(Tag).filter(Tag.package_id == package.id, Tag.name == tag).first()
        if tag_row:
            # Existing tag: move it to the new artifact.
            tag_row.artifact_id = digest
            tag_row.created_by = uploader
        else:
            db.add(
                Tag(
                    package_id=package.id,
                    name=tag,
                    artifact_id=digest,
                    created_by=uploader,
                )
            )

    db.commit()

    return UploadResponse(
        artifact_id=digest,
        size=size,
        project=project_name,
        package=package_name,
        tag=tag,
        format_metadata=artifact.format_metadata,
        deduplicated=deduplicated,
    )
# Resumable upload endpoints
@router.post("/api/v1/project/{project_name}/{package_name}/upload/init", response_model=ResumableUploadInitResponse)
def init_resumable_upload(
    project_name: str,
    package_name: str,
    init_request: ResumableUploadInitRequest,
    request: Request,
    db: Session = Depends(get_db),
    storage: S3Storage = Depends(get_storage),
):
    """
    Start a resumable upload session.

    The client supplies the file's hash up front (``expected_hash``). If an
    artifact with that hash is already known, no session is opened: the
    artifact's ``ref_count`` is incremented, an ``Upload`` row is recorded,
    the optional tag is created or re-pointed, and the response reports
    ``already_exists=True``. Otherwise a storage session is initiated and
    its ``upload_id`` is returned.

    Raises:
        HTTPException: 404 when the project or the package does not exist.
    """
    caller = get_user_id(request)
    wanted_hash = init_request.expected_hash

    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")
    package = (
        db.query(Package)
        .filter(Package.project_id == project.id, Package.name == package_name)
        .first()
    )
    if not package:
        raise HTTPException(status_code=404, detail="Package not found")

    known_artifact = db.query(Artifact).filter(Artifact.id == wanted_hash).first()
    if not known_artifact:
        # Unknown content: open a new storage session for the client.
        session = storage.initiate_resumable_upload(wanted_hash)
        return ResumableUploadInitResponse(
            upload_id=session["upload_id"],
            already_exists=False,
            artifact_id=None,
            chunk_size=MULTIPART_CHUNK_SIZE,
        )

    # Deduplicated path: nothing needs to be transferred.
    known_artifact.ref_count += 1
    db.add(
        Upload(
            artifact_id=wanted_hash,
            package_id=package.id,
            original_name=init_request.filename,
            uploaded_by=caller,
            source_ip=request.client.host if request.client else None,
        )
    )

    requested_tag = init_request.tag
    if requested_tag:
        tag_row = (
            db.query(Tag)
            .filter(Tag.package_id == package.id, Tag.name == requested_tag)
            .first()
        )
        if tag_row:
            tag_row.artifact_id = wanted_hash
            tag_row.created_by = caller
        else:
            db.add(
                Tag(
                    package_id=package.id,
                    name=requested_tag,
                    artifact_id=wanted_hash,
                    created_by=caller,
                )
            )

    db.commit()
    return ResumableUploadInitResponse(
        upload_id=None,
        already_exists=True,
        artifact_id=wanted_hash,
        chunk_size=MULTIPART_CHUNK_SIZE,
    )
@router.put("/api/v1/project/{project_name}/{package_name}/upload/{upload_id}/part/{part_number}")
async def upload_part(
    project_name: str,
    package_name: str,
    upload_id: str,
    part_number: int,
    request: Request,
    db: Session = Depends(get_db),
    storage: S3Storage = Depends(get_storage),
):
    """
    Upload one part of a resumable upload.

    Part numbers start at 1. The raw request body is the part's data.

    The endpoint is a coroutine so the body can be awaited on the event loop
    that owns the request. The previous implementation awaited
    ``request.body()`` on a freshly created loop from a worker thread, which
    is unreliable because the request's receive channel belongs to the
    server's loop. Blocking work (database lookups, the storage call) is
    pushed to the threadpool so the event loop is not stalled.

    Raises:
        HTTPException: 404 when the project or package does not exist, or
            when the storage layer raises ``ValueError`` for the upload;
            400 when ``part_number`` is below 1 or the body is empty.

    Returns:
        ``ResumableUploadPartResponse`` with the part number and ETag
        reported by the storage layer.
    """
    # Local import: keeps this block independent of the module import list.
    from fastapi.concurrency import run_in_threadpool

    def _ensure_target_exists() -> None:
        """Raise 404 unless both the project and the package exist."""
        project = db.query(Project).filter(Project.name == project_name).first()
        if not project:
            raise HTTPException(status_code=404, detail="Project not found")
        package = (
            db.query(Package)
            .filter(Package.project_id == project.id, Package.name == package_name)
            .first()
        )
        if not package:
            raise HTTPException(status_code=404, detail="Package not found")

    await run_in_threadpool(_ensure_target_exists)

    if part_number < 1:
        raise HTTPException(status_code=400, detail="Part number must be >= 1")

    data = await request.body()
    if not data:
        raise HTTPException(status_code=400, detail="No data in request body")

    try:
        part_info = await run_in_threadpool(storage.upload_part, upload_id, part_number, data)
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e)) from e

    return ResumableUploadPartResponse(
        part_number=part_info["PartNumber"],
        etag=part_info["ETag"],
    )
@router.post("/api/v1/project/{project_name}/{package_name}/upload/{upload_id}/complete")
def complete_resumable_upload(
    project_name: str,
    package_name: str,
    upload_id: str,
    complete_request: ResumableUploadCompleteRequest,
    request: Request,
    db: Session = Depends(get_db),
    storage: S3Storage = Depends(get_storage),
):
    """Complete a resumable upload and register the resulting artifact.

    After the storage layer finalizes the upload, the artifact row is
    created, or reused when an artifact with the same hash already exists.
    In the reuse case its ``ref_count`` is incremented instead of inserting
    a second row with the same primary key. An ``Upload`` row is recorded
    and the optional tag is created or re-pointed to the artifact.

    Raises:
        HTTPException: 404 when the project or package does not exist, or
            when the storage layer raises ``ValueError`` for the upload.

    Returns:
        ``ResumableUploadCompleteResponse`` with the artifact id and size.
    """
    user_id = get_user_id(request)

    # Validate project and package
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")
    package = (
        db.query(Package)
        .filter(Package.project_id == project.id, Package.name == package_name)
        .first()
    )
    if not package:
        raise HTTPException(status_code=404, detail="Package not found")

    try:
        sha256_hash, s3_key = storage.complete_resumable_upload(upload_id)
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e)) from e

    # Size comes from the stored object; 0 when no object info is available.
    obj_info = storage.get_object_info(s3_key)
    size = obj_info["size"] if obj_info else 0

    # The same content may have been registered through another upload while
    # this session was open; reuse that row rather than colliding on the id.
    artifact = db.query(Artifact).filter(Artifact.id == sha256_hash).first()
    if artifact:
        artifact.ref_count += 1
    else:
        artifact = Artifact(
            id=sha256_hash,
            size=size,
            s3_key=s3_key,
            created_by=user_id,
            format_metadata={},
        )
        db.add(artifact)

    # Record upload
    upload = Upload(
        artifact_id=sha256_hash,
        package_id=package.id,
        uploaded_by=user_id,
        source_ip=request.client.host if request.client else None,
    )
    db.add(upload)

    # Create the tag, or move an existing one to this artifact
    if complete_request.tag:
        existing_tag = (
            db.query(Tag)
            .filter(Tag.package_id == package.id, Tag.name == complete_request.tag)
            .first()
        )
        if existing_tag:
            existing_tag.artifact_id = sha256_hash
            existing_tag.created_by = user_id
        else:
            db.add(
                Tag(
                    package_id=package.id,
                    name=complete_request.tag,
                    artifact_id=sha256_hash,
                    created_by=user_id,
                )
            )

    db.commit()

    return ResumableUploadCompleteResponse(
        artifact_id=sha256_hash,
        size=size,
        project=project_name,
        package=package_name,
        tag=complete_request.tag,
    )
@router.delete("/api/v1/project/{project_name}/{package_name}/upload/{upload_id}")
def abort_resumable_upload(
    project_name: str,
    package_name: str,
    upload_id: str,
    storage: S3Storage = Depends(get_storage),
):
    """Cancel a resumable upload session.

    The project and package path segments are accepted but not checked.

    Raises:
        HTTPException: 404 when the storage layer raises ``ValueError``
            for the given upload id.
    """
    try:
        storage.abort_resumable_upload(upload_id)
    except ValueError as abort_error:
        raise HTTPException(status_code=404, detail=str(abort_error))
    else:
        return {"status": "aborted"}
@router.get("/api/v1/project/{project_name}/{package_name}/upload/{upload_id}/status")
def get_upload_status(
    project_name: str,
    package_name: str,
    upload_id: str,
    storage: S3Storage = Depends(get_storage),
):
    """Report which parts of a resumable upload have been received.

    The project and package path segments are accepted but not checked.

    Raises:
        HTTPException: 404 when a ``ValueError`` is raised while listing
            the parts or building the response.

    Returns:
        ``ResumableUploadStatusResponse`` with the uploaded part numbers and
        the sum of their sizes (parts without a "Size" count as 0).
    """
    try:
        parts = storage.list_upload_parts(upload_id)

        part_numbers = []
        for part in parts:
            part_numbers.append(part["PartNumber"])

        byte_count = sum(part.get("Size", 0) for part in parts)

        return ResumableUploadStatusResponse(
            upload_id=upload_id,
            uploaded_parts=part_numbers,
            total_uploaded_bytes=byte_count,
        )
    except ValueError as status_error:
        raise HTTPException(status_code=404, detail=str(status_error))
# Download artifact with range request support
def _attachment_disposition(filename: str) -> str:
    """Build a ``Content-Disposition: attachment`` value safe for any name.

    Plain printable-ASCII names without quotes or backslashes produce the
    same value as before (``attachment; filename="<name>"``). Any other name
    gets a sanitized ASCII fallback plus an RFC 5987 ``filename*`` parameter
    carrying the percent-encoded UTF-8 name, so quotes, control characters
    and non-Latin-1 characters can neither break nor be smuggled into the
    header.
    """
    from urllib.parse import quote

    ascii_fallback = "".join(
        ch if 32 <= ord(ch) < 127 and ch not in '"\\' else "_" for ch in filename
    )
    if ascii_fallback == filename:
        return f'attachment; filename="{filename}"'
    encoded = quote(filename, safe="")
    return f"attachment; filename=\"{ascii_fallback}\"; filename*=UTF-8''{encoded}"


@router.get("/api/v1/project/{project_name}/{package_name}/+/{ref}")
def download_artifact(
    project_name: str,
    package_name: str,
    ref: str,
    request: Request,
    db: Session = Depends(get_db),
    storage: S3Storage = Depends(get_storage),
    range: Optional[str] = Header(None),
):
    """Stream an artifact, optionally honoring a ``Range`` header.

    ``ref`` is resolved as follows:
      * ``artifact:<id>`` - artifact looked up directly by id;
      * ``tag:<name>`` / ``version:<name>`` - tag of this package;
      * anything else - tried as a tag name of this package first, then as
        an artifact id.

    Raises:
        HTTPException: 404 when the project, the package or the referenced
            artifact cannot be found.

    Returns:
        A ``StreamingResponse``; status 206 when a ``Range`` header was
        sent, the default status otherwise.
    """
    # Get project and package
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")
    package = (
        db.query(Package)
        .filter(Package.project_id == project.id, Package.name == package_name)
        .first()
    )
    if not package:
        raise HTTPException(status_code=404, detail="Package not found")

    # Resolve reference to artifact
    artifact = None
    if ref.startswith("artifact:"):
        artifact_id = ref[len("artifact:"):]
        artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first()
    elif ref.startswith(("tag:", "version:")):
        tag_name = ref.split(":", 1)[1]
        tag = db.query(Tag).filter(Tag.package_id == package.id, Tag.name == tag_name).first()
        if tag:
            artifact = db.query(Artifact).filter(Artifact.id == tag.artifact_id).first()
    else:
        # Unprefixed: a tag of this package wins over a raw artifact id.
        tag = db.query(Tag).filter(Tag.package_id == package.id, Tag.name == ref).first()
        lookup_id = tag.artifact_id if tag else ref
        artifact = db.query(Artifact).filter(Artifact.id == lookup_id).first()

    if not artifact:
        raise HTTPException(status_code=404, detail="Artifact not found")

    filename = artifact.original_name or f"{artifact.id}"
    media_type = artifact.content_type or "application/octet-stream"
    headers = {
        "Content-Disposition": _attachment_disposition(filename),
        "Accept-Ranges": "bytes",
    }

    # Handle range requests
    if range:
        stream, content_length, content_range = storage.get_stream(artifact.s3_key, range)
        headers["Content-Length"] = str(content_length)
        if content_range:
            headers["Content-Range"] = content_range
        return StreamingResponse(
            stream,
            status_code=206,  # Partial Content
            media_type=media_type,
            headers=headers,
        )

    # Full download
    stream, content_length, _ = storage.get_stream(artifact.s3_key)
    headers["Content-Length"] = str(content_length)
    return StreamingResponse(
        stream,
        media_type=media_type,
        headers=headers,
    )
# HEAD request for download (to check file info without downloading)
@router.head("/api/v1/project/{project_name}/{package_name}/+/{ref}")
def head_artifact(
    project_name: str,
    package_name: str,
    ref: str,
    db: Session = Depends(get_db),
    storage: S3Storage = Depends(get_storage),
):
    """Return the download headers for an artifact without a body.

    ``ref`` is resolved like the GET download route: ``artifact:<id>``,
    ``tag:<name>`` / ``version:<name>``, or an unprefixed value that is
    tried as a tag name of this package first and as an artifact id second.

    The ``Content-Disposition`` value is built so that any stored file name
    is safe. Plain printable-ASCII names are emitted unchanged; other names
    get a sanitized ASCII fallback plus an RFC 5987 ``filename*`` parameter,
    instead of producing a malformed or unencodable header.

    Raises:
        HTTPException: 404 when the project, the package or the referenced
            artifact cannot be found.

    Returns:
        An empty ``Response`` whose headers carry the artifact's size
        (from the database row), id, media type and disposition.
    """
    # Local import: keeps this block independent of the module import list.
    from urllib.parse import quote

    # Get project and package
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")
    package = (
        db.query(Package)
        .filter(Package.project_id == project.id, Package.name == package_name)
        .first()
    )
    if not package:
        raise HTTPException(status_code=404, detail="Package not found")

    # Resolve reference to artifact (same rules as the GET download route)
    artifact = None
    if ref.startswith("artifact:"):
        artifact_id = ref[len("artifact:"):]
        artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first()
    elif ref.startswith(("tag:", "version:")):
        tag_name = ref.split(":", 1)[1]
        tag = db.query(Tag).filter(Tag.package_id == package.id, Tag.name == tag_name).first()
        if tag:
            artifact = db.query(Artifact).filter(Artifact.id == tag.artifact_id).first()
    else:
        tag = db.query(Tag).filter(Tag.package_id == package.id, Tag.name == ref).first()
        lookup_id = tag.artifact_id if tag else ref
        artifact = db.query(Artifact).filter(Artifact.id == lookup_id).first()

    if not artifact:
        raise HTTPException(status_code=404, detail="Artifact not found")

    filename = artifact.original_name or f"{artifact.id}"

    # Replace anything that is not printable ASCII (and quote/backslash) in
    # the quoted fallback; the exact name travels in filename* when needed.
    ascii_fallback = "".join(
        ch if 32 <= ord(ch) < 127 and ch not in '"\\' else "_" for ch in filename
    )
    if ascii_fallback == filename:
        disposition = f'attachment; filename="{filename}"'
    else:
        encoded = quote(filename, safe="")
        disposition = f"attachment; filename=\"{ascii_fallback}\"; filename*=UTF-8''{encoded}"

    return Response(
        content=b"",
        media_type=artifact.content_type or "application/octet-stream",
        headers={
            "Content-Disposition": disposition,
            "Accept-Ranges": "bytes",
            "Content-Length": str(artifact.size),
            "X-Artifact-Id": artifact.id,
        },
    )
# Compatibility route
@router.get("/project/{project_name}/{package_name}/+/{ref}")
def download_artifact_compat(
    project_name: str,
    package_name: str,
    ref: str,
    request: Request,
    db: Session = Depends(get_db),
    storage: S3Storage = Depends(get_storage),
    range: Optional[str] = Header(None),
):
    """Un-versioned alias of the download route; delegates to ``download_artifact``."""
    response = download_artifact(
        project_name=project_name,
        package_name=package_name,
        ref=ref,
        request=request,
        db=db,
        storage=storage,
        range=range,
    )
    return response
# Tag routes
@router.get("/api/v1/project/{project_name}/{package_name}/tags", response_model=List[TagResponse])
def list_tags(project_name: str, package_name: str, db: Session = Depends(get_db)):
    """List the tags of a package, ordered by tag name.

    Raises:
        HTTPException: 404 when the project or the package does not exist.
    """
    parent_project = db.query(Project).filter(Project.name == project_name).first()
    if not parent_project:
        raise HTTPException(status_code=404, detail="Project not found")

    target_package = (
        db.query(Package)
        .filter(Package.project_id == parent_project.id, Package.name == package_name)
        .first()
    )
    if not target_package:
        raise HTTPException(status_code=404, detail="Package not found")

    tag_query = db.query(Tag).filter(Tag.package_id == target_package.id)
    return tag_query.order_by(Tag.name).all()
@router.post("/api/v1/project/{project_name}/{package_name}/tags", response_model=TagResponse)
def create_tag(
    project_name: str,
    package_name: str,
    tag: TagCreate,
    request: Request,
    db: Session = Depends(get_db),
):
    """Create a tag in a package, or re-point an existing tag of that name.

    Raises:
        HTTPException: 404 when the project, the package or the artifact
            the tag should point to does not exist.

    Returns:
        The created or updated ``Tag`` row (refreshed after commit).
    """
    caller = get_user_id(request)

    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")
    package = (
        db.query(Package)
        .filter(Package.project_id == project.id, Package.name == package_name)
        .first()
    )
    if not package:
        raise HTTPException(status_code=404, detail="Package not found")

    # A tag may only point at an artifact that is already registered.
    target = db.query(Artifact).filter(Artifact.id == tag.artifact_id).first()
    if not target:
        raise HTTPException(status_code=404, detail="Artifact not found")

    tag_row = db.query(Tag).filter(Tag.package_id == package.id, Tag.name == tag.name).first()
    if tag_row:
        # Same name already in use: move the tag and record who moved it.
        tag_row.artifact_id = tag.artifact_id
        tag_row.created_by = caller
    else:
        tag_row = Tag(
            package_id=package.id,
            name=tag.name,
            artifact_id=tag.artifact_id,
            created_by=caller,
        )
        db.add(tag_row)

    db.commit()
    db.refresh(tag_row)
    return tag_row
# Consumer routes
@router.get("/api/v1/project/{project_name}/{package_name}/consumers", response_model=List[ConsumerResponse])
def get_consumers(project_name: str, package_name: str, db: Session = Depends(get_db)):
    """List the consumers of a package, most recently active first.

    Raises:
        HTTPException: 404 when the project or the package does not exist.
    """
    parent_project = db.query(Project).filter(Project.name == project_name).first()
    if not parent_project:
        raise HTTPException(status_code=404, detail="Project not found")

    target_package = (
        db.query(Package)
        .filter(Package.project_id == parent_project.id, Package.name == package_name)
        .first()
    )
    if not target_package:
        raise HTTPException(status_code=404, detail="Package not found")

    consumer_query = db.query(Consumer).filter(Consumer.package_id == target_package.id)
    return consumer_query.order_by(Consumer.last_access.desc()).all()
# Artifact by ID
@router.get("/api/v1/artifact/{artifact_id}", response_model=ArtifactResponse)
def get_artifact(artifact_id: str, db: Session = Depends(get_db)):
    """Fetch an artifact row by its id.

    Raises:
        HTTPException: 404 when no artifact has that id.
    """
    found = db.query(Artifact).filter(Artifact.id == artifact_id).first()
    if found:
        return found
    raise HTTPException(status_code=404, detail="Artifact not found")