Implement backend upload/download API enhancements

This commit is contained in:
Mondo Diaz
2025-12-11 18:05:08 -06:00
parent e9404a4425
commit c119ab4a04
10 changed files with 1214 additions and 41 deletions

View File

@@ -1,13 +1,15 @@
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form, Request, Query
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form, Request, Query, Header, Response
from fastapi.responses import StreamingResponse
from sqlalchemy.orm import Session
from sqlalchemy import or_, func
from typing import List, Optional
import math
import re
import io
import hashlib
from .database import get_db
from .storage import get_storage, S3Storage
from .storage import get_storage, S3Storage, MULTIPART_CHUNK_SIZE
from .models import Project, Package, Artifact, Tag, Upload, Consumer
from .schemas import (
ProjectCreate, ProjectResponse,
@@ -18,7 +20,14 @@ from .schemas import (
ConsumerResponse,
HealthResponse,
PaginatedResponse, PaginationMeta,
ResumableUploadInitRequest,
ResumableUploadInitResponse,
ResumableUploadPartResponse,
ResumableUploadCompleteRequest,
ResumableUploadCompleteResponse,
ResumableUploadStatusResponse,
)
from .metadata import extract_metadata
router = APIRouter()
@@ -151,6 +160,7 @@ def upload_artifact(
tag: Optional[str] = Form(None),
db: Session = Depends(get_db),
storage: S3Storage = Depends(get_storage),
content_length: Optional[int] = Header(None, alias="Content-Length"),
):
user_id = get_user_id(request)
@@ -163,13 +173,36 @@ def upload_artifact(
if not package:
raise HTTPException(status_code=404, detail="Package not found")
# Store file
sha256_hash, size, s3_key = storage.store(file.file)
# Extract format-specific metadata before storing
file_metadata = {}
if file.filename:
# Read file into memory for metadata extraction
file_content = file.file.read()
file.file.seek(0)
# Extract metadata
file_metadata = extract_metadata(
io.BytesIO(file_content),
file.filename,
file.content_type
)
# Store file (uses multipart for large files)
sha256_hash, size, s3_key = storage.store(file.file, content_length)
# Check if this is a deduplicated upload
deduplicated = False
# Create or update artifact record
artifact = db.query(Artifact).filter(Artifact.id == sha256_hash).first()
if artifact:
artifact.ref_count += 1
deduplicated = True
# Merge metadata if new metadata was extracted
if file_metadata and artifact.format_metadata:
artifact.format_metadata = {**artifact.format_metadata, **file_metadata}
elif file_metadata:
artifact.format_metadata = file_metadata
else:
artifact = Artifact(
id=sha256_hash,
@@ -178,6 +211,7 @@ def upload_artifact(
original_name=file.filename,
created_by=user_id,
s3_key=s3_key,
format_metadata=file_metadata or {},
)
db.add(artifact)
@@ -214,17 +248,265 @@ def upload_artifact(
project=project_name,
package=package_name,
tag=tag,
format_metadata=artifact.format_metadata,
deduplicated=deduplicated,
)
# Download artifact
# Resumable upload endpoints
@router.post("/api/v1/project/{project_name}/{package_name}/upload/init", response_model=ResumableUploadInitResponse)
def init_resumable_upload(
project_name: str,
package_name: str,
init_request: ResumableUploadInitRequest,
request: Request,
db: Session = Depends(get_db),
storage: S3Storage = Depends(get_storage),
):
"""
Initialize a resumable upload session.
Client must provide the SHA256 hash of the file in advance.
"""
user_id = get_user_id(request)
# Validate project and package
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise HTTPException(status_code=404, detail="Project not found")
package = db.query(Package).filter(Package.project_id == project.id, Package.name == package_name).first()
if not package:
raise HTTPException(status_code=404, detail="Package not found")
# Check if artifact already exists (deduplication)
existing_artifact = db.query(Artifact).filter(Artifact.id == init_request.expected_hash).first()
if existing_artifact:
# File already exists - increment ref count and return immediately
existing_artifact.ref_count += 1
# Record the upload
upload = Upload(
artifact_id=init_request.expected_hash,
package_id=package.id,
original_name=init_request.filename,
uploaded_by=user_id,
source_ip=request.client.host if request.client else None,
)
db.add(upload)
# Create tag if provided
if init_request.tag:
existing_tag = db.query(Tag).filter(
Tag.package_id == package.id, Tag.name == init_request.tag
).first()
if existing_tag:
existing_tag.artifact_id = init_request.expected_hash
existing_tag.created_by = user_id
else:
new_tag = Tag(
package_id=package.id,
name=init_request.tag,
artifact_id=init_request.expected_hash,
created_by=user_id,
)
db.add(new_tag)
db.commit()
return ResumableUploadInitResponse(
upload_id=None,
already_exists=True,
artifact_id=init_request.expected_hash,
chunk_size=MULTIPART_CHUNK_SIZE,
)
# Initialize resumable upload
session = storage.initiate_resumable_upload(init_request.expected_hash)
return ResumableUploadInitResponse(
upload_id=session["upload_id"],
already_exists=False,
artifact_id=None,
chunk_size=MULTIPART_CHUNK_SIZE,
)
@router.put("/api/v1/project/{project_name}/{package_name}/upload/{upload_id}/part/{part_number}")
def upload_part(
project_name: str,
package_name: str,
upload_id: str,
part_number: int,
request: Request,
db: Session = Depends(get_db),
storage: S3Storage = Depends(get_storage),
):
"""
Upload a part of a resumable upload.
Part numbers start at 1.
"""
# Validate project and package exist
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise HTTPException(status_code=404, detail="Project not found")
package = db.query(Package).filter(Package.project_id == project.id, Package.name == package_name).first()
if not package:
raise HTTPException(status_code=404, detail="Package not found")
if part_number < 1:
raise HTTPException(status_code=400, detail="Part number must be >= 1")
# Read part data from request body
import asyncio
loop = asyncio.new_event_loop()
async def read_body():
return await request.body()
try:
data = loop.run_until_complete(read_body())
finally:
loop.close()
if not data:
raise HTTPException(status_code=400, detail="No data in request body")
try:
part_info = storage.upload_part(upload_id, part_number, data)
return ResumableUploadPartResponse(
part_number=part_info["PartNumber"],
etag=part_info["ETag"],
)
except ValueError as e:
raise HTTPException(status_code=404, detail=str(e))
@router.post("/api/v1/project/{project_name}/{package_name}/upload/{upload_id}/complete")
def complete_resumable_upload(
project_name: str,
package_name: str,
upload_id: str,
complete_request: ResumableUploadCompleteRequest,
request: Request,
db: Session = Depends(get_db),
storage: S3Storage = Depends(get_storage),
):
"""Complete a resumable upload"""
user_id = get_user_id(request)
# Validate project and package
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise HTTPException(status_code=404, detail="Project not found")
package = db.query(Package).filter(Package.project_id == project.id, Package.name == package_name).first()
if not package:
raise HTTPException(status_code=404, detail="Package not found")
try:
sha256_hash, s3_key = storage.complete_resumable_upload(upload_id)
except ValueError as e:
raise HTTPException(status_code=404, detail=str(e))
# Get file size from S3
obj_info = storage.get_object_info(s3_key)
size = obj_info["size"] if obj_info else 0
# Create artifact record
artifact = Artifact(
id=sha256_hash,
size=size,
s3_key=s3_key,
created_by=user_id,
format_metadata={},
)
db.add(artifact)
# Record upload
upload = Upload(
artifact_id=sha256_hash,
package_id=package.id,
uploaded_by=user_id,
source_ip=request.client.host if request.client else None,
)
db.add(upload)
# Create tag if provided
if complete_request.tag:
existing_tag = db.query(Tag).filter(
Tag.package_id == package.id, Tag.name == complete_request.tag
).first()
if existing_tag:
existing_tag.artifact_id = sha256_hash
existing_tag.created_by = user_id
else:
new_tag = Tag(
package_id=package.id,
name=complete_request.tag,
artifact_id=sha256_hash,
created_by=user_id,
)
db.add(new_tag)
db.commit()
return ResumableUploadCompleteResponse(
artifact_id=sha256_hash,
size=size,
project=project_name,
package=package_name,
tag=complete_request.tag,
)
@router.delete("/api/v1/project/{project_name}/{package_name}/upload/{upload_id}")
def abort_resumable_upload(
project_name: str,
package_name: str,
upload_id: str,
storage: S3Storage = Depends(get_storage),
):
"""Abort a resumable upload"""
try:
storage.abort_resumable_upload(upload_id)
return {"status": "aborted"}
except ValueError as e:
raise HTTPException(status_code=404, detail=str(e))
@router.get("/api/v1/project/{project_name}/{package_name}/upload/{upload_id}/status")
def get_upload_status(
project_name: str,
package_name: str,
upload_id: str,
storage: S3Storage = Depends(get_storage),
):
"""Get status of a resumable upload"""
try:
parts = storage.list_upload_parts(upload_id)
uploaded_parts = [p["PartNumber"] for p in parts]
total_bytes = sum(p.get("Size", 0) for p in parts)
return ResumableUploadStatusResponse(
upload_id=upload_id,
uploaded_parts=uploaded_parts,
total_uploaded_bytes=total_bytes,
)
except ValueError as e:
raise HTTPException(status_code=404, detail=str(e))
# Download artifact with range request support
@router.get("/api/v1/project/{project_name}/{package_name}/+/{ref}")
def download_artifact(
project_name: str,
package_name: str,
ref: str,
request: Request,
db: Session = Depends(get_db),
storage: S3Storage = Depends(get_storage),
range: Optional[str] = Header(None),
):
# Get project and package
project = db.query(Project).filter(Project.name == project_name).first()
@@ -259,15 +541,90 @@ def download_artifact(
if not artifact:
raise HTTPException(status_code=404, detail="Artifact not found")
# Stream from S3
stream = storage.get_stream(artifact.s3_key)
filename = artifact.original_name or f"{artifact.id}"
# Handle range requests
if range:
stream, content_length, content_range = storage.get_stream(artifact.s3_key, range)
headers = {
"Content-Disposition": f'attachment; filename="{filename}"',
"Accept-Ranges": "bytes",
"Content-Length": str(content_length),
}
if content_range:
headers["Content-Range"] = content_range
return StreamingResponse(
stream,
status_code=206, # Partial Content
media_type=artifact.content_type or "application/octet-stream",
headers=headers,
)
# Full download
stream, content_length, _ = storage.get_stream(artifact.s3_key)
return StreamingResponse(
stream,
media_type=artifact.content_type or "application/octet-stream",
headers={"Content-Disposition": f'attachment; filename="{filename}"'},
headers={
"Content-Disposition": f'attachment; filename="{filename}"',
"Accept-Ranges": "bytes",
"Content-Length": str(content_length),
},
)
# HEAD request for download (to check file info without downloading)
@router.head("/api/v1/project/{project_name}/{package_name}/+/{ref}")
def head_artifact(
project_name: str,
package_name: str,
ref: str,
db: Session = Depends(get_db),
storage: S3Storage = Depends(get_storage),
):
# Get project and package
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise HTTPException(status_code=404, detail="Project not found")
package = db.query(Package).filter(Package.project_id == project.id, Package.name == package_name).first()
if not package:
raise HTTPException(status_code=404, detail="Package not found")
# Resolve reference to artifact (same logic as download)
artifact = None
if ref.startswith("artifact:"):
artifact_id = ref[9:]
artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first()
elif ref.startswith("tag:") or ref.startswith("version:"):
tag_name = ref.split(":", 1)[1]
tag = db.query(Tag).filter(Tag.package_id == package.id, Tag.name == tag_name).first()
if tag:
artifact = db.query(Artifact).filter(Artifact.id == tag.artifact_id).first()
else:
tag = db.query(Tag).filter(Tag.package_id == package.id, Tag.name == ref).first()
if tag:
artifact = db.query(Artifact).filter(Artifact.id == tag.artifact_id).first()
else:
artifact = db.query(Artifact).filter(Artifact.id == ref).first()
if not artifact:
raise HTTPException(status_code=404, detail="Artifact not found")
filename = artifact.original_name or f"{artifact.id}"
return Response(
content=b"",
media_type=artifact.content_type or "application/octet-stream",
headers={
"Content-Disposition": f'attachment; filename="{filename}"',
"Accept-Ranges": "bytes",
"Content-Length": str(artifact.size),
"X-Artifact-Id": artifact.id,
},
)
@@ -277,10 +634,12 @@ def download_artifact_compat(
project_name: str,
package_name: str,
ref: str,
request: Request,
db: Session = Depends(get_db),
storage: S3Storage = Depends(get_storage),
range: Optional[str] = Header(None),
):
return download_artifact(project_name, package_name, ref, db, storage)
return download_artifact(project_name, package_name, ref, request, db, storage, range)
# Tag routes