Add presigned URL support for direct S3 downloads

- Add ORCHARD_DOWNLOAD_MODE config (presigned, redirect, proxy)
- Add ORCHARD_PRESIGNED_URL_EXPIRY config (default: 3600s)
- Add generate_presigned_url() method to S3Storage
- Modify download endpoint to support ?mode= query parameter
- Add /url endpoint for getting presigned URL without redirect
- Add PresignedUrlResponse schema with URL, expiry, and checksums
- Default download mode is now presigned for better performance
This commit is contained in:
Mondo Diaz
2025-12-15 15:21:44 -06:00
parent caa0c5af0c
commit 51d4fc4545
5 changed files with 201 additions and 40 deletions

View File

@@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased] ## [Unreleased]
### Added ### Added
- Added presigned URL support for direct S3 downloads (#26)
- Added `ORCHARD_DOWNLOAD_MODE` config option (`presigned`, `redirect`, `proxy`) (#26)
- Added `ORCHARD_PRESIGNED_URL_EXPIRY` config option (default: 3600 seconds) (#26)
- Added `?mode=` query parameter to override download mode per-request (#26)
- Added `/api/v1/project/{project}/{package}/+/{ref}/url` endpoint for getting presigned URLs (#26)
- Added `PresignedUrlResponse` schema with URL, expiry, checksums, and artifact metadata (#26)
- Added integrity verification workflow design document (#24) - Added integrity verification workflow design document (#24)
- Added `sha256` field to API responses for clarity (alias of `id`) (#25) - Added `sha256` field to API responses for clarity (alias of `id`) (#25)
- Added `checksum_sha1` field to artifacts table for compatibility (#25) - Added `checksum_sha1` field to artifacts table for compatibility (#25)
@@ -14,6 +20,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Compute and store MD5, SHA1, and S3 ETag alongside SHA256 during upload (#25) - Compute and store MD5, SHA1, and S3 ETag alongside SHA256 during upload (#25)
- Added `Dockerfile.local` and `docker-compose.local.yml` for local development (#25) - Added `Dockerfile.local` and `docker-compose.local.yml` for local development (#25)
- Added migration script `003_checksum_fields.sql` for existing databases (#25) - Added migration script `003_checksum_fields.sql` for existing databases (#25)
### Changed
- Changed default download mode from `proxy` to `presigned` for better performance (#26)
## [0.2.0] - 2025-12-15 ## [0.2.0] - 2025-12-15
### Changed ### Changed

View File

@@ -32,6 +32,10 @@ class Settings(BaseSettings):
s3_secret_access_key: str = "" s3_secret_access_key: str = ""
s3_use_path_style: bool = True s3_use_path_style: bool = True
# Download settings
download_mode: str = "presigned" # "presigned", "redirect", or "proxy"
presigned_url_expiry: int = 3600 # Presigned URL expiry in seconds (default: 1 hour)
@property @property
def database_url(self) -> str: def database_url(self) -> str:
sslmode = f"?sslmode={self.database_sslmode}" if self.database_sslmode else "" sslmode = f"?sslmode={self.database_sslmode}" if self.database_sslmode else ""

View File

@@ -1,9 +1,9 @@
from datetime import datetime from datetime import datetime, timedelta, timezone
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form, Request, Query, Header, Response from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form, Request, Query, Header, Response
from fastapi.responses import StreamingResponse from fastapi.responses import StreamingResponse, RedirectResponse
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from sqlalchemy import or_, func from sqlalchemy import or_, func
from typing import List, Optional from typing import List, Optional, Literal
import math import math
import re import re
import io import io
@@ -29,8 +29,10 @@ from .schemas import (
ResumableUploadCompleteResponse, ResumableUploadCompleteResponse,
ResumableUploadStatusResponse, ResumableUploadStatusResponse,
GlobalSearchResponse, SearchResultProject, SearchResultPackage, SearchResultArtifact, GlobalSearchResponse, SearchResultProject, SearchResultPackage, SearchResultArtifact,
PresignedUrlResponse,
) )
from .metadata import extract_metadata from .metadata import extract_metadata
from .config import get_settings
router = APIRouter() router = APIRouter()
@@ -844,27 +846,13 @@ def get_upload_status(
raise HTTPException(status_code=404, detail=str(e)) raise HTTPException(status_code=404, detail=str(e))
# Download artifact with range request support # Helper function to resolve artifact reference
@router.get("/api/v1/project/{project_name}/{package_name}/+/{ref}") def _resolve_artifact_ref(
def download_artifact(
project_name: str,
package_name: str,
ref: str, ref: str,
request: Request, package: Package,
db: Session = Depends(get_db), db: Session,
storage: S3Storage = Depends(get_storage), ) -> Optional[Artifact]:
range: Optional[str] = Header(None), """Resolve a reference (tag name, artifact:hash, tag:name) to an artifact"""
):
# Get project and package
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise HTTPException(status_code=404, detail="Project not found")
package = db.query(Package).filter(Package.project_id == project.id, Package.name == package_name).first()
if not package:
raise HTTPException(status_code=404, detail="Package not found")
# Resolve reference to artifact
artifact = None artifact = None
# Check for explicit prefixes # Check for explicit prefixes
@@ -885,11 +873,76 @@ def download_artifact(
# Try as direct artifact ID # Try as direct artifact ID
artifact = db.query(Artifact).filter(Artifact.id == ref).first() artifact = db.query(Artifact).filter(Artifact.id == ref).first()
return artifact
# Download artifact with range request support and download modes
@router.get("/api/v1/project/{project_name}/{package_name}/+/{ref}")
def download_artifact(
project_name: str,
package_name: str,
ref: str,
request: Request,
db: Session = Depends(get_db),
storage: S3Storage = Depends(get_storage),
range: Optional[str] = Header(None),
mode: Optional[Literal["proxy", "redirect", "presigned"]] = Query(
default=None,
description="Download mode: proxy (stream through backend), redirect (302 to presigned URL), presigned (return JSON with URL)"
),
):
settings = get_settings()
# Get project and package
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise HTTPException(status_code=404, detail="Project not found")
package = db.query(Package).filter(Package.project_id == project.id, Package.name == package_name).first()
if not package:
raise HTTPException(status_code=404, detail="Package not found")
# Resolve reference to artifact
artifact = _resolve_artifact_ref(ref, package, db)
if not artifact: if not artifact:
raise HTTPException(status_code=404, detail="Artifact not found") raise HTTPException(status_code=404, detail="Artifact not found")
filename = artifact.original_name or f"{artifact.id}" filename = artifact.original_name or f"{artifact.id}"
# Determine download mode (query param overrides server default)
download_mode = mode or settings.download_mode
# Handle presigned mode - return JSON with presigned URL
if download_mode == "presigned":
presigned_url = storage.generate_presigned_url(
artifact.s3_key,
response_content_type=artifact.content_type,
response_content_disposition=f'attachment; filename="{filename}"',
)
expires_at = datetime.now(timezone.utc) + timedelta(seconds=settings.presigned_url_expiry)
return PresignedUrlResponse(
url=presigned_url,
expires_at=expires_at,
method="GET",
artifact_id=artifact.id,
size=artifact.size,
content_type=artifact.content_type,
original_name=artifact.original_name,
checksum_sha256=artifact.id,
checksum_md5=artifact.checksum_md5,
)
# Handle redirect mode - return 302 redirect to presigned URL
if download_mode == "redirect":
presigned_url = storage.generate_presigned_url(
artifact.s3_key,
response_content_type=artifact.content_type,
response_content_disposition=f'attachment; filename="{filename}"',
)
return RedirectResponse(url=presigned_url, status_code=302)
# Proxy mode (default fallback) - stream through backend
# Handle range requests # Handle range requests
if range: if range:
stream, content_length, content_range = storage.get_stream(artifact.s3_key, range) stream, content_length, content_range = storage.get_stream(artifact.s3_key, range)
@@ -923,6 +976,63 @@ def download_artifact(
) )
# Get presigned URL endpoint (explicit endpoint for getting URL without redirect)
@router.get("/api/v1/project/{project_name}/{package_name}/+/{ref}/url", response_model=PresignedUrlResponse)
def get_artifact_url(
project_name: str,
package_name: str,
ref: str,
db: Session = Depends(get_db),
storage: S3Storage = Depends(get_storage),
expiry: Optional[int] = Query(
default=None,
description="Custom expiry time in seconds (defaults to server setting)"
),
):
"""
Get a presigned URL for direct S3 download.
This endpoint always returns a presigned URL regardless of server download mode.
"""
settings = get_settings()
# Get project and package
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise HTTPException(status_code=404, detail="Project not found")
package = db.query(Package).filter(Package.project_id == project.id, Package.name == package_name).first()
if not package:
raise HTTPException(status_code=404, detail="Package not found")
# Resolve reference to artifact
artifact = _resolve_artifact_ref(ref, package, db)
if not artifact:
raise HTTPException(status_code=404, detail="Artifact not found")
filename = artifact.original_name or f"{artifact.id}"
url_expiry = expiry or settings.presigned_url_expiry
presigned_url = storage.generate_presigned_url(
artifact.s3_key,
expiry=url_expiry,
response_content_type=artifact.content_type,
response_content_disposition=f'attachment; filename="{filename}"',
)
expires_at = datetime.now(timezone.utc) + timedelta(seconds=url_expiry)
return PresignedUrlResponse(
url=presigned_url,
expires_at=expires_at,
method="GET",
artifact_id=artifact.id,
size=artifact.size,
content_type=artifact.content_type,
original_name=artifact.original_name,
checksum_sha256=artifact.id,
checksum_md5=artifact.checksum_md5,
)
# HEAD request for download (to check file info without downloading) # HEAD request for download (to check file info without downloading)
@router.head("/api/v1/project/{project_name}/{package_name}/+/{ref}") @router.head("/api/v1/project/{project_name}/{package_name}/+/{ref}")
def head_artifact( def head_artifact(
@@ -941,23 +1051,8 @@ def head_artifact(
if not package: if not package:
raise HTTPException(status_code=404, detail="Package not found") raise HTTPException(status_code=404, detail="Package not found")
# Resolve reference to artifact (same logic as download) # Resolve reference to artifact
artifact = None artifact = _resolve_artifact_ref(ref, package, db)
if ref.startswith("artifact:"):
artifact_id = ref[9:]
artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first()
elif ref.startswith("tag:") or ref.startswith("version:"):
tag_name = ref.split(":", 1)[1]
tag = db.query(Tag).filter(Tag.package_id == package.id, Tag.name == tag_name).first()
if tag:
artifact = db.query(Artifact).filter(Artifact.id == tag.artifact_id).first()
else:
tag = db.query(Tag).filter(Tag.package_id == package.id, Tag.name == ref).first()
if tag:
artifact = db.query(Artifact).filter(Artifact.id == tag.artifact_id).first()
else:
artifact = db.query(Artifact).filter(Artifact.id == ref).first()
if not artifact: if not artifact:
raise HTTPException(status_code=404, detail="Artifact not found") raise HTTPException(status_code=404, detail="Artifact not found")

View File

@@ -330,6 +330,20 @@ class GlobalSearchResponse(BaseModel):
counts: Dict[str, int] # Total counts for each type counts: Dict[str, int] # Total counts for each type
# Presigned URL response
class PresignedUrlResponse(BaseModel):
"""Response containing a presigned URL for direct S3 download"""
url: str
expires_at: datetime
method: str = "GET"
artifact_id: str
size: int
content_type: Optional[str] = None
original_name: Optional[str] = None
checksum_sha256: Optional[str] = None
checksum_md5: Optional[str] = None
# Health check # Health check
class HealthResponse(BaseModel): class HealthResponse(BaseModel):
status: str status: str

View File

@@ -450,6 +450,46 @@ class S3Storage:
except ClientError: except ClientError:
return False return False
def generate_presigned_url(
self,
s3_key: str,
expiry: Optional[int] = None,
response_content_type: Optional[str] = None,
response_content_disposition: Optional[str] = None,
) -> str:
"""
Generate a presigned URL for downloading an object.
Args:
s3_key: The S3 key of the object
expiry: URL expiry in seconds (defaults to settings.presigned_url_expiry)
response_content_type: Override Content-Type header in response
response_content_disposition: Override Content-Disposition header in response
Returns:
Presigned URL string
"""
if expiry is None:
expiry = settings.presigned_url_expiry
params = {
"Bucket": self.bucket,
"Key": s3_key,
}
# Add response header overrides if specified
if response_content_type:
params["ResponseContentType"] = response_content_type
if response_content_disposition:
params["ResponseContentDisposition"] = response_content_disposition
url = self.client.generate_presigned_url(
"get_object",
Params=params,
ExpiresIn=expiry,
)
return url
# Singleton instance # Singleton instance
_storage = None _storage = None